changeset 45:7955bed1d169 default tip
*: add preliminary floating point support
no x86 intrinsics just yet, but I did add altivec since it's
(arguably) the simplest :)
author   | Paper <paper@tflc.us>
date     | Wed, 30 Apr 2025 18:36:38 -0400
parents  | b0a3f0248ecc
children |
files    | gen/README gen/genaltivec.c gen/gendouble.c gen/gengcc.c gen/gengeneric.c gen/genlib.c gen/genlib.h gen/genvsx.c include/vec/defs.h include/vec/impl/double.h include/vec/impl/gcc.h include/vec/impl/generic.h include/vec/impl/ppc/altivec.h include/vec/impl/ppc/vsx.h include/vec/impl/x86/sse2.h include/vec/impl/x86/sse3.h include/vec/vec.h test/Makefile.template test/test.c test/test_align.h test/test_arith.h test/test_benchmark.h test/test_benchmark_simple.c test/test_benchmark_vec.c test/test_compare.h utils/genaltivec.c utils/gendouble.c utils/gengcc.c utils/gengeneric.c
diffstat | 29 files changed, 37160 insertions(+), 8869 deletions(-)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gen/README Wed Apr 30 18:36:38 2025 -0400
@@ -0,0 +1,23 @@
+These files are used to generate the actual implementation headers in
+`include/vec/impl`.
+
+All of them are basically compiled the same way:
+    gcc -o genIMPL genIMPL.c genlib.c
+
+You can generally base any new implementation off of one of the existing
+ones. Preferably, you would base it off the generic implementation, since
+it actually has all of the operations implemented (and serves as a
+reference for how these operations *should* work). For example, the avg
+operation on integers should be roughly equivalent to:
+    ceil((vec1 + vec2) / 2)
+
+Note how it always rounds up rather than truncating towards zero. This
+is an important implementation detail, and it stems from AltiVec, which
+was the inspiration behind much of the vec API.
+
+Note, however, that avg gives a different result for floating point: it
+is equivalent to simply
+    ((vec1 + vec2) / 2)
+as there is no real way to get around any possible truncation.
+
+Any overflow on integer operations should simply wrap around.
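(Illustrative aside, not part of this changeset: a scalar sketch of the avg semantics described in gen/README. The signed-integer formula mirrors the code gen/gengeneric.c emits; the 32-bit element width is only an example.)

#include <stdint.h>

/* integer avg: ceil((a + b) / 2), computed without overflowing an
 * intermediate sum; this mirrors the scalar code gen/gengeneric.c emits */
static int32_t avg_int32(int32_t a, int32_t b)
{
	int32_t a_rem = a % 2, b_rem = b % 2;

	return (a / 2) + (b / 2) + ((a_rem + b_rem) / 2) + ((a_rem + b_rem) % 2 == 1);
}

/* floating-point avg: a plain average, with no rounding adjustment */
static float avg_f32(float a, float b)
{
	return (a + b) / 2;
}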
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gen/genaltivec.c Wed Apr 30 18:36:38 2025 -0400 @@ -0,0 +1,250 @@ +/** + * vec - a tiny SIMD vector library in C99 + * + * Copyright (c) 2024-2025 Paper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +**/ + +#include "genlib.h" + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) + +/* ------------------------------------------------------------------------ */ + +/* #define USE_VSX_EXTENSIONS */ +/* #define USE_POWER8_EXTENSIONS */ + +static int altivec_check(int op, int type, int bits, int size) +{ + switch (bits) { + case 8: + case 16: + case 32: +#ifdef USE_VSX_EXTENSIONS + case 64: +# ifndef USE_POWER8_EXTENSIONS + /* VSX has double, but not int64 */ + if ((bits == 64) && (type != TYPE_FLOAT)) + return 0; +# endif +#endif + if (bits * size == 128) + return 1; + default: + break; + } + + return 0; +} + +static int altivec_check_int(int op, int type, int bits, int size) +{ + return (altivec_check(op, type, bits, size) && type != TYPE_FLOAT); +} + +static int altivec_check_float(int op, int type, int bits, int size) +{ + return (altivec_check(op, type, bits, size) && type == TYPE_FLOAT); +} + +static void altivec_ppcheck(int op, int type, int bits, int size) +{ + /* old gcc had a broken partial implementation + * (why even bother adding it at all?) */ + switch (op) { + case OP_MUL: printf("defined(vec_mul)"); break; + case OP_SPLAT: printf("defined(vec_splats)"); break; + } +} + +static void altivec_splat(int op, int type, int bits, int size) +{ + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + puts("\tvec.altivec = vec_splats(x);"); + puts("\treturn vec;"); +} + +static void altivec_load(int op, int type, int bits, int size) +{ + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + puts("\tvec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));"); + puts("\treturn vec;"); +} + +static void altivec_load_aligned(int op, int type, int bits, int size) +{ + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + puts("\tvec.altivec = vec_ld(0, x);"); + puts("\treturn vec;"); +} + +static void altivec_store_aligned(int op, int type, int bits, int size) +{ + puts("\tvec_st(vec.altivec, 0, x);"); +} + +/* no store? */ + +static void altivec_print_native_type(int type, int bits) +{ + /* WITH DIRECTION AND MAGNITUDE! 
*/ + printf("vector "); + + switch (type) { + case TYPE_INT: + printf("signed "); + break; + case TYPE_UINT: + printf("unsigned "); + break; + case TYPE_FLOAT: + /* nothing */ + break; + } + + switch (type) { + case TYPE_INT: + case TYPE_UINT: + switch (bits) { + case 8: printf("char"); break; + case 16: printf("short"); break; + case 32: printf("int"); break; + case 64: printf("long long"); break; + default: break; + } + break; + case TYPE_FLOAT: + switch (bits) { + case 32: printf("float"); break; + case 64: printf("double"); break; + default: break; + } + } +} + +static void altivec_2op(int op, int type, int bits, int size) +{ + static const char *op_altivec[] = { + [OP_ADD] = "add", + [OP_SUB] = "sub", + [OP_MUL] = "mul", + [OP_DIV] = "div", + [OP_MOD] = "mod", + [OP_AND] = "and", + [OP_OR] = "or", + [OP_XOR] = "xor", + [OP_CMPLT] = "cmplt", + [OP_CMPEQ] = "cmpeq", + [OP_CMPGT] = "cmpgt", + [OP_CMPLE] = "cmple", + [OP_CMPGE] = "cmpge", + [OP_MIN] = "min", + [OP_MAX] = "max", + [OP_AVG] = "avg", + [OP_LSHIFT] = "sl", + [OP_LRSHIFT] = "sr", + }; + + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + if (op == OP_RSHIFT) { + printf("\tvec.altivec = vec_sr%s(vec1.altivec, vec2.altivec);\n", (type == TYPE_INT) ? "a" : ""); + } else { + printf("\tvec.altivec = ("); + altivec_print_native_type(type, bits); + printf(")vec_%s(vec1.altivec, vec2.altivec);\n", op_altivec[op]); + } + + puts("\treturn vec;"); +} + +/* ------------------------------------------------------------------------ */ + +static struct op_impl op_impl[OP_FINAL_] = { + [OP_SPLAT] = {altivec_check, NULL, altivec_splat}, + [OP_LOAD_ALIGNED] = {altivec_check, NULL, altivec_load_aligned}, + [OP_LOAD] = {altivec_check, NULL, altivec_load}, + [OP_STORE_ALIGNED] = {altivec_check, NULL, altivec_store_aligned}, + + /* arithmetic */ + [OP_ADD] = {altivec_check, NULL, altivec_2op}, + [OP_SUB] = {altivec_check, NULL, altivec_2op}, + [OP_MUL] = {altivec_check, NULL, altivec_2op}, +#ifdef USE_VSX_EXTENSIONS + /* GCC fails to compile integer division, so limit to floats */ + [OP_DIV] = {altivec_check_float, NULL, altivec_2op}, +#endif +#if 0 + /* This is Power10. I don't have any Power10 hardware :) + * (well, I also don't have any VSX hardware. whatever) */ + [OP_MOD] = {altivec_check_int, NULL, altivec_2op}, +#endif + [OP_AVG] = {altivec_check_int, NULL, altivec_2op}, + + /* bitwise */ + [OP_AND] = {altivec_check, NULL, altivec_2op}, + [OP_OR] = {altivec_check, NULL, altivec_2op}, + [OP_XOR] = {altivec_check, NULL, altivec_2op}, + + /* min/max */ + [OP_MIN] = {altivec_check, NULL, altivec_2op}, + [OP_MAX] = {altivec_check, NULL, altivec_2op}, + + /* bitshift */ + [OP_LSHIFT] = {altivec_check, NULL, altivec_2op}, + [OP_LRSHIFT] = {altivec_check, NULL, altivec_2op}, + [OP_RSHIFT] = {altivec_check, NULL, altivec_2op}, + + /* comparison */ + [OP_CMPLT] = {altivec_check, NULL, altivec_2op}, +#ifdef USE_VSX_EXTENSIONS + [OP_CMPLE] = {altivec_check, NULL, altivec_2op}, +#endif + [OP_CMPEQ] = {altivec_check, NULL, altivec_2op}, +#ifdef USE_VSX_EXTENSIONS + [OP_CMPGE] = {altivec_check, NULL, altivec_2op}, +#endif + [OP_CMPGT] = {altivec_check, NULL, altivec_2op}, +}; + + +int main(void) +{ + gen(op_impl, +#ifdef USE_POWER8_EXTENSIONS + "power8" +#elif defined(USE_VSX_EXTENSIONS) + "vsx" +#else + "altivec" +#endif + ); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gen/gendouble.c Wed Apr 30 18:36:38 2025 -0400 @@ -0,0 +1,275 @@ +/** + * vec - a tiny SIMD vector library in C99 + * + * Copyright (c) 2024-2025 Paper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +**/ + +#include "genlib.h" + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) + +/* XXX: would it be faster to unroll literally everything instead of defining everything, + * and then unpacking it all? */ +static const char *header = + "/**\n" + " * vec - a tiny SIMD vector library in C99\n" + " * \n" + " * Copyright (c) 2024-2025 Paper\n" + " * \n" + " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" + " * of this software and associated documentation files (the \"Software\"), to deal\n" + " * in the Software without restriction, including without limitation the rights\n" + " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" + " * copies of the Software, and to permit persons to whom the Software is\n" + " * furnished to do so, subject to the following conditions:\n" + " * \n" + " * The above copyright notice and this permission notice shall be included in all\n" + " * copies or substantial portions of the Software.\n" + " * \n" + " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" + " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" + " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" + " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" + " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" + " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n" + " * SOFTWARE.\n" + "**/\n" + "\n" + "/* This file is automatically generated! 
Do not edit it directly!\n" + " * Edit the code that generates it in utils/gendbl.c --paper */\n" + "\n" + "#ifndef VEC_IMPL_DOUBLE_H_\n" + "#define VEC_IMPL_DOUBLE_H_\n" + "\n" + "#define VEC_DOUBLE_SPLAT(sign, bits, size, halfsize) \\\n" + " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \\\n" + " { \\\n" + " v##sign##int##bits##x##size vec; \\\n" + " \\\n" + " vec.dbl[0] = v##sign##int##bits##x##halfsize##_splat(x); \\\n" + " vec.dbl[1] = v##sign##int##bits##x##halfsize##_splat(x); \\\n" + " \\\n" + " return vec; \\\n" + " }\n" + "\n" + "#define VEC_DOUBLE_LOAD_EX(name, sign, bits, size, halfsize) \\\n" + " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits x[size]) \\\n" + " { \\\n" + " v##sign##int##bits##x##size vec; \\\n" + " \\\n" + " vec.dbl[0] = v##sign##int##bits##x##halfsize##_##name(x); \\\n" + " vec.dbl[1] = v##sign##int##bits##x##halfsize##_##name(x + halfsize); \\\n" + " \\\n" + " return vec; \\\n" + " }\n" + "\n" + "#define VEC_DOUBLE_LOAD(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load, sign, bits, size, halfsize)\n" + "#define VEC_DOUBLE_LOAD_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load_aligned, sign, bits, size, halfsize)\n" + "\n" + "#define VEC_DOUBLE_STORE_EX(name, sign, bits, size, halfsize) \\\n" + " VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits x[size]) \\\n" + " { \\\n" + " v##sign##int##bits##x##halfsize##_##name(vec.dbl[0], x); \\\n" + " v##sign##int##bits##x##halfsize##_##name(vec.dbl[1], x + halfsize); \\\n" + " }\n" + "\n" + "#define VEC_DOUBLE_STORE(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store, sign, bits, size, halfsize)\n" + "#define VEC_DOUBLE_STORE_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store_aligned, sign, bits, size, halfsize)\n" + "\n" + "#define VEC_DOUBLE_OP(name, sign, bits, size, halfsize, secondsign) \\\n" + " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec1, v##secondsign##int##bits##x##size vec2) \\\n" + " { \\\n" + " vec1.dbl[0] = v##sign##int##bits##x##halfsize##_##name(vec1.dbl[0], vec2.dbl[0]); \\\n" + " vec1.dbl[1] = v##sign##int##bits##x##halfsize##_##name(vec1.dbl[1], vec2.dbl[1]); \\\n" + " \\\n" + " return vec1; \\\n" + " }\n" + "\n" + "#define VEC_DOUBLE_ADD(sign, bits, size, halfsize) VEC_DOUBLE_OP(add, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_SUB(sign, bits, size, halfsize) VEC_DOUBLE_OP(sub, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_MUL(sign, bits, size, halfsize) VEC_DOUBLE_OP(mul, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_DIV(sign, bits, size, halfsize) VEC_DOUBLE_OP(div, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_MOD(sign, bits, size, halfsize) VEC_DOUBLE_OP(mod, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_AVG(sign, bits, size, halfsize) VEC_DOUBLE_OP(avg, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_LSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lshift, sign, bits, size, halfsize, u)\n" + "#define VEC_DOUBLE_RSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(rshift, sign, bits, size, halfsize, u)\n" + "#define VEC_DOUBLE_LRSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lrshift, sign, bits, size, halfsize, u)\n" + "#define VEC_DOUBLE_AND(sign, bits, size, halfsize) VEC_DOUBLE_OP(and, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_OR(sign, 
bits, size, halfsize) VEC_DOUBLE_OP(or, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_XOR(sign, bits, size, halfsize) VEC_DOUBLE_OP(xor, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_MIN(sign, bits, size, halfsize) VEC_DOUBLE_OP(min, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_MAX(sign, bits, size, halfsize) VEC_DOUBLE_OP(max, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_CMPLT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmplt, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_CMPLE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmple, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_CMPEQ(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpeq, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_CMPGE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpge, sign, bits, size, halfsize, sign)\n" + "#define VEC_DOUBLE_CMPGT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpgt, sign, bits, size, halfsize, sign)\n" + "\n" + "#define VEC_DOUBLE_NOT(sign, bits, size, halfsize) \\\n" + " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \\\n" + " { \\\n" + " vec.dbl[0] = v##sign##int##bits##x##halfsize##_not(vec.dbl[0]); \\\n" + " vec.dbl[1] = v##sign##int##bits##x##halfsize##_not(vec.dbl[1]); \\\n" + " \\\n" + " return vec; \\\n" + " }\n" + "\n" + "#endif /* VEC_IMPL_DOUBLE_H_ */ \n" + "\n" + "/* ------------------------------------------------------------------------ */\n" + "/* PREPROCESSOR HELL INCOMING */\n" + ""; + +static const char *footer = + "" /* nothing */; + +/* ------------------------------------------------------------------------ */ + +static void op_print_pp_halfsize(int op, int type, int bits, int size) +{ + struct op_info *op_info = gen_op_info(op); + + printf("defined(V%s%dx%d_%s_DEFINED)", type_str[type].u, bits, size / 2, op_info->u); +} + +static void op_print_twoop(int op, int type, int bits, int size) +{ + struct op_info *op_info = gen_op_info(op); + + printf("\tvec1.dbl[0] = "); + gen_print_vtype(type, bits, size / 2); + printf("_%s(vec1.dbl[0], vec2.dbl[0]);\n", op_info->l); + + printf("\tvec1.dbl[1] = "); + gen_print_vtype(type, bits, size / 2); + printf("_%s(vec1.dbl[1], vec2.dbl[1]);\n", op_info->l); + + printf("\treturn vec1;\n"); +} + +static void op_print_unoop(int op, int type, int bits, int size) +{ + struct op_info *op_info = gen_op_info(op); + + printf("\tvec.dbl[0] = "); + gen_print_vtype(type, bits, size / 2); + printf("_%s(vec.dbl[0]);\n", op_info->l); + + printf("\tvec1.dbl[1] = "); + gen_print_vtype(type, bits, size / 2); + printf("_%s(vec.dbl[1]);\n", op_info->l); + + printf("\treturn vec;\n"); +} + +static inline void op_print_load(int op, int type, int bits, int size) +{ + struct op_info *op_info = gen_op_info(op); + + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + printf("\tvec.dbl[0] = "); + gen_print_vtype(type, bits, size / 2); + printf("_%s(x);\n", op_info->l); + + printf("\tvec.dbl[1] = "); + gen_print_vtype(type, bits, size / 2); + printf("_%s(x + %d);\n", op_info->l, size / 2); + + printf("\treturn vec;\n"); +} + +static inline void op_print_splat(int op, int type, int bits, int size) +{ + struct op_info *op_info = gen_op_info(op); + + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + printf("\tvec.dbl[0] = "); + gen_print_vtype(type, bits, size / 2); + printf("_%s(x);\n", op_info->l); + + printf("\tvec.dbl[1] = "); + gen_print_vtype(type, bits, size / 2); + printf("_%s(x);\n", 
op_info->l); + + printf("\treturn vec;\n"); +} + +static inline void op_print_store(int op, int type, int bits, int size) +{ + struct op_info *op_info = gen_op_info(op); + + printf("\t"); + gen_print_vtype(type, bits, size / 2); + printf("_%s(vec.dbl[0], x);\n", op_info->l); + + printf("\t"); + gen_print_vtype(type, bits, size / 2); + printf("_%s(vec.dbl[1], x + %d);\n", op_info->l, size / 2); +} + +static struct op_impl op_impl[OP_FINAL_] = { + [OP_SPLAT] = {NULL, op_print_pp_halfsize, op_print_splat}, + [OP_LOAD_ALIGNED] = {NULL, op_print_pp_halfsize, op_print_load}, + [OP_LOAD] = {NULL, op_print_pp_halfsize, op_print_load}, + [OP_STORE_ALIGNED] = {NULL, op_print_pp_halfsize, op_print_store}, + [OP_STORE] = {NULL, op_print_pp_halfsize, op_print_store}, + + /* arithmetic */ + [OP_ADD] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_SUB] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_MUL] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_DIV] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_MOD] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_AVG] = {NULL, op_print_pp_halfsize, op_print_twoop}, + + /* bitwise */ + [OP_AND] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_OR] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_XOR] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_NOT] = {NULL, op_print_pp_halfsize, op_print_unoop}, + + /* min/max */ + [OP_MIN] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_MAX] = {NULL, op_print_pp_halfsize, op_print_twoop}, + + /* bitshift */ + [OP_LSHIFT] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_LRSHIFT] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_RSHIFT] = {NULL, op_print_pp_halfsize, op_print_twoop}, + + /* comparison */ + [OP_CMPLT] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_CMPLE] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_CMPEQ] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_CMPGE] = {NULL, op_print_pp_halfsize, op_print_twoop}, + [OP_CMPGT] = {NULL, op_print_pp_halfsize, op_print_twoop}, +}; + +int main(void) +{ + gen(op_impl, "double"); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gen/gengcc.c Wed Apr 30 18:36:38 2025 -0400 @@ -0,0 +1,219 @@ +/** + * vec - a tiny SIMD vector library in C99 + * + * Copyright (c) 2024-2025 Paper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +**/ + +#include "genlib.h" + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) + +static int gcc_minmax_only_integer(int op, int type, int bits, int size) +{ + return (type == TYPE_INT || type == TYPE_UINT); +} + +/* ------------------------------------------------------------------------ */ + +static void pp_gcc_prereq_4_3_0(int op, int type, int bits, int size) +{ + printf("VEC_GNUC_ATLEAST(4, 3, 0)"); +} + +static void gcc_print_easy_op(int op, int type, int bits, int size) +{ + static const char *op_builtins[] = { + [OP_ADD] = "+", + [OP_SUB] = "-", + [OP_MUL] = "*", + [OP_DIV] = "/", + [OP_MOD] = "%", + [OP_AND] = "&", + [OP_OR] = "|", + [OP_XOR] = "^", + [OP_CMPLT] = "<", + [OP_CMPLE] = "<=", + [OP_CMPEQ] = "==", + [OP_CMPGE] = ">=", + [OP_CMPGT] = ">", + }; + + printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]); + printf("\treturn vec1;\n"); +} + +static void gcc_print_splat(int op, int type, int bits, int size) +{ + int i; + + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + for (i = 0; i < size; i++) + printf("\tvec.gcc[%d] = x;\n", i); + printf("\treturn vec;\n"); +} + +static void gcc_print_load_aligned(int op, int type, int bits, int size) +{ + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + puts("\tvec.gcc = *(__typeof__(vec.gcc) *)x;"); + printf("\treturn vec;\n"); +} + +static void gcc_print_load(int op, int type, int bits, int size) +{ + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + puts("\tmemcpy(&vec, x, sizeof(vec));"); + printf("\treturn vec;\n"); +} + +static void gcc_print_store_aligned(int op, int type, int bits, int size) +{ + puts("\t*(__typeof__(vec.gcc) *)x = vec.gcc;"); +} + +static void gcc_print_store(int op, int type, int bits, int size) +{ + puts("\tmemcpy(x, &vec, sizeof(vec));"); +} + +static void gcc_print_rorlshift(int op, int type, int bits, int size) +{ + static const char *op_builtins[] = { + [OP_LSHIFT] = "<<", + [OP_RSHIFT] = ">>", + }; + + printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]); + printf("\treturn vec1;\n"); +} + +static void gcc_print_lrshift(int op, int type, int bits, int size) +{ + printf("\tvec1.gcc = (__typeof__(vec1.gcc))((vec_uint%d 
__attribute__((__vector_size__(%d))))vec1.gcc >> vec2.gcc);\n", bits, bits * size / 8); + printf("\treturn vec1;\n"); +} + +static void gcc_print_minmax(int op, int type, int bits, int size) +{ + static const char *op_builtins[] = { + [OP_MIN] = "<", + [OP_MAX] = ">" + }; + + switch (type) { + case TYPE_INT: + case TYPE_UINT: + /* yay */ + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" mask;\n"); + printf("\tmask.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]); + printf("\tvec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);\n"); + printf("\treturn vec1;\n"); + break; + default: + /* hm? */ + break; + } +} + +static void gcc_print_avg(int op, int type, int bits, int size) +{ + switch (type) { + case TYPE_INT: + printf("\tvint%dx%d ones = vint%dx%d_splat(1);\n", bits, size, bits, size); + puts("\t__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);"); + puts("\t__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);"); + puts("\t__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);"); + puts("\t__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);"); + puts(""); + printf("\tvec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);\n"); + break; + case TYPE_UINT: + printf("\tvec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);\n"); + break; + case TYPE_FLOAT: + printf("\tvec1.gcc = (vec1.gcc + vec2.gcc) / 2;\n"); + break; + } + + printf("\treturn vec1;\n"); +} + +static void gcc_print_not(int op, int type, int bits, int size) +{ + printf("\tvec.gcc = ~vec.gcc;\n"); + printf("\treturn vec;\n"); +} + +/* ------------------------------------------------------------------------ */ + +static struct op_impl op_impl[OP_FINAL_] = { + [OP_SPLAT] = {NULL, NULL, gcc_print_splat}, + [OP_LOAD_ALIGNED] = {NULL, NULL, gcc_print_load_aligned}, + [OP_LOAD] = {NULL, NULL, gcc_print_load}, + [OP_STORE_ALIGNED] = {NULL, NULL, gcc_print_store_aligned}, + [OP_STORE] = {NULL, NULL, gcc_print_store}, + + /* arithmetic */ + [OP_ADD] = {NULL, NULL, gcc_print_easy_op}, + [OP_SUB] = {NULL, NULL, gcc_print_easy_op}, + [OP_MUL] = {NULL, NULL, gcc_print_easy_op}, +#if 0 + /* no defined divide by zero behavior */ + [OP_DIV] = {NULL, NULL, gcc_print_easy_op}, + [OP_MOD] = {NULL, NULL, gcc_print_easy_op}, +#endif + [OP_AVG] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_avg}, + + /* bitwise */ + [OP_AND] = {NULL, NULL, gcc_print_easy_op}, + [OP_OR] = {NULL, NULL, gcc_print_easy_op}, + [OP_XOR] = {NULL, NULL, gcc_print_easy_op}, + [OP_NOT] = {NULL, NULL, gcc_print_not}, + + /* min/max */ + [OP_MIN] = {gcc_minmax_only_integer, pp_gcc_prereq_4_3_0, gcc_print_minmax}, + [OP_MAX] = {gcc_minmax_only_integer, pp_gcc_prereq_4_3_0, gcc_print_minmax}, + + /* bitshift */ + [OP_LSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_rorlshift}, + [OP_LRSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_lrshift}, + [OP_RSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_rorlshift}, + + /* comparison */ + [OP_CMPLT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, + [OP_CMPLE] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, + [OP_CMPEQ] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, + [OP_CMPGE] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, + [OP_CMPGT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, +}; + +int main(void) +{ + gen(op_impl, "gcc"); +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gen/gengeneric.c Wed Apr 30 18:36:38 2025 -0400 @@ -0,0 +1,274 @@ +/** + * vec - a tiny SIMD vector library in C99 + * + * Copyright (c) 2024-2025 Paper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +**/ + +#include "genlib.h" + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) + +static void op_builtin_pbody(int op, int type, int bits, int size) +{ + const char *ops[OP_FINAL_] = { + [OP_ADD] = "+", + [OP_SUB] = "-", + [OP_MUL] = "*", + [OP_AND] = "&", + [OP_OR] = "|", + [OP_XOR] = "^", + }; + int i; + + for (i = 0; i < size; i++) + printf("\tvec1.generic[%d] = (vec1.generic[%d] %s vec2.generic[%d]);\n", i, i, ops[op], i); + + puts("\treturn vec1;"); + + (void)bits; +} + +static void op_builtin_avg_pbody(int op, int type, int bits, int size) +{ + int i; + + switch (type) { + case TYPE_INT: + printf("\t"); + gen_print_stype(type, bits); + printf(" x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;\n"); + + for (i = 0; i < size; i++) + printf( + "\tx_d_rem = (vec1.generic[%d] % 2);\n" + "\ty_d_rem = (vec2.generic[%d] % 2);\n" + "\trem_d_quot = ((x_d_rem + y_d_rem) / 2);\n" + "\trem_d_rem = ((x_d_rem + y_d_rem) % 2);\n" + "\n" + "\tvec1.generic[%d] = ((vec1.generic[%d] / 2) + (vec2.generic[%d] / 2)) + (rem_d_quot) + (rem_d_rem == 1);\n" + , i, i, i, i, i); + break; + case TYPE_UINT: + for (i = 0; i < size; i++) + printf("vec1.generic[%d] = (vec1.generic[%d] >> 1) + (vec2.generic[%d] >> 1) + ((vec1.generic[%d] | vec2.generic[%d]) & 1);\n", i, i, i, i, i); + break; + case TYPE_FLOAT: + /* this is probably fine. 
*/ + for (i = 0; i < size; i++) + printf("\tvec1.generic[%d] = (vec1.generic[%d] + vec2.generic[%d]) / 2;\n", i, i, i); + break; + } + + printf("\treturn vec1;\n"); +} + +static void op_builtin_not_pbody(int op, int type, int bits, int size) +{ + int i; + + for (i = 0; i < size; i++) + printf("\tvec.generic[%d] = ~vec.generic[%d];\n", i, i); + + puts("\treturn vec;"); +} + +static void op_builtin_shift_pbody(int op, int type, int bits, int size) +{ + int i; + + switch (type) { + case TYPE_UINT: { + const char *ops[] = { + [OP_LSHIFT] = "<<", + [OP_RSHIFT] = ">>", + [OP_LRSHIFT] = ">>", + }; + + for (i = 0; i < size; i++) + printf("\tvec1.generic[%d] %s= vec2.generic[%d];\n", i, ops[op]); + break; + } + case TYPE_INT: { + switch (op) { + case OP_LSHIFT: + case OP_LRSHIFT: { + const char *ops[] = { + [OP_LSHIFT] = "<<", + [OP_LRSHIFT] = ">>", + }; + + printf("\tunion { "); + gen_print_stype(TYPE_UINT, bits); + printf(" u; "); + gen_print_stype(TYPE_INT, bits); + puts(" s; } x;\n"); + + for (i = 0; i < size; i++) + printf( + "\tx.s = vec1.generic[%d];\n" + "\tx.u %s= vec2.generic[%d];\n" + "\tvec1.generic[%d] = x.s;\n", + i, ops[op], i, i); + break; + } + case OP_RSHIFT: + for (i = 0; i < size; i++) + printf("vec1.generic[%d] = ((~vec1.generic[%d]) >> vec2.generic[%d]);\n", i, i, i); + break; + } + break; + } + } + + puts("\treturn vec1;"); +} + +static void op_builtin_nonzero_pbody(int op, int type, int bits, int size) +{ + const char *ops[OP_FINAL_] = { + [OP_DIV] = "/", + [OP_MOD] = "%", + }; + int i; + + if (op == OP_MOD && type == TYPE_FLOAT) { + for (i = 0; i < size; i++) + printf("\tvec1.generic[%d] = (vec2.generic[%d] ? fmod(vec1.generic[%d], vec2.generic[%d]) : 0);\n", i, i, i, i); + } else { + for (i = 0; i < size; i++) + printf("\tvec1.generic[%d] = (vec2.generic[%d] ? (vec1.generic[%d] %s vec2.generic[%d]) : 0);\n", i, i, i, ops[op], i); + } + + puts("\treturn vec1;"); + + (void)bits; +} + +static void op_cmp_pbody(int op, int type, int bits, int size) +{ + const char *ops[OP_FINAL_] = { + [OP_CMPLT] = "<", + [OP_CMPLE] = "<=", + [OP_CMPEQ] = "==", + [OP_CMPGE] = ">=", + [OP_CMPGT] = ">", + }; + int i; + + /* this is portable for int uint and float*/ + for (i = 0; i < size; i++) + printf("\tmemset(&vec1.generic[%d], (vec1.generic[%d] %s vec2.generic[%d]) ? 0xFF : 0, %d);\n", i, i, ops[op], i, bits / 8); + + puts("\treturn vec1;"); +} + +static void op_minmax_pbody(int op, int type, int bits, int size) +{ + const char *ops[OP_FINAL_] = { + [OP_MIN] = "<", + [OP_MAX] = ">", + }; + int i; + + for (i = 0; i < size; i++) + printf("\tvec1.generic[%d] = (vec1.generic[%d] %s vec2.generic[%d]) ? 
(vec1.generic[%d]) : (vec2.generic[%d]);\n", i, i, ops[op], i, i, i); + + puts("\treturn vec1;"); +} + +static void op_splat_pbody(int op, int type, int bits, int size) +{ + int i; + + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + for (i = 0; i < size; i++) + printf("\tvec.generic[%d] = x;\n", i); + + puts("\treturn vec;"); +} + +static void op_load_pbody(int op, int type, int bits, int size) +{ + int i; + + printf("\t"); + gen_print_vtype(type, bits, size); + printf(" vec;\n"); + + printf("\tmemcpy(vec.generic, x, %d);\n", (bits / 8) * size); + + puts("\treturn vec;"); +} + +static void op_store_pbody(int op, int type, int bits, int size) +{ + printf("\tmemcpy(x, vec.generic, %d);\n", (bits / 8) * size); +} + +/* ------------------------------------------------------------------------ */ + +static struct op_impl op_impl[OP_FINAL_] = { + [OP_SPLAT] = {NULL, NULL, op_splat_pbody}, + [OP_LOAD_ALIGNED] = {NULL, NULL, op_load_pbody}, + [OP_LOAD] = {NULL, NULL, op_load_pbody}, + [OP_STORE_ALIGNED] = {NULL, NULL, op_store_pbody}, + [OP_STORE] = {NULL, NULL, op_store_pbody}, + + /* arithmetic */ + [OP_ADD] = {NULL, NULL, op_builtin_pbody}, + [OP_SUB] = {NULL, NULL, op_builtin_pbody}, + [OP_MUL] = {NULL, NULL, op_builtin_pbody}, + [OP_DIV] = {NULL, NULL, op_builtin_nonzero_pbody}, + [OP_MOD] = {NULL, NULL, op_builtin_nonzero_pbody}, + [OP_AVG] = {NULL, NULL, op_builtin_avg_pbody}, + + /* bitwise */ + [OP_AND] = {NULL, NULL, op_builtin_pbody}, + [OP_OR] = {NULL, NULL, op_builtin_pbody}, + [OP_XOR] = {NULL, NULL, op_builtin_pbody}, + [OP_NOT] = {NULL, NULL, op_builtin_not_pbody}, + + /* min/max */ + [OP_MIN] = {NULL, NULL, op_minmax_pbody}, + [OP_MAX] = {NULL, NULL, op_minmax_pbody}, + + /* bitshift */ + [OP_LSHIFT] = {NULL, NULL, op_builtin_shift_pbody}, + [OP_LRSHIFT] = {NULL, NULL, op_builtin_shift_pbody}, + [OP_RSHIFT] = {NULL, NULL, op_builtin_shift_pbody}, + + /* comparison */ + [OP_CMPLT] = {NULL, NULL, op_cmp_pbody}, + [OP_CMPLE] = {NULL, NULL, op_cmp_pbody}, + [OP_CMPEQ] = {NULL, NULL, op_cmp_pbody}, + [OP_CMPGE] = {NULL, NULL, op_cmp_pbody}, + [OP_CMPGT] = {NULL, NULL, op_cmp_pbody}, +}; + +int main(void) +{ + gen(op_impl, "generic"); + + return 0; +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gen/genlib.c Wed Apr 30 18:36:38 2025 -0400 @@ -0,0 +1,294 @@ +/** + * vec - a tiny SIMD vector library in C99 + * + * Copyright (c) 2024-2025 Paper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +**/ + +#include "genlib.h" + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) + +struct strs type_str[] = { + [TYPE_INT] = {"int", "INT"}, + [TYPE_UINT] = {"uint", "UINT"}, + [TYPE_FLOAT] = {"f", "F"}, +}; + +void gen_print_vtype(int type, int bits, int size) +{ + printf("v%s%dx%d", type_str[type].l, bits, size); +} + +void gen_print_stype(int type, int bits) +{ + printf("vec_%s%d", type_str[type].l, bits); +} + +static void vret(int op, int type, int bits, int size) +{ + gen_print_vtype(type, bits, size); + + (void)op; +} + +static void nret(int op, int type, int bits, int size) +{ + printf("void"); + + (void)op, (void)type, (void)bits, (void)size; +} + +static void voneparam(int op, int type, int bits, int size) +{ + gen_print_vtype(type, bits, size); + printf(" vec"); + + (void)op; +} + +static void vtwoparam(int op, int type, int bits, int size) +{ + gen_print_vtype(type, bits, size); + printf(" vec1, "); + gen_print_vtype(type, bits, size); + printf(" vec2"); + + (void)op; +} + +static void vshiftparam(int op, int type, int bits, int size) +{ + gen_print_vtype(type, bits, size); + printf(" vec1, "); + gen_print_vtype(TYPE_UINT, bits, size); + printf(" vec2"); + + (void)op; +} + +static void vloadparam(int op, int type, int bits, int size) +{ + printf("const "); + gen_print_stype(type, bits); + printf(" x[%d]", size); + + (void)op; +} + +static void vsplatparam(int op, int type, int bits, int size) +{ + gen_print_stype(type, bits); + printf(" x"); + + (void)op, (void)size; +} + +static void vstoreparam(int op, int type, int bits, int size) +{ + gen_print_vtype(type, bits, size); + printf(" vec, "); + gen_print_stype(type, bits); + printf(" x[%d]", size); + + (void)op; +} + +struct op_info ops[] = { + [OP_SPLAT] = {"SPLAT", "splat", vret, vsplatparam}, + [OP_LOAD_ALIGNED] = {"LOAD_ALIGNED", "load_aligned", vret, vloadparam}, + [OP_LOAD] = {"LOAD", "load", vret, vloadparam}, + [OP_STORE_ALIGNED] = {"STORE_ALIGNED", "store_aligned", nret, vstoreparam}, + [OP_STORE] = {"STORE", "store", nret, vstoreparam}, + [OP_ADD] = {"ADD", "add", vret, vtwoparam}, + [OP_SUB] = {"SUB", "sub", vret, vtwoparam}, + [OP_MUL] = {"MUL", "mul", vret, vtwoparam}, + [OP_DIV] = {"DIV", "div", vret, vtwoparam}, + [OP_MOD] = {"MOD", "mod", vret, 
vtwoparam}, + [OP_AVG] = {"AVG", "avg", vret, vtwoparam}, + [OP_AND] = {"AND", "and", vret, vtwoparam}, + [OP_OR] = {"OR", "or", vret, vtwoparam}, + [OP_XOR] = {"XOR", "xor", vret, vtwoparam}, + [OP_NOT] = {"NOT", "not", vret, voneparam}, + [OP_CMPLT] = {"CMPLT", "cmplt", vret, vtwoparam}, + [OP_CMPEQ] = {"CMPEQ", "cmpeq", vret, vtwoparam}, + [OP_CMPGT] = {"CMPGT", "cmpgt", vret, vtwoparam}, + [OP_CMPLE] = {"CMPLE", "cmple", vret, vtwoparam}, + [OP_CMPGE] = {"CMPGE", "cmpge", vret, vtwoparam}, + [OP_MIN] = {"MIN", "min", vret, vtwoparam}, + [OP_MAX] = {"MAX", "max", vret, vtwoparam}, + [OP_RSHIFT] = {"RSHIFT", "rshift", vret, vshiftparam}, + [OP_LRSHIFT] = {"LRSHIFT", "lrshift", vret, vshiftparam}, + [OP_LSHIFT] = {"LSHIFT", "lshift", vret, vshiftparam}, +}; + +struct op_info *gen_op_info(int op) +{ + return &ops[op]; +} + +/* okay */ +extern int (*genlib_test(void))[(ARRAY_SIZE(ops) == OP_FINAL_) ? 1 : -2]; + +int op_impl_check_always(int op, int type, int bits, int size) +{ + return 1; + + (void)op, (void)type, (void)bits, (void)size; +} + +static inline int verify_op(int op, int type) +{ + switch (op) { + case OP_AND: + case OP_XOR: + case OP_OR: + case OP_NOT: + case OP_RSHIFT: + case OP_LSHIFT: + case OP_LRSHIFT: + /* these operations make no sense for floating point */ + if (type == TYPE_FLOAT) + return 0; + break; + } + + return 1; +} + +/* XXX: would it be faster to unroll literally everything instead of defining everything, + * and then unpacking it all? */ +static const char *header_tmpl = + "/**\n" + " * vec - a tiny SIMD vector library in C99\n" + " * \n" + " * Copyright (c) 2024-2025 Paper\n" + " * \n" + " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" + " * of this software and associated documentation files (the \"Software\"), to deal\n" + " * in the Software without restriction, including without limitation the rights\n" + " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" + " * copies of the Software, and to permit persons to whom the Software is\n" + " * furnished to do so, subject to the following conditions:\n" + " * \n" + " * The above copyright notice and this permission notice shall be included in all\n" + " * copies or substantial portions of the Software.\n" + " * \n" + " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" + " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" + " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" + " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" + " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" + " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n" + " * SOFTWARE.\n" + "**/\n" + "\n" + "/* This file is automatically generated! 
Do not edit it directly!\n" + " * Edit the code that generates it in utils/gen%s.c --paper */\n" + "\n" + "/* ------------------------------------------------------------------------ */\n" + "/* PREPROCESSOR HELL INCOMING */\n\n"; + +int gen(struct op_impl op_impl[OP_FINAL_], const char *name) +{ + static struct { + int type, bits, size; + } types[] = { +#define INT_TYPE(bits, size) {TYPE_INT, bits, size}, {TYPE_UINT, bits, size} + + INT_TYPE(8, 2), + INT_TYPE(8, 4), + INT_TYPE(8, 8), + INT_TYPE(8, 16), + INT_TYPE(8, 32), + INT_TYPE(8, 64), + + INT_TYPE(16, 2), + INT_TYPE(16, 4), + INT_TYPE(16, 8), + INT_TYPE(16, 16), + INT_TYPE(16, 32), + + INT_TYPE(32, 2), + INT_TYPE(32, 4), + INT_TYPE(32, 8), + INT_TYPE(32, 16), + + INT_TYPE(64, 2), + INT_TYPE(64, 4), + INT_TYPE(64, 8), + +#undef INT_TYPE + + /* float */ + {TYPE_FLOAT, 32, 2}, + {TYPE_FLOAT, 32, 4}, + {TYPE_FLOAT, 32, 8}, + {TYPE_FLOAT, 32, 16}, + + /* double */ + {TYPE_FLOAT, 64, 2}, + {TYPE_FLOAT, 64, 4}, + {TYPE_FLOAT, 64, 8}, + }; + int op; + size_t s; + + printf(header_tmpl, name); + + for (s = 0; s < ARRAY_SIZE(types); s++) { + for (op = 0; op < OP_FINAL_; op++) { + if (!op_impl[op].pbody) + continue; /* What? */ + + if (op_impl[op].check && !op_impl[op].check(op, types[s].type, types[s].bits, types[s].size)) + continue; + + if (!verify_op(op, types[s].type)) + continue; + + printf("#if !defined(V%s%dx%d_%s_DEFINED)", type_str[types[s].type].u, types[s].bits, types[s].size, ops[op].u); + + if (op_impl[op].ppcheck) { + printf(" \\\n\t && ("); + op_impl[op].ppcheck(op, types[s].type, types[s].bits, types[s].size); + printf(")"); + } + + puts(""); + + printf("VEC_FUNC_IMPL "); + ops[op].pret(op, types[s].type, types[s].bits, types[s].size); + printf(" "); + gen_print_vtype(types[s].type, types[s].bits, types[s].size); + printf("_%s(", ops[op].l); + ops[op].pparam(op, types[s].type, types[s].bits, types[s].size); + puts(")\n{"); + + op_impl[op].pbody(op, types[s].type, types[s].bits, types[s].size); + + puts("}"); + + printf("# define V%s%dx%d_%s_DEFINED\n", type_str[types[s].type].u, types[s].bits, types[s].size, ops[op].u); + puts("#endif"); + } + } + +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gen/genlib.h Wed Apr 30 18:36:38 2025 -0400 @@ -0,0 +1,108 @@ +/** + * vec - a tiny SIMD vector library in C99 + * + * Copyright (c) 2024-2025 Paper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +**/ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +enum { + OP_SPLAT, + OP_LOAD_ALIGNED, + OP_LOAD, + OP_STORE_ALIGNED, + OP_STORE, + OP_ADD, + OP_SUB, + OP_MUL, + OP_DIV, + OP_MOD, + OP_AVG, + OP_AND, + OP_OR, + OP_XOR, + OP_NOT, + OP_CMPLT, + OP_CMPEQ, + OP_CMPGT, + OP_CMPLE, /* these are after the other ones to make */ + OP_CMPGE, /* implementing them as simple as NOT(CMPLT|CMPGT) */ + OP_MIN, + OP_MAX, + OP_RSHIFT, + OP_LRSHIFT, + OP_LSHIFT, + + /* use this for array sizes and the like */ + OP_FINAL_, +}; + +enum { + TYPE_INT, /* signed int */ + TYPE_UINT, /* unsigned int */ + TYPE_FLOAT, /* IEEE float */ +}; + +struct op_info { + const char *u; + const char *l; + + /* print return type to stdout */ + void (*pret)(int op, int type, int bits, int size); + + /* print params type to stdout */ + void (*pparam)(int op, int type, int bits, int size); +}; + +struct strs { + const char *l; + const char *u; +}; + +extern struct strs type_str[]; + +struct op_info *gen_op_info(int op); + +struct op_impl { + /* return 1 if it's implemented for a specific set of + * inputs :) + * + * if this function is not implemented, and `pbody` + * is not NULL, then it is assumed that there are + * no restrictions on what type, bits, or size can + * be used. beware! */ + int (*check)(int op, int type, int bits, int size); + + /* prints any additional preprocessor checks needed + * should start with a conditional, usually && */ + void (*ppcheck)(int op, int type, int bits, int size); + + /* sherman? + * (this prints the actual body of the function...) */ + void (*pbody)(int op, int type, int bits, int size); +}; + +int gen(struct op_impl op_impl[OP_FINAL_], const char *name); + +void gen_print_vtype(int type, int bits, int size); +void gen_print_stype(int type, int bits);
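(Illustrative aside, not part of this changeset: a minimal backend generator built on the op_impl/gen() interface declared in gen/genlib.h above. The backend name "myimpl" is hypothetical, and the printed body reuses the .generic array member the same way gen/gengeneric.c does; like the real generators it would be built with `gcc -o genmyimpl genmyimpl.c genlib.c` and its output redirected into a header.)

#include "genlib.h"

/* print the body of a two-operand add, element by element, using the
 * .generic array member the way gen/gengeneric.c does */
static void myimpl_print_add(int op, int type, int bits, int size)
{
	int i;

	for (i = 0; i < size; i++)
		printf("\tvec1.generic[%d] = (vec1.generic[%d] + vec2.generic[%d]);\n", i, i, i);

	puts("\treturn vec1;");

	(void)op, (void)type, (void)bits;
}

/* a NULL `check` with a non-NULL `pbody` means "no restrictions" (see
 * genlib.h); ops left out of the table are skipped by gen() entirely */
static struct op_impl op_impl[OP_FINAL_] = {
	[OP_ADD] = {NULL, NULL, myimpl_print_add},
};

int main(void)
{
	/* writes the generated header to stdout */
	gen(op_impl, "myimpl");
	return 0;
}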
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gen/genvsx.c Wed Apr 30 18:36:38 2025 -0400 @@ -0,0 +1,28 @@ +/** + * vec - a tiny SIMD vector library in C99 + * + * Copyright (c) 2024-2025 Paper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +**/ + +/* ok */ + +#define USE_VSX_EXTENSIONS +#include "genaltivec.c" \ No newline at end of file
--- a/include/vec/defs.h Tue Apr 29 16:54:13 2025 -0400 +++ b/include/vec/defs.h Wed Apr 30 18:36:38 2025 -0400 @@ -27,12 +27,19 @@ #include <string.h> #include <stdlib.h> +#include <math.h> /* fmod */ + +#ifdef __has_include +# define VEC_HAS_INCLUDE(x) __has_include(x) +#else +# define VEC_HAS_INCLUDE(x) +#endif #ifdef VEC_CUSTOM_INTEGER_TYPEDEF /* we already have custom integer typedefs; */ # include "impl/integer.h" #else -# if __cplusplus >= (201103L) +# if defined(__cplusplus) && VEC_HAS_INCLUDE(<cstdint>) && VEC_HAS_INCLUDE(<cstddef>) # include <cstdint> # include <cstddef> typedef std::size_t vec_uintsize; @@ -49,7 +56,7 @@ typedef std::int32_t vec_int32; typedef std::int64_t vec_int64; typedef std::intmax_t vec_intmax; -# elif __STDC_VERSION__ >= 199901L +# elif (__STDC_VERSION__ >= 199901L) || (VEC_HAS_INCLUDE(<stdint.h>) && VEC_HAS_INCLUDE(<stddef.h>)) # include <stdint.h> # include <stddef.h> typedef uint8_t vec_uint8; @@ -67,6 +74,11 @@ # else # error Unable to find integer types with known size. # endif + +/* this isn't necessarily true, but who cares :) */ +typedef float vec_f32; +typedef double vec_f64; + #endif #define VEC_SEMVER_ATLEAST(a, b, c, x, y, z) \
--- a/include/vec/impl/double.h Tue Apr 29 16:54:13 2025 -0400 +++ b/include/vec/impl/double.h Wed Apr 30 18:36:38 2025 -0400 @@ -23,3699 +23,10311 @@ **/ /* This file is automatically generated! Do not edit it directly! - * Edit the code that generates it in utils/gengeneric.c --paper */ - -#ifndef VEC_IMPL_DOUBLE_H_ -#define VEC_IMPL_DOUBLE_H_ - -#define VEC_DOUBLE_SPLAT(sign, bits, size, halfsize) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \ - { \ - v##sign##int##bits##x##size vec; \ - \ - vec.generic[0] = v##sign##int##bits##x##halfsize##_splat(x); \ - vec.generic[1] = v##sign##int##bits##x##halfsize##_splat(x); \ - \ - return vec; \ - } - -#define VEC_DOUBLE_LOAD_EX(name, sign, bits, size, halfsize) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits x[size]) \ - { \ - v##sign##int##bits##x##size vec; \ - \ - vec.generic[0] = v##sign##int##bits##x##halfsize##_##name(x); \ - vec.generic[1] = v##sign##int##bits##x##halfsize##_##name(x + halfsize); \ - \ - return vec; \ - } - -#define VEC_DOUBLE_LOAD(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load, sign, bits, size, halfsize) -#define VEC_DOUBLE_LOAD_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load_aligned, sign, bits, size, halfsize) - -#define VEC_DOUBLE_STORE_EX(name, sign, bits, size, halfsize) \ - VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits x[size]) \ - { \ - v##sign##int##bits##x##halfsize##_##name(vec.generic[0], x); \ - v##sign##int##bits##x##halfsize##_##name(vec.generic[1], x + halfsize); \ - } - -#define VEC_DOUBLE_STORE(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store, sign, bits, size, halfsize) -#define VEC_DOUBLE_STORE_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store_aligned, sign, bits, size, halfsize) - -#define VEC_DOUBLE_OP(name, sign, bits, size, halfsize, secondsign) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec1, v##secondsign##int##bits##x##size vec2) \ - { \ - vec1.generic[0] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[0], vec2.generic[0]); \ - vec1.generic[1] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[1], vec2.generic[1]); \ - \ - return vec1; \ - } - -#define VEC_DOUBLE_ADD(sign, bits, size, halfsize) VEC_DOUBLE_OP(add, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_SUB(sign, bits, size, halfsize) VEC_DOUBLE_OP(sub, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_MUL(sign, bits, size, halfsize) VEC_DOUBLE_OP(mul, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_DIV(sign, bits, size, halfsize) VEC_DOUBLE_OP(div, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_MOD(sign, bits, size, halfsize) VEC_DOUBLE_OP(mod, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_AVG(sign, bits, size, halfsize) VEC_DOUBLE_OP(avg, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_LSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lshift, sign, bits, size, halfsize, u) -#define VEC_DOUBLE_RSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(rshift, sign, bits, size, halfsize, u) -#define VEC_DOUBLE_LRSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lrshift, sign, bits, size, halfsize, u) -#define VEC_DOUBLE_AND(sign, bits, size, halfsize) VEC_DOUBLE_OP(and, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_OR(sign, bits, size, halfsize) VEC_DOUBLE_OP(or, sign, bits, size, halfsize, sign) -#define 
VEC_DOUBLE_XOR(sign, bits, size, halfsize) VEC_DOUBLE_OP(xor, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_MIN(sign, bits, size, halfsize) VEC_DOUBLE_OP(min, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_MAX(sign, bits, size, halfsize) VEC_DOUBLE_OP(max, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_CMPLT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmplt, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_CMPLE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmple, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_CMPEQ(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpeq, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_CMPGE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpge, sign, bits, size, halfsize, sign) -#define VEC_DOUBLE_CMPGT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpgt, sign, bits, size, halfsize, sign) - -#define VEC_DOUBLE_NOT(sign, bits, size, halfsize) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \ - { \ - vec.generic[0] = v##sign##int##bits##x##halfsize##_not(vec.generic[0]); \ - vec.generic[1] = v##sign##int##bits##x##halfsize##_not(vec.generic[1]); \ - \ - return vec; \ - } - -#endif /* VEC_IMPL_DOUBLE_H_ */ + * Edit the code that generates it in utils/gendouble.c --paper */ /* ------------------------------------------------------------------------ */ /* PREPROCESSOR HELL INCOMING */ - - -/* vuint8x4 */ - -#if !defined(VINT8x4_SPLAT_DEFINED) && defined(VINT8x2_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 8, 4, 2) +#if !defined(VINT8x2_SPLAT_DEFINED) \ + && (defined(VINT8x1_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint8x2 vint8x2_splat(vec_int8 x) +{ + vint8x2 vec; + vec.dbl[0] = vint8x1_splat(x); + vec.dbl[1] = vint8x1_splat(x); + return vec; +} +# define VINT8x2_SPLAT_DEFINED +#endif +#if !defined(VINT8x2_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT8x1_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint8x2 vint8x2_load_aligned(const vec_int8 x[2]) +{ + vint8x2 vec; + vec.dbl[0] = vint8x1_load_aligned(x); + vec.dbl[1] = vint8x1_load_aligned(x + 1); + return vec; +} +# define VINT8x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT8x2_LOAD_DEFINED) \ + && (defined(VINT8x1_LOAD_DEFINED)) +VEC_FUNC_IMPL vint8x2 vint8x2_load(const vec_int8 x[2]) +{ + vint8x2 vec; + vec.dbl[0] = vint8x1_load(x); + vec.dbl[1] = vint8x1_load(x + 1); + return vec; +} +# define VINT8x2_LOAD_DEFINED +#endif +#if !defined(VINT8x2_STORE_ALIGNED_DEFINED) \ + && (defined(VINT8x1_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint8x2_store_aligned(vint8x2 vec, vec_int8 x[2]) +{ + vint8x1_store_aligned(vec.dbl[0], x); + vint8x1_store_aligned(vec.dbl[1], x + 1); +} +# define VINT8x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT8x2_STORE_DEFINED) \ + && (defined(VINT8x1_STORE_DEFINED)) +VEC_FUNC_IMPL void vint8x2_store(vint8x2 vec, vec_int8 x[2]) +{ + vint8x1_store(vec.dbl[0], x); + vint8x1_store(vec.dbl[1], x + 1); +} +# define VINT8x2_STORE_DEFINED +#endif +#if !defined(VINT8x2_ADD_DEFINED) \ + && (defined(VINT8x1_ADD_DEFINED)) +VEC_FUNC_IMPL vint8x2 vint8x2_add(vint8x2 vec1, vint8x2 vec2) +{ + vec1.dbl[0] = vint8x1_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x1_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT8x2_ADD_DEFINED +#endif +#if !defined(VINT8x2_SUB_DEFINED) \ + && (defined(VINT8x1_SUB_DEFINED)) +VEC_FUNC_IMPL vint8x2 vint8x2_sub(vint8x2 vec1, vint8x2 vec2) +{ + vec1.dbl[0] = vint8x1_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x1_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define 
VINT8x2_SUB_DEFINED
+#endif
+#if !defined(VINT8x2_MUL_DEFINED) \
+ && (defined(VINT8x1_MUL_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_mul(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_mul(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_mul(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_MUL_DEFINED
+#endif
+#if !defined(VINT8x2_DIV_DEFINED) \
+ && (defined(VINT8x1_DIV_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_div(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_div(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_div(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_DIV_DEFINED
+#endif
+#if !defined(VINT8x2_MOD_DEFINED) \
+ && (defined(VINT8x1_MOD_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_mod(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_mod(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_mod(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_MOD_DEFINED
+#endif
+#if !defined(VINT8x2_AVG_DEFINED) \
+ && (defined(VINT8x1_AVG_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_avg(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_avg(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_avg(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_AVG_DEFINED
+#endif
+#if !defined(VINT8x2_AND_DEFINED) \
+ && (defined(VINT8x1_AND_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_and(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_and(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_and(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_AND_DEFINED
+#endif
+#if !defined(VINT8x2_OR_DEFINED) \
+ && (defined(VINT8x1_OR_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_or(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_or(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_or(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_OR_DEFINED
+#endif
+#if !defined(VINT8x2_XOR_DEFINED) \
+ && (defined(VINT8x1_XOR_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_xor(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_xor(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_xor(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_XOR_DEFINED
+#endif
+#if !defined(VINT8x2_NOT_DEFINED) \
+ && (defined(VINT8x1_NOT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_not(vint8x2 vec)
+{
+ vec.dbl[0] = vint8x1_not(vec.dbl[0]);
+ vec.dbl[1] = vint8x1_not(vec.dbl[1]);
+ return vec;
+}
+# define VINT8x2_NOT_DEFINED
+#endif
+#if !defined(VINT8x2_CMPLT_DEFINED) \
+ && (defined(VINT8x1_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmplt(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_cmplt(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_cmplt(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x2_CMPEQ_DEFINED) \
+ && (defined(VINT8x1_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpeq(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x2_CMPGT_DEFINED) \
+ && (defined(VINT8x1_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpgt(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x2_CMPLE_DEFINED) \
+ && (defined(VINT8x1_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmple(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_cmple(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_cmple(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x2_CMPGE_DEFINED) \
+ && (defined(VINT8x1_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpge(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_cmpge(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_cmpge(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x2_MIN_DEFINED) \
+ && (defined(VINT8x1_MIN_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_min(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_min(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_min(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_MIN_DEFINED
+#endif
+#if !defined(VINT8x2_MAX_DEFINED) \
+ && (defined(VINT8x1_MAX_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_max(vint8x2 vec1, vint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_max(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_max(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_MAX_DEFINED
+#endif
+#if !defined(VINT8x2_RSHIFT_DEFINED) \
+ && (defined(VINT8x1_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_rshift(vint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_rshift(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_rshift(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_RSHIFT_DEFINED
+#endif
+#if !defined(VINT8x2_LRSHIFT_DEFINED) \
+ && (defined(VINT8x1_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_lrshift(vint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_lrshift(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_lrshift(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT8x2_LSHIFT_DEFINED) \
+ && (defined(VINT8x1_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_lshift(vint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vint8x1_lshift(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vint8x1_lshift(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VINT8x2_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x2_SPLAT_DEFINED) \
+ && (defined(VUINT8x1_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_splat(vec_uint8 x)
+{
+ vuint8x2 vec;
+ vec.dbl[0] = vuint8x1_splat(x);
+ vec.dbl[1] = vuint8x1_splat(x);
+ return vec;
+}
+# define VUINT8x2_SPLAT_DEFINED
+#endif
+#if !defined(VUINT8x2_LOAD_ALIGNED_DEFINED) \
+ && (defined(VUINT8x1_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_load_aligned(const vec_uint8 x[2])
+{
+ vuint8x2 vec;
+ vec.dbl[0] = vuint8x1_load_aligned(x);
+ vec.dbl[1] = vuint8x1_load_aligned(x + 1);
+ return vec;
+}
+# define VUINT8x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x2_LOAD_DEFINED) \
+ && (defined(VUINT8x1_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_load(const vec_uint8 x[2])
+{
+ vuint8x2 vec;
+ vec.dbl[0] = vuint8x1_load(x);
+ vec.dbl[1] = vuint8x1_load(x + 1);
+ return vec;
+}
+# define VUINT8x2_LOAD_DEFINED
+#endif
+#if !defined(VUINT8x2_STORE_ALIGNED_DEFINED) \
+ && (defined(VUINT8x1_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint8x2_store_aligned(vuint8x2 vec, vec_uint8 x[2])
+{
+ vuint8x1_store_aligned(vec.dbl[0], x);
+ vuint8x1_store_aligned(vec.dbl[1], x + 1);
+}
+# define VUINT8x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x2_STORE_DEFINED) \
+ && (defined(VUINT8x1_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint8x2_store(vuint8x2 vec, vec_uint8 x[2])
+{
+ vuint8x1_store(vec.dbl[0], x);
+ vuint8x1_store(vec.dbl[1], x + 1);
+}
+# define VUINT8x2_STORE_DEFINED
+#endif
+#if !defined(VUINT8x2_ADD_DEFINED) \
+ && (defined(VUINT8x1_ADD_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_add(vuint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vuint8x1_add(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vuint8x1_add(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VUINT8x2_ADD_DEFINED
+#endif
+#if !defined(VUINT8x2_SUB_DEFINED) \
+ && (defined(VUINT8x1_SUB_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_sub(vuint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vuint8x1_sub(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vuint8x1_sub(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VUINT8x2_SUB_DEFINED
+#endif
+#if !defined(VUINT8x2_MUL_DEFINED) \
+ && (defined(VUINT8x1_MUL_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_mul(vuint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vuint8x1_mul(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vuint8x1_mul(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VUINT8x2_MUL_DEFINED
+#endif
+#if !defined(VUINT8x2_DIV_DEFINED) \
+ && (defined(VUINT8x1_DIV_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_div(vuint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vuint8x1_div(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vuint8x1_div(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VUINT8x2_DIV_DEFINED
+#endif
+#if !defined(VUINT8x2_MOD_DEFINED) \
+ && (defined(VUINT8x1_MOD_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_mod(vuint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vuint8x1_mod(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vuint8x1_mod(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VUINT8x2_MOD_DEFINED
+#endif
+#if !defined(VUINT8x2_AVG_DEFINED) \
+ && (defined(VUINT8x1_AVG_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_avg(vuint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vuint8x1_avg(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vuint8x1_avg(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VUINT8x2_AVG_DEFINED
+#endif
+#if !defined(VUINT8x2_AND_DEFINED) \
+ && (defined(VUINT8x1_AND_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_and(vuint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vuint8x1_and(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vuint8x1_and(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VUINT8x2_AND_DEFINED
+#endif
+#if !defined(VUINT8x2_OR_DEFINED) \
+ && (defined(VUINT8x1_OR_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_or(vuint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vuint8x1_or(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vuint8x1_or(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VUINT8x2_OR_DEFINED
+#endif
+#if !defined(VUINT8x2_XOR_DEFINED) \
+ && (defined(VUINT8x1_XOR_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_xor(vuint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vuint8x1_xor(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vuint8x1_xor(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VUINT8x2_XOR_DEFINED
+#endif
+#if !defined(VUINT8x2_NOT_DEFINED) \
+ && (defined(VUINT8x1_NOT_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_not(vuint8x2 vec)
+{
+ vec.dbl[0] = vuint8x1_not(vec.dbl[0]);
+ vec.dbl[1] = vuint8x1_not(vec.dbl[1]);
+ return vec;
+}
+# define VUINT8x2_NOT_DEFINED
+#endif
+#if !defined(VUINT8x2_CMPLT_DEFINED) \
+ && (defined(VUINT8x1_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmplt(vuint8x2 vec1, vuint8x2 vec2)
+{
+ vec1.dbl[0] = vuint8x1_cmplt(vec1.dbl[0], vec2.dbl[0]);
+ vec1.dbl[1] = vuint8x1_cmplt(vec1.dbl[1], vec2.dbl[1]);
+ return vec1;
+}
+# define VUINT8x2_CMPLT_DEFINED
+#endif
+#if !defined(VUINT8x2_CMPEQ_DEFINED) \
+ && (defined(VUINT8x1_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpeq(vuint8x2 vec1, vuint8x2
vec2) +{ + vec1.dbl[0] = vuint8x1_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x1_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT8x2_CMPEQ_DEFINED +#endif +#if !defined(VUINT8x2_CMPGT_DEFINED) \ + && (defined(VUINT8x1_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpgt(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.dbl[0] = vuint8x1_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x1_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT8x2_CMPGT_DEFINED +#endif +#if !defined(VUINT8x2_CMPLE_DEFINED) \ + && (defined(VUINT8x1_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmple(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.dbl[0] = vuint8x1_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x1_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT8x2_CMPLE_DEFINED +#endif +#if !defined(VUINT8x2_CMPGE_DEFINED) \ + && (defined(VUINT8x1_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpge(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.dbl[0] = vuint8x1_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x1_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT8x2_CMPGE_DEFINED +#endif +#if !defined(VUINT8x2_MIN_DEFINED) \ + && (defined(VUINT8x1_MIN_DEFINED)) +VEC_FUNC_IMPL vuint8x2 vuint8x2_min(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.dbl[0] = vuint8x1_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x1_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT8x2_MIN_DEFINED +#endif +#if !defined(VUINT8x2_MAX_DEFINED) \ + && (defined(VUINT8x1_MAX_DEFINED)) +VEC_FUNC_IMPL vuint8x2 vuint8x2_max(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.dbl[0] = vuint8x1_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x1_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT8x2_MAX_DEFINED +#endif +#if !defined(VUINT8x2_RSHIFT_DEFINED) \ + && (defined(VUINT8x1_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x2 vuint8x2_rshift(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.dbl[0] = vuint8x1_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x1_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT8x2_RSHIFT_DEFINED +#endif +#if !defined(VUINT8x2_LRSHIFT_DEFINED) \ + && (defined(VUINT8x1_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x2 vuint8x2_lrshift(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.dbl[0] = vuint8x1_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x1_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT8x2_LRSHIFT_DEFINED +#endif +#if !defined(VUINT8x2_LSHIFT_DEFINED) \ + && (defined(VUINT8x1_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x2 vuint8x2_lshift(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.dbl[0] = vuint8x1_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x1_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT8x2_LSHIFT_DEFINED +#endif +#if !defined(VINT8x4_SPLAT_DEFINED) \ + && (defined(VINT8x2_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_splat(vec_int8 x) +{ + vint8x4 vec; + vec.dbl[0] = vint8x2_splat(x); + vec.dbl[1] = vint8x2_splat(x); + return vec; +} # define VINT8x4_SPLAT_DEFINED #endif - -#if !defined(VINT8x4_LOAD_ALIGNED_DEFINED) && defined(VINT8x2_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT8x2_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_load_aligned(const vec_int8 x[4]) +{ + vint8x4 vec; + vec.dbl[0] = vint8x2_load_aligned(x); + vec.dbl[1] = vint8x2_load_aligned(x + 2); + return vec; +} # define VINT8x4_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT8x4_LOAD_DEFINED) && 
defined(VINT8x2_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_LOAD_DEFINED) \ + && (defined(VINT8x2_LOAD_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_load(const vec_int8 x[4]) +{ + vint8x4 vec; + vec.dbl[0] = vint8x2_load(x); + vec.dbl[1] = vint8x2_load(x + 2); + return vec; +} # define VINT8x4_LOAD_DEFINED #endif - -#if !defined(VINT8x4_STORE_ALIGNED_DEFINED) && defined(VINT8x2_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_STORE_ALIGNED_DEFINED) \ + && (defined(VINT8x2_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint8x4_store_aligned(vint8x4 vec, vec_int8 x[4]) +{ + vint8x2_store_aligned(vec.dbl[0], x); + vint8x2_store_aligned(vec.dbl[1], x + 2); +} # define VINT8x4_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT8x4_STORE_DEFINED) && defined(VINT8x2_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_STORE_DEFINED) \ + && (defined(VINT8x2_STORE_DEFINED)) +VEC_FUNC_IMPL void vint8x4_store(vint8x4 vec, vec_int8 x[4]) +{ + vint8x2_store(vec.dbl[0], x); + vint8x2_store(vec.dbl[1], x + 2); +} # define VINT8x4_STORE_DEFINED #endif - -#if !defined(VINT8x4_ADD_DEFINED) && defined(VINT8x2_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_ADD_DEFINED) \ + && (defined(VINT8x2_ADD_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_add(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_ADD_DEFINED #endif - -#if !defined(VINT8x4_SUB_DEFINED) && defined(VINT8x2_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_SUB_DEFINED) \ + && (defined(VINT8x2_SUB_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_sub(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_SUB_DEFINED #endif - -#if !defined(VINT8x4_MUL_DEFINED) && defined(VINT8x2_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_MUL_DEFINED) \ + && (defined(VINT8x2_MUL_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_mul(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_MUL_DEFINED #endif - -#if !defined(VINT8x4_DIV_DEFINED) && defined(VINT8x2_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_DIV_DEFINED) \ + && (defined(VINT8x2_DIV_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_div(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_DIV_DEFINED #endif - -#if !defined(VINT8x4_MOD_DEFINED) && defined(VINT8x2_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_MOD_DEFINED) \ + && (defined(VINT8x2_MOD_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_mod(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_MOD_DEFINED #endif - -#if !defined(VINT8x4_AVG_DEFINED) && defined(VINT8x2_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_AVG_DEFINED) \ + && (defined(VINT8x2_AVG_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_avg(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_avg(vec1.dbl[1], 
vec2.dbl[1]); + return vec1; +} # define VINT8x4_AVG_DEFINED #endif - -#if !defined(VINT8x4_AND_DEFINED) && defined(VINT8x2_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_AND_DEFINED) \ + && (defined(VINT8x2_AND_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_and(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_AND_DEFINED #endif - -#if !defined(VINT8x4_OR_DEFINED) && defined(VINT8x2_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_OR_DEFINED) \ + && (defined(VINT8x2_OR_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_or(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_OR_DEFINED #endif - -#if !defined(VINT8x4_XOR_DEFINED) && defined(VINT8x2_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_XOR_DEFINED) \ + && (defined(VINT8x2_XOR_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_xor(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_XOR_DEFINED #endif - -#if !defined(VINT8x4_NOT_DEFINED) && defined(VINT8x2_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_NOT_DEFINED) \ + && (defined(VINT8x2_NOT_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_not(vint8x4 vec) +{ + vec.dbl[0] = vint8x2_not(vec.dbl[0]); + vec1.dbl[1] = vint8x2_not(vec.dbl[1]); + return vec; +} # define VINT8x4_NOT_DEFINED #endif - -#if !defined(VINT8x4_CMPLT_DEFINED) && defined(VINT8x2_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_CMPLT_DEFINED) \ + && (defined(VINT8x2_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_cmplt(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_CMPLT_DEFINED #endif - -#if !defined(VINT8x4_CMPEQ_DEFINED) && defined(VINT8x2_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_CMPEQ_DEFINED) \ + && (defined(VINT8x2_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_cmpeq(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_CMPEQ_DEFINED #endif - -#if !defined(VINT8x4_CMPGT_DEFINED) && defined(VINT8x2_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_CMPGT_DEFINED) \ + && (defined(VINT8x2_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_cmpgt(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_CMPGT_DEFINED #endif - -#if !defined(VINT8x4_CMPLE_DEFINED) && defined(VINT8x2_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_CMPLE_DEFINED) \ + && (defined(VINT8x2_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_cmple(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_CMPLE_DEFINED #endif - -#if !defined(VINT8x4_CMPGE_DEFINED) && defined(VINT8x2_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_CMPGE_DEFINED) \ + && 
(defined(VINT8x2_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_cmpge(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_CMPGE_DEFINED #endif - -#if !defined(VINT8x4_MIN_DEFINED) && defined(VINT8x2_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_MIN_DEFINED) \ + && (defined(VINT8x2_MIN_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_min(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_MIN_DEFINED #endif - -#if !defined(VINT8x4_MAX_DEFINED) && defined(VINT8x2_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_MAX_DEFINED) \ + && (defined(VINT8x2_MAX_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_max(vint8x4 vec1, vint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_MAX_DEFINED #endif - -#if !defined(VINT8x4_RSHIFT_DEFINED) && defined(VINT8x2_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_RSHIFT_DEFINED) \ + && (defined(VINT8x2_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_rshift(vint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_RSHIFT_DEFINED #endif - -#if !defined(VINT8x4_LRSHIFT_DEFINED) && defined(VINT8x2_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_LRSHIFT_DEFINED) \ + && (defined(VINT8x2_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_lrshift(vint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_LRSHIFT_DEFINED #endif - -#if !defined(VINT8x4_LSHIFT_DEFINED) && defined(VINT8x2_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 8, 4, 2) +#if !defined(VINT8x4_LSHIFT_DEFINED) \ + && (defined(VINT8x2_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x4 vint8x4_lshift(vint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vint8x2_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x2_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x4_LSHIFT_DEFINED #endif - - - -/* vint8x4 */ - -#if !defined(VUINT8x4_SPLAT_DEFINED) && defined(VUINT8x2_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 8, 4, 2) +#if !defined(VUINT8x4_SPLAT_DEFINED) \ + && (defined(VUINT8x2_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_splat(vec_uint8 x) +{ + vuint8x4 vec; + vec.dbl[0] = vuint8x2_splat(x); + vec.dbl[1] = vuint8x2_splat(x); + return vec; +} # define VUINT8x4_SPLAT_DEFINED #endif - -#if !defined(VUINT8x4_LOAD_ALIGNED_DEFINED) && defined(VUINT8x2_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 8, 4, 2) +#if !defined(VUINT8x4_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT8x2_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_load_aligned(const vec_uint8 x[4]) +{ + vuint8x4 vec; + vec.dbl[0] = vuint8x2_load_aligned(x); + vec.dbl[1] = vuint8x2_load_aligned(x + 2); + return vec; +} # define VUINT8x4_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT8x4_LOAD_DEFINED) && defined(VUINT8x2_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 8, 4, 2) +#if !defined(VUINT8x4_LOAD_DEFINED) \ + && (defined(VUINT8x2_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_load(const vec_uint8 x[4]) +{ + vuint8x4 vec; + vec.dbl[0] = 
vuint8x2_load(x); + vec.dbl[1] = vuint8x2_load(x + 2); + return vec; +} # define VUINT8x4_LOAD_DEFINED #endif - -#if !defined(VUINT8x4_STORE_ALIGNED_DEFINED) && defined(VUINT8x2_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 8, 4, 2) +#if !defined(VUINT8x4_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT8x2_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint8x4_store_aligned(vuint8x4 vec, vec_uint8 x[4]) +{ + vuint8x2_store_aligned(vec.dbl[0], x); + vuint8x2_store_aligned(vec.dbl[1], x + 2); +} # define VUINT8x4_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT8x4_STORE_DEFINED) && defined(VUINT8x2_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 8, 4, 2) +#if !defined(VUINT8x4_STORE_DEFINED) \ + && (defined(VUINT8x2_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint8x4_store(vuint8x4 vec, vec_uint8 x[4]) +{ + vuint8x2_store(vec.dbl[0], x); + vuint8x2_store(vec.dbl[1], x + 2); +} # define VUINT8x4_STORE_DEFINED #endif - -#if !defined(VUINT8x4_ADD_DEFINED) && defined(VUINT8x2_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 8, 4, 2) +#if !defined(VUINT8x4_ADD_DEFINED) \ + && (defined(VUINT8x2_ADD_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_add(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_ADD_DEFINED #endif - -#if !defined(VUINT8x4_SUB_DEFINED) && defined(VUINT8x2_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 8, 4, 2) +#if !defined(VUINT8x4_SUB_DEFINED) \ + && (defined(VUINT8x2_SUB_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_sub(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_SUB_DEFINED #endif - -#if !defined(VUINT8x4_MUL_DEFINED) && defined(VUINT8x2_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 8, 4, 2) +#if !defined(VUINT8x4_MUL_DEFINED) \ + && (defined(VUINT8x2_MUL_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_mul(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_MUL_DEFINED #endif - -#if !defined(VUINT8x4_DIV_DEFINED) && defined(VUINT8x2_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 8, 4, 2) +#if !defined(VUINT8x4_DIV_DEFINED) \ + && (defined(VUINT8x2_DIV_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_div(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_DIV_DEFINED #endif - -#if !defined(VUINT8x4_MOD_DEFINED) && defined(VUINT8x2_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 8, 4, 2) +#if !defined(VUINT8x4_MOD_DEFINED) \ + && (defined(VUINT8x2_MOD_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_mod(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_MOD_DEFINED #endif - -#if !defined(VUINT8x4_AVG_DEFINED) && defined(VUINT8x2_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 8, 4, 2) +#if !defined(VUINT8x4_AVG_DEFINED) \ + && (defined(VUINT8x2_AVG_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_avg(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_AVG_DEFINED #endif - -#if !defined(VUINT8x4_AND_DEFINED) && defined(VUINT8x2_AND_DEFINED) -VEC_DOUBLE_AND(u, 8, 4, 2) +#if !defined(VUINT8x4_AND_DEFINED) \ + && (defined(VUINT8x2_AND_DEFINED)) 
+VEC_FUNC_IMPL vuint8x4 vuint8x4_and(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_AND_DEFINED #endif - -#if !defined(VUINT8x4_OR_DEFINED) && defined(VUINT8x2_OR_DEFINED) -VEC_DOUBLE_OR(u, 8, 4, 2) +#if !defined(VUINT8x4_OR_DEFINED) \ + && (defined(VUINT8x2_OR_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_or(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_OR_DEFINED #endif - -#if !defined(VUINT8x4_XOR_DEFINED) && defined(VUINT8x2_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 8, 4, 2) +#if !defined(VUINT8x4_XOR_DEFINED) \ + && (defined(VUINT8x2_XOR_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_xor(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_XOR_DEFINED #endif - -#if !defined(VUINT8x4_NOT_DEFINED) && defined(VUINT8x2_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 8, 4, 2) +#if !defined(VUINT8x4_NOT_DEFINED) \ + && (defined(VUINT8x2_NOT_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_not(vuint8x4 vec) +{ + vec.dbl[0] = vuint8x2_not(vec.dbl[0]); + vec1.dbl[1] = vuint8x2_not(vec.dbl[1]); + return vec; +} # define VUINT8x4_NOT_DEFINED #endif - -#if !defined(VUINT8x4_CMPLT_DEFINED) && defined(VUINT8x2_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 8, 4, 2) +#if !defined(VUINT8x4_CMPLT_DEFINED) \ + && (defined(VUINT8x2_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmplt(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_CMPLT_DEFINED #endif - -#if !defined(VUINT8x4_CMPEQ_DEFINED) && defined(VUINT8x2_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 8, 4, 2) +#if !defined(VUINT8x4_CMPEQ_DEFINED) \ + && (defined(VUINT8x2_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpeq(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_CMPEQ_DEFINED #endif - -#if !defined(VUINT8x4_CMPGT_DEFINED) && defined(VUINT8x2_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 8, 4, 2) +#if !defined(VUINT8x4_CMPGT_DEFINED) \ + && (defined(VUINT8x2_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpgt(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_CMPGT_DEFINED #endif - -#if !defined(VUINT8x4_CMPLE_DEFINED) && defined(VUINT8x2_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 8, 4, 2) +#if !defined(VUINT8x4_CMPLE_DEFINED) \ + && (defined(VUINT8x2_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmple(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_CMPLE_DEFINED #endif - -#if !defined(VUINT8x4_CMPGE_DEFINED) && defined(VUINT8x2_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 8, 4, 2) +#if !defined(VUINT8x4_CMPGE_DEFINED) \ + && (defined(VUINT8x2_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpge(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_CMPGE_DEFINED 
#endif - -#if !defined(VUINT8x4_MIN_DEFINED) && defined(VUINT8x2_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 8, 4, 2) +#if !defined(VUINT8x4_MIN_DEFINED) \ + && (defined(VUINT8x2_MIN_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_min(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_MIN_DEFINED #endif - -#if !defined(VUINT8x4_MAX_DEFINED) && defined(VUINT8x2_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 8, 4, 2) +#if !defined(VUINT8x4_MAX_DEFINED) \ + && (defined(VUINT8x2_MAX_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_max(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_MAX_DEFINED #endif - -#if !defined(VUINT8x4_RSHIFT_DEFINED) && defined(VUINT8x2_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 8, 4, 2) +#if !defined(VUINT8x4_RSHIFT_DEFINED) \ + && (defined(VUINT8x2_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_rshift(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_RSHIFT_DEFINED #endif - -#if !defined(VUINT8x4_LRSHIFT_DEFINED) && defined(VUINT8x2_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 8, 4, 2) +#if !defined(VUINT8x4_LRSHIFT_DEFINED) \ + && (defined(VUINT8x2_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_lrshift(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_LRSHIFT_DEFINED #endif - -#if !defined(VUINT8x4_LSHIFT_DEFINED) && defined(VUINT8x2_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 8, 4, 2) +#if !defined(VUINT8x4_LSHIFT_DEFINED) \ + && (defined(VUINT8x2_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_lshift(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.dbl[0] = vuint8x2_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x2_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x4_LSHIFT_DEFINED #endif - - - -/* vuint8x8 */ - -#if !defined(VINT8x8_SPLAT_DEFINED) && defined(VINT8x4_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_SPLAT_DEFINED) \ + && (defined(VINT8x4_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_splat(vec_int8 x) +{ + vint8x8 vec; + vec.dbl[0] = vint8x4_splat(x); + vec.dbl[1] = vint8x4_splat(x); + return vec; +} # define VINT8x8_SPLAT_DEFINED #endif - -#if !defined(VINT8x8_LOAD_ALIGNED_DEFINED) && defined(VINT8x4_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT8x4_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_load_aligned(const vec_int8 x[8]) +{ + vint8x8 vec; + vec.dbl[0] = vint8x4_load_aligned(x); + vec.dbl[1] = vint8x4_load_aligned(x + 4); + return vec; +} # define VINT8x8_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT8x8_LOAD_DEFINED) && defined(VINT8x4_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_LOAD_DEFINED) \ + && (defined(VINT8x4_LOAD_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_load(const vec_int8 x[8]) +{ + vint8x8 vec; + vec.dbl[0] = vint8x4_load(x); + vec.dbl[1] = vint8x4_load(x + 4); + return vec; +} # define VINT8x8_LOAD_DEFINED #endif - -#if !defined(VINT8x8_STORE_ALIGNED_DEFINED) && defined(VINT8x4_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 8, 8, 4) +#if 
!defined(VINT8x8_STORE_ALIGNED_DEFINED) \ + && (defined(VINT8x4_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint8x8_store_aligned(vint8x8 vec, vec_int8 x[8]) +{ + vint8x4_store_aligned(vec.dbl[0], x); + vint8x4_store_aligned(vec.dbl[1], x + 4); +} # define VINT8x8_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT8x8_STORE_DEFINED) && defined(VINT8x4_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_STORE_DEFINED) \ + && (defined(VINT8x4_STORE_DEFINED)) +VEC_FUNC_IMPL void vint8x8_store(vint8x8 vec, vec_int8 x[8]) +{ + vint8x4_store(vec.dbl[0], x); + vint8x4_store(vec.dbl[1], x + 4); +} # define VINT8x8_STORE_DEFINED #endif - -#if !defined(VINT8x8_ADD_DEFINED) && defined(VINT8x4_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_ADD_DEFINED) \ + && (defined(VINT8x4_ADD_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_add(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_ADD_DEFINED #endif - -#if !defined(VINT8x8_SUB_DEFINED) && defined(VINT8x4_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_SUB_DEFINED) \ + && (defined(VINT8x4_SUB_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_sub(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_SUB_DEFINED #endif - -#if !defined(VINT8x8_MUL_DEFINED) && defined(VINT8x4_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_MUL_DEFINED) \ + && (defined(VINT8x4_MUL_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_mul(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_MUL_DEFINED #endif - -#if !defined(VINT8x8_DIV_DEFINED) && defined(VINT8x4_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_DIV_DEFINED) \ + && (defined(VINT8x4_DIV_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_div(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_DIV_DEFINED #endif - -#if !defined(VINT8x8_MOD_DEFINED) && defined(VINT8x4_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_MOD_DEFINED) \ + && (defined(VINT8x4_MOD_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_mod(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_MOD_DEFINED #endif - -#if !defined(VINT8x8_AVG_DEFINED) && defined(VINT8x4_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_AVG_DEFINED) \ + && (defined(VINT8x4_AVG_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_avg(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_AVG_DEFINED #endif - -#if !defined(VINT8x8_AND_DEFINED) && defined(VINT8x4_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_AND_DEFINED) \ + && (defined(VINT8x4_AND_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_and(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_AND_DEFINED #endif - -#if 
!defined(VINT8x8_OR_DEFINED) && defined(VINT8x4_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_OR_DEFINED) \ + && (defined(VINT8x4_OR_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_or(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_OR_DEFINED #endif - -#if !defined(VINT8x8_XOR_DEFINED) && defined(VINT8x4_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_XOR_DEFINED) \ + && (defined(VINT8x4_XOR_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_xor(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_XOR_DEFINED #endif - -#if !defined(VINT8x8_NOT_DEFINED) && defined(VINT8x4_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_NOT_DEFINED) \ + && (defined(VINT8x4_NOT_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_not(vint8x8 vec) +{ + vec.dbl[0] = vint8x4_not(vec.dbl[0]); + vec1.dbl[1] = vint8x4_not(vec.dbl[1]); + return vec; +} # define VINT8x8_NOT_DEFINED #endif - -#if !defined(VINT8x8_CMPLT_DEFINED) && defined(VINT8x4_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_CMPLT_DEFINED) \ + && (defined(VINT8x4_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_cmplt(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_CMPLT_DEFINED #endif - -#if !defined(VINT8x8_CMPEQ_DEFINED) && defined(VINT8x4_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_CMPEQ_DEFINED) \ + && (defined(VINT8x4_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_cmpeq(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_CMPEQ_DEFINED #endif - -#if !defined(VINT8x8_CMPGT_DEFINED) && defined(VINT8x4_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_CMPGT_DEFINED) \ + && (defined(VINT8x4_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_cmpgt(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_CMPGT_DEFINED #endif - -#if !defined(VINT8x8_CMPLE_DEFINED) && defined(VINT8x4_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_CMPLE_DEFINED) \ + && (defined(VINT8x4_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_cmple(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_CMPLE_DEFINED #endif - -#if !defined(VINT8x8_CMPGE_DEFINED) && defined(VINT8x4_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_CMPGE_DEFINED) \ + && (defined(VINT8x4_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_cmpge(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_CMPGE_DEFINED #endif - -#if !defined(VINT8x8_MIN_DEFINED) && defined(VINT8x4_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_MIN_DEFINED) \ + && (defined(VINT8x4_MIN_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_min(vint8x8 
vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_MIN_DEFINED #endif - -#if !defined(VINT8x8_MAX_DEFINED) && defined(VINT8x4_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_MAX_DEFINED) \ + && (defined(VINT8x4_MAX_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_max(vint8x8 vec1, vint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_MAX_DEFINED #endif - -#if !defined(VINT8x8_RSHIFT_DEFINED) && defined(VINT8x4_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_RSHIFT_DEFINED) \ + && (defined(VINT8x4_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_rshift(vint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_RSHIFT_DEFINED #endif - -#if !defined(VINT8x8_LRSHIFT_DEFINED) && defined(VINT8x4_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_LRSHIFT_DEFINED) \ + && (defined(VINT8x4_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_lrshift(vint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_LRSHIFT_DEFINED #endif - -#if !defined(VINT8x8_LSHIFT_DEFINED) && defined(VINT8x4_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 8, 8, 4) +#if !defined(VINT8x8_LSHIFT_DEFINED) \ + && (defined(VINT8x4_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x8 vint8x8_lshift(vint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vint8x4_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x4_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x8_LSHIFT_DEFINED #endif - - - -/* vint8x8 */ - -#if !defined(VUINT8x8_SPLAT_DEFINED) && defined(VUINT8x4_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 8, 8, 4) +#if !defined(VUINT8x8_SPLAT_DEFINED) \ + && (defined(VUINT8x4_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_splat(vec_uint8 x) +{ + vuint8x8 vec; + vec.dbl[0] = vuint8x4_splat(x); + vec.dbl[1] = vuint8x4_splat(x); + return vec; +} # define VUINT8x8_SPLAT_DEFINED #endif - -#if !defined(VUINT8x8_LOAD_ALIGNED_DEFINED) && defined(VUINT8x4_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 8, 8, 4) +#if !defined(VUINT8x8_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT8x4_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_load_aligned(const vec_uint8 x[8]) +{ + vuint8x8 vec; + vec.dbl[0] = vuint8x4_load_aligned(x); + vec.dbl[1] = vuint8x4_load_aligned(x + 4); + return vec; +} # define VUINT8x8_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT8x8_LOAD_DEFINED) && defined(VUINT8x4_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 8, 8, 4) +#if !defined(VUINT8x8_LOAD_DEFINED) \ + && (defined(VUINT8x4_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_load(const vec_uint8 x[8]) +{ + vuint8x8 vec; + vec.dbl[0] = vuint8x4_load(x); + vec.dbl[1] = vuint8x4_load(x + 4); + return vec; +} # define VUINT8x8_LOAD_DEFINED #endif - -#if !defined(VUINT8x8_STORE_ALIGNED_DEFINED) && defined(VUINT8x4_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 8, 8, 4) +#if !defined(VUINT8x8_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT8x4_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint8x8_store_aligned(vuint8x8 vec, vec_uint8 x[8]) +{ + vuint8x4_store_aligned(vec.dbl[0], x); + vuint8x4_store_aligned(vec.dbl[1], x + 4); +} 
# define VUINT8x8_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT8x8_STORE_DEFINED) && defined(VUINT8x4_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 8, 8, 4) +#if !defined(VUINT8x8_STORE_DEFINED) \ + && (defined(VUINT8x4_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint8x8_store(vuint8x8 vec, vec_uint8 x[8]) +{ + vuint8x4_store(vec.dbl[0], x); + vuint8x4_store(vec.dbl[1], x + 4); +} # define VUINT8x8_STORE_DEFINED #endif - -#if !defined(VUINT8x8_ADD_DEFINED) && defined(VUINT8x4_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 8, 8, 4) +#if !defined(VUINT8x8_ADD_DEFINED) \ + && (defined(VUINT8x4_ADD_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_add(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_ADD_DEFINED #endif - -#if !defined(VUINT8x8_SUB_DEFINED) && defined(VUINT8x4_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 8, 8, 4) +#if !defined(VUINT8x8_SUB_DEFINED) \ + && (defined(VUINT8x4_SUB_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_sub(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_SUB_DEFINED #endif - -#if !defined(VUINT8x8_MUL_DEFINED) && defined(VUINT8x4_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 8, 8, 4) +#if !defined(VUINT8x8_MUL_DEFINED) \ + && (defined(VUINT8x4_MUL_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_mul(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_MUL_DEFINED #endif - -#if !defined(VUINT8x8_DIV_DEFINED) && defined(VUINT8x4_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 8, 8, 4) +#if !defined(VUINT8x8_DIV_DEFINED) \ + && (defined(VUINT8x4_DIV_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_div(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_DIV_DEFINED #endif - -#if !defined(VUINT8x8_MOD_DEFINED) && defined(VUINT8x4_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 8, 8, 4) +#if !defined(VUINT8x8_MOD_DEFINED) \ + && (defined(VUINT8x4_MOD_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_mod(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_MOD_DEFINED #endif - -#if !defined(VUINT8x8_AVG_DEFINED) && defined(VUINT8x4_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 8, 8, 4) +#if !defined(VUINT8x8_AVG_DEFINED) \ + && (defined(VUINT8x4_AVG_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_avg(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_AVG_DEFINED #endif - -#if !defined(VUINT8x8_AND_DEFINED) && defined(VUINT8x4_AND_DEFINED) -VEC_DOUBLE_AND(u, 8, 8, 4) +#if !defined(VUINT8x8_AND_DEFINED) \ + && (defined(VUINT8x4_AND_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_and(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_AND_DEFINED #endif - -#if !defined(VUINT8x8_OR_DEFINED) && defined(VUINT8x4_OR_DEFINED) -VEC_DOUBLE_OR(u, 8, 8, 4) +#if !defined(VUINT8x8_OR_DEFINED) \ + && (defined(VUINT8x4_OR_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_or(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = 
vuint8x4_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_OR_DEFINED #endif - -#if !defined(VUINT8x8_XOR_DEFINED) && defined(VUINT8x4_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 8, 8, 4) +#if !defined(VUINT8x8_XOR_DEFINED) \ + && (defined(VUINT8x4_XOR_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_xor(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_XOR_DEFINED #endif - -#if !defined(VUINT8x8_NOT_DEFINED) && defined(VUINT8x4_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 8, 8, 4) +#if !defined(VUINT8x8_NOT_DEFINED) \ + && (defined(VUINT8x4_NOT_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_not(vuint8x8 vec) +{ + vec.dbl[0] = vuint8x4_not(vec.dbl[0]); + vec1.dbl[1] = vuint8x4_not(vec.dbl[1]); + return vec; +} # define VUINT8x8_NOT_DEFINED #endif - -#if !defined(VUINT8x8_CMPLT_DEFINED) && defined(VUINT8x4_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 8, 8, 4) +#if !defined(VUINT8x8_CMPLT_DEFINED) \ + && (defined(VUINT8x4_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmplt(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_CMPLT_DEFINED #endif - -#if !defined(VUINT8x8_CMPEQ_DEFINED) && defined(VUINT8x4_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 8, 8, 4) +#if !defined(VUINT8x8_CMPEQ_DEFINED) \ + && (defined(VUINT8x4_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpeq(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_CMPEQ_DEFINED #endif - -#if !defined(VUINT8x8_CMPGT_DEFINED) && defined(VUINT8x4_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 8, 8, 4) +#if !defined(VUINT8x8_CMPGT_DEFINED) \ + && (defined(VUINT8x4_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpgt(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_CMPGT_DEFINED #endif - -#if !defined(VUINT8x8_CMPLE_DEFINED) && defined(VUINT8x4_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 8, 8, 4) +#if !defined(VUINT8x8_CMPLE_DEFINED) \ + && (defined(VUINT8x4_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmple(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_CMPLE_DEFINED #endif - -#if !defined(VUINT8x8_CMPGE_DEFINED) && defined(VUINT8x4_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 8, 8, 4) +#if !defined(VUINT8x8_CMPGE_DEFINED) \ + && (defined(VUINT8x4_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpge(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_CMPGE_DEFINED #endif - -#if !defined(VUINT8x8_MIN_DEFINED) && defined(VUINT8x4_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 8, 8, 4) +#if !defined(VUINT8x8_MIN_DEFINED) \ + && (defined(VUINT8x4_MIN_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_min(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_MIN_DEFINED #endif - -#if !defined(VUINT8x8_MAX_DEFINED) && defined(VUINT8x4_MAX_DEFINED) 
-VEC_DOUBLE_MAX(u, 8, 8, 4) +#if !defined(VUINT8x8_MAX_DEFINED) \ + && (defined(VUINT8x4_MAX_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_max(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_MAX_DEFINED #endif - -#if !defined(VUINT8x8_RSHIFT_DEFINED) && defined(VUINT8x4_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 8, 8, 4) +#if !defined(VUINT8x8_RSHIFT_DEFINED) \ + && (defined(VUINT8x4_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_rshift(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_RSHIFT_DEFINED #endif - -#if !defined(VUINT8x8_LRSHIFT_DEFINED) && defined(VUINT8x4_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 8, 8, 4) +#if !defined(VUINT8x8_LRSHIFT_DEFINED) \ + && (defined(VUINT8x4_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_lrshift(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_LRSHIFT_DEFINED #endif - -#if !defined(VUINT8x8_LSHIFT_DEFINED) && defined(VUINT8x4_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 8, 8, 4) +#if !defined(VUINT8x8_LSHIFT_DEFINED) \ + && (defined(VUINT8x4_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_lshift(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.dbl[0] = vuint8x4_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x4_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x8_LSHIFT_DEFINED #endif - - - -/* vuint8x16 */ - -#if !defined(VINT8x16_SPLAT_DEFINED) && defined(VINT8x8_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_SPLAT_DEFINED) \ + && (defined(VINT8x8_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_splat(vec_int8 x) +{ + vint8x16 vec; + vec.dbl[0] = vint8x8_splat(x); + vec.dbl[1] = vint8x8_splat(x); + return vec; +} # define VINT8x16_SPLAT_DEFINED #endif - -#if !defined(VINT8x16_LOAD_ALIGNED_DEFINED) && defined(VINT8x8_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT8x8_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_load_aligned(const vec_int8 x[16]) +{ + vint8x16 vec; + vec.dbl[0] = vint8x8_load_aligned(x); + vec.dbl[1] = vint8x8_load_aligned(x + 8); + return vec; +} # define VINT8x16_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT8x16_LOAD_DEFINED) && defined(VINT8x8_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_LOAD_DEFINED) \ + && (defined(VINT8x8_LOAD_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_load(const vec_int8 x[16]) +{ + vint8x16 vec; + vec.dbl[0] = vint8x8_load(x); + vec.dbl[1] = vint8x8_load(x + 8); + return vec; +} # define VINT8x16_LOAD_DEFINED #endif - -#if !defined(VINT8x16_STORE_ALIGNED_DEFINED) && defined(VINT8x8_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_STORE_ALIGNED_DEFINED) \ + && (defined(VINT8x8_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 x[16]) +{ + vint8x8_store_aligned(vec.dbl[0], x); + vint8x8_store_aligned(vec.dbl[1], x + 8); +} # define VINT8x16_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT8x16_STORE_DEFINED) && defined(VINT8x8_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_STORE_DEFINED) \ + && 
(defined(VINT8x8_STORE_DEFINED)) +VEC_FUNC_IMPL void vint8x16_store(vint8x16 vec, vec_int8 x[16]) +{ + vint8x8_store(vec.dbl[0], x); + vint8x8_store(vec.dbl[1], x + 8); +} # define VINT8x16_STORE_DEFINED #endif - -#if !defined(VINT8x16_ADD_DEFINED) && defined(VINT8x8_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_ADD_DEFINED) \ + && (defined(VINT8x8_ADD_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_add(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_ADD_DEFINED #endif - -#if !defined(VINT8x16_SUB_DEFINED) && defined(VINT8x8_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_SUB_DEFINED) \ + && (defined(VINT8x8_SUB_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_sub(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_SUB_DEFINED #endif - -#if !defined(VINT8x16_MUL_DEFINED) && defined(VINT8x8_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_MUL_DEFINED) \ + && (defined(VINT8x8_MUL_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_mul(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_MUL_DEFINED #endif - -#if !defined(VINT8x16_DIV_DEFINED) && defined(VINT8x8_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_DIV_DEFINED) \ + && (defined(VINT8x8_DIV_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_div(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_DIV_DEFINED #endif - -#if !defined(VINT8x16_MOD_DEFINED) && defined(VINT8x8_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_MOD_DEFINED) \ + && (defined(VINT8x8_MOD_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_mod(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_MOD_DEFINED #endif - -#if !defined(VINT8x16_AVG_DEFINED) && defined(VINT8x8_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_AVG_DEFINED) \ + && (defined(VINT8x8_AVG_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_AVG_DEFINED #endif - -#if !defined(VINT8x16_AND_DEFINED) && defined(VINT8x8_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_AND_DEFINED) \ + && (defined(VINT8x8_AND_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_AND_DEFINED #endif - -#if !defined(VINT8x16_OR_DEFINED) && defined(VINT8x8_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_OR_DEFINED) \ + && (defined(VINT8x8_OR_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_OR_DEFINED 
#endif - -#if !defined(VINT8x16_XOR_DEFINED) && defined(VINT8x8_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_XOR_DEFINED) \ + && (defined(VINT8x8_XOR_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_XOR_DEFINED #endif - -#if !defined(VINT8x16_NOT_DEFINED) && defined(VINT8x8_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_NOT_DEFINED) \ + && (defined(VINT8x8_NOT_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_not(vint8x16 vec) +{ + vec.dbl[0] = vint8x8_not(vec.dbl[0]); + vec1.dbl[1] = vint8x8_not(vec.dbl[1]); + return vec; +} # define VINT8x16_NOT_DEFINED #endif - -#if !defined(VINT8x16_CMPLT_DEFINED) && defined(VINT8x8_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_CMPLT_DEFINED) \ + && (defined(VINT8x8_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_CMPLT_DEFINED #endif - -#if !defined(VINT8x16_CMPEQ_DEFINED) && defined(VINT8x8_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_CMPEQ_DEFINED) \ + && (defined(VINT8x8_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_CMPEQ_DEFINED #endif - -#if !defined(VINT8x16_CMPGT_DEFINED) && defined(VINT8x8_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_CMPGT_DEFINED) \ + && (defined(VINT8x8_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_CMPGT_DEFINED #endif - -#if !defined(VINT8x16_CMPLE_DEFINED) && defined(VINT8x8_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_CMPLE_DEFINED) \ + && (defined(VINT8x8_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_cmple(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_CMPLE_DEFINED #endif - -#if !defined(VINT8x16_CMPGE_DEFINED) && defined(VINT8x8_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_CMPGE_DEFINED) \ + && (defined(VINT8x8_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_cmpge(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_CMPGE_DEFINED #endif - -#if !defined(VINT8x16_MIN_DEFINED) && defined(VINT8x8_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_MIN_DEFINED) \ + && (defined(VINT8x8_MIN_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_MIN_DEFINED #endif - -#if !defined(VINT8x16_MAX_DEFINED) && defined(VINT8x8_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 8, 16, 8) +#if 
!defined(VINT8x16_MAX_DEFINED) \ + && (defined(VINT8x8_MAX_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_MAX_DEFINED #endif - -#if !defined(VINT8x16_RSHIFT_DEFINED) && defined(VINT8x8_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_RSHIFT_DEFINED) \ + && (defined(VINT8x8_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_rshift(vint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_RSHIFT_DEFINED #endif - -#if !defined(VINT8x16_LRSHIFT_DEFINED) && defined(VINT8x8_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_LRSHIFT_DEFINED) \ + && (defined(VINT8x8_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_LRSHIFT_DEFINED #endif - -#if !defined(VINT8x16_LSHIFT_DEFINED) && defined(VINT8x8_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 8, 16, 8) +#if !defined(VINT8x16_LSHIFT_DEFINED) \ + && (defined(VINT8x8_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vint8x8_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x8_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x16_LSHIFT_DEFINED #endif - - - -/* vint8x16 */ - -#if !defined(VUINT8x16_SPLAT_DEFINED) && defined(VUINT8x8_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 8, 16, 8) +#if !defined(VUINT8x16_SPLAT_DEFINED) \ + && (defined(VUINT8x8_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_splat(vec_uint8 x) +{ + vuint8x16 vec; + vec.dbl[0] = vuint8x8_splat(x); + vec.dbl[1] = vuint8x8_splat(x); + return vec; +} # define VUINT8x16_SPLAT_DEFINED #endif - -#if !defined(VUINT8x16_LOAD_ALIGNED_DEFINED) && defined(VUINT8x8_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 8, 16, 8) +#if !defined(VUINT8x16_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT8x8_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_load_aligned(const vec_uint8 x[16]) +{ + vuint8x16 vec; + vec.dbl[0] = vuint8x8_load_aligned(x); + vec.dbl[1] = vuint8x8_load_aligned(x + 8); + return vec; +} # define VUINT8x16_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT8x16_LOAD_DEFINED) && defined(VUINT8x8_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 8, 16, 8) +#if !defined(VUINT8x16_LOAD_DEFINED) \ + && (defined(VUINT8x8_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 x[16]) +{ + vuint8x16 vec; + vec.dbl[0] = vuint8x8_load(x); + vec.dbl[1] = vuint8x8_load(x + 8); + return vec; +} # define VUINT8x16_LOAD_DEFINED #endif - -#if !defined(VUINT8x16_STORE_ALIGNED_DEFINED) && defined(VUINT8x8_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 8, 16, 8) +#if !defined(VUINT8x16_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT8x8_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 x[16]) +{ + vuint8x8_store_aligned(vec.dbl[0], x); + vuint8x8_store_aligned(vec.dbl[1], x + 8); +} # define VUINT8x16_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT8x16_STORE_DEFINED) && defined(VUINT8x8_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 8, 16, 8) +#if !defined(VUINT8x16_STORE_DEFINED) \ + && (defined(VUINT8x8_STORE_DEFINED)) 
+VEC_FUNC_IMPL void vuint8x16_store(vuint8x16 vec, vec_uint8 x[16]) +{ + vuint8x8_store(vec.dbl[0], x); + vuint8x8_store(vec.dbl[1], x + 8); +} # define VUINT8x16_STORE_DEFINED #endif - -#if !defined(VUINT8x16_ADD_DEFINED) && defined(VUINT8x8_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 8, 16, 8) +#if !defined(VUINT8x16_ADD_DEFINED) \ + && (defined(VUINT8x8_ADD_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_add(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_ADD_DEFINED #endif - -#if !defined(VUINT8x16_SUB_DEFINED) && defined(VUINT8x8_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 8, 16, 8) +#if !defined(VUINT8x16_SUB_DEFINED) \ + && (defined(VUINT8x8_SUB_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_sub(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_SUB_DEFINED #endif - -#if !defined(VUINT8x16_MUL_DEFINED) && defined(VUINT8x8_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 8, 16, 8) +#if !defined(VUINT8x16_MUL_DEFINED) \ + && (defined(VUINT8x8_MUL_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_mul(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_MUL_DEFINED #endif - -#if !defined(VUINT8x16_DIV_DEFINED) && defined(VUINT8x8_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 8, 16, 8) +#if !defined(VUINT8x16_DIV_DEFINED) \ + && (defined(VUINT8x8_DIV_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_div(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_DIV_DEFINED #endif - -#if !defined(VUINT8x16_MOD_DEFINED) && defined(VUINT8x8_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 8, 16, 8) +#if !defined(VUINT8x16_MOD_DEFINED) \ + && (defined(VUINT8x8_MOD_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_mod(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_MOD_DEFINED #endif - -#if !defined(VUINT8x16_AVG_DEFINED) && defined(VUINT8x8_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 8, 16, 8) +#if !defined(VUINT8x16_AVG_DEFINED) \ + && (defined(VUINT8x8_AVG_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_AVG_DEFINED #endif - -#if !defined(VUINT8x16_AND_DEFINED) && defined(VUINT8x8_AND_DEFINED) -VEC_DOUBLE_AND(u, 8, 16, 8) +#if !defined(VUINT8x16_AND_DEFINED) \ + && (defined(VUINT8x8_AND_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_and(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_AND_DEFINED #endif - -#if !defined(VUINT8x16_OR_DEFINED) && defined(VUINT8x8_OR_DEFINED) -VEC_DOUBLE_OR(u, 8, 16, 8) +#if !defined(VUINT8x16_OR_DEFINED) \ + && (defined(VUINT8x8_OR_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_or(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_OR_DEFINED #endif - -#if 
!defined(VUINT8x16_XOR_DEFINED) && defined(VUINT8x8_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 8, 16, 8) +#if !defined(VUINT8x16_XOR_DEFINED) \ + && (defined(VUINT8x8_XOR_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_xor(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_XOR_DEFINED #endif - -#if !defined(VUINT8x16_NOT_DEFINED) && defined(VUINT8x8_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 8, 16, 8) +#if !defined(VUINT8x16_NOT_DEFINED) \ + && (defined(VUINT8x8_NOT_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_not(vuint8x16 vec) +{ + vec.dbl[0] = vuint8x8_not(vec.dbl[0]); + vec1.dbl[1] = vuint8x8_not(vec.dbl[1]); + return vec; +} # define VUINT8x16_NOT_DEFINED #endif - -#if !defined(VUINT8x16_CMPLT_DEFINED) && defined(VUINT8x8_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 8, 16, 8) +#if !defined(VUINT8x16_CMPLT_DEFINED) \ + && (defined(VUINT8x8_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmplt(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_CMPLT_DEFINED #endif - -#if !defined(VUINT8x16_CMPEQ_DEFINED) && defined(VUINT8x8_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 8, 16, 8) +#if !defined(VUINT8x16_CMPEQ_DEFINED) \ + && (defined(VUINT8x8_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpeq(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_CMPEQ_DEFINED #endif - -#if !defined(VUINT8x16_CMPGT_DEFINED) && defined(VUINT8x8_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 8, 16, 8) +#if !defined(VUINT8x16_CMPGT_DEFINED) \ + && (defined(VUINT8x8_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpgt(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_CMPGT_DEFINED #endif - -#if !defined(VUINT8x16_CMPLE_DEFINED) && defined(VUINT8x8_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 8, 16, 8) +#if !defined(VUINT8x16_CMPLE_DEFINED) \ + && (defined(VUINT8x8_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmple(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_CMPLE_DEFINED #endif - -#if !defined(VUINT8x16_CMPGE_DEFINED) && defined(VUINT8x8_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 8, 16, 8) +#if !defined(VUINT8x16_CMPGE_DEFINED) \ + && (defined(VUINT8x8_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpge(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_CMPGE_DEFINED #endif - -#if !defined(VUINT8x16_MIN_DEFINED) && defined(VUINT8x8_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 8, 16, 8) +#if !defined(VUINT8x16_MIN_DEFINED) \ + && (defined(VUINT8x8_MIN_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_min(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_MIN_DEFINED #endif - -#if !defined(VUINT8x16_MAX_DEFINED) && defined(VUINT8x8_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 8, 16, 8) +#if !defined(VUINT8x16_MAX_DEFINED) \ + && 
(defined(VUINT8x8_MAX_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_max(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_MAX_DEFINED #endif - -#if !defined(VUINT8x16_RSHIFT_DEFINED) && defined(VUINT8x8_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 8, 16, 8) +#if !defined(VUINT8x16_RSHIFT_DEFINED) \ + && (defined(VUINT8x8_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_RSHIFT_DEFINED #endif - -#if !defined(VUINT8x16_LRSHIFT_DEFINED) && defined(VUINT8x8_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 8, 16, 8) +#if !defined(VUINT8x16_LRSHIFT_DEFINED) \ + && (defined(VUINT8x8_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_LRSHIFT_DEFINED #endif - -#if !defined(VUINT8x16_LSHIFT_DEFINED) && defined(VUINT8x8_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 8, 16, 8) +#if !defined(VUINT8x16_LSHIFT_DEFINED) \ + && (defined(VUINT8x8_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.dbl[0] = vuint8x8_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x8_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x16_LSHIFT_DEFINED #endif - - - -/* vuint8x32 */ - -#if !defined(VINT8x32_SPLAT_DEFINED) && defined(VINT8x16_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_SPLAT_DEFINED) \ + && (defined(VINT8x16_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_splat(vec_int8 x) +{ + vint8x32 vec; + vec.dbl[0] = vint8x16_splat(x); + vec.dbl[1] = vint8x16_splat(x); + return vec; +} # define VINT8x32_SPLAT_DEFINED #endif - -#if !defined(VINT8x32_LOAD_ALIGNED_DEFINED) && defined(VINT8x16_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT8x16_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_load_aligned(const vec_int8 x[32]) +{ + vint8x32 vec; + vec.dbl[0] = vint8x16_load_aligned(x); + vec.dbl[1] = vint8x16_load_aligned(x + 16); + return vec; +} # define VINT8x32_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT8x32_LOAD_DEFINED) && defined(VINT8x16_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_LOAD_DEFINED) \ + && (defined(VINT8x16_LOAD_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_load(const vec_int8 x[32]) +{ + vint8x32 vec; + vec.dbl[0] = vint8x16_load(x); + vec.dbl[1] = vint8x16_load(x + 16); + return vec; +} # define VINT8x32_LOAD_DEFINED #endif - -#if !defined(VINT8x32_STORE_ALIGNED_DEFINED) && defined(VINT8x16_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_STORE_ALIGNED_DEFINED) \ + && (defined(VINT8x16_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint8x32_store_aligned(vint8x32 vec, vec_int8 x[32]) +{ + vint8x16_store_aligned(vec.dbl[0], x); + vint8x16_store_aligned(vec.dbl[1], x + 16); +} # define VINT8x32_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT8x32_STORE_DEFINED) && defined(VINT8x16_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_STORE_DEFINED) \ + && (defined(VINT8x16_STORE_DEFINED)) 
+VEC_FUNC_IMPL void vint8x32_store(vint8x32 vec, vec_int8 x[32]) +{ + vint8x16_store(vec.dbl[0], x); + vint8x16_store(vec.dbl[1], x + 16); +} # define VINT8x32_STORE_DEFINED #endif - -#if !defined(VINT8x32_ADD_DEFINED) && defined(VINT8x16_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_ADD_DEFINED) \ + && (defined(VINT8x16_ADD_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_add(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_ADD_DEFINED #endif - -#if !defined(VINT8x32_SUB_DEFINED) && defined(VINT8x16_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_SUB_DEFINED) \ + && (defined(VINT8x16_SUB_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_sub(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_SUB_DEFINED #endif - -#if !defined(VINT8x32_MUL_DEFINED) && defined(VINT8x16_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_MUL_DEFINED) \ + && (defined(VINT8x16_MUL_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_mul(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_MUL_DEFINED #endif - -#if !defined(VINT8x32_DIV_DEFINED) && defined(VINT8x16_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_DIV_DEFINED) \ + && (defined(VINT8x16_DIV_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_div(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_DIV_DEFINED #endif - -#if !defined(VINT8x32_MOD_DEFINED) && defined(VINT8x16_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_MOD_DEFINED) \ + && (defined(VINT8x16_MOD_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_mod(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_MOD_DEFINED #endif - -#if !defined(VINT8x32_AVG_DEFINED) && defined(VINT8x16_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_AVG_DEFINED) \ + && (defined(VINT8x16_AVG_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_avg(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_AVG_DEFINED #endif - -#if !defined(VINT8x32_AND_DEFINED) && defined(VINT8x16_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_AND_DEFINED) \ + && (defined(VINT8x16_AND_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_and(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_AND_DEFINED #endif - -#if !defined(VINT8x32_OR_DEFINED) && defined(VINT8x16_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_OR_DEFINED) \ + && (defined(VINT8x16_OR_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_or(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define 
VINT8x32_OR_DEFINED #endif - -#if !defined(VINT8x32_XOR_DEFINED) && defined(VINT8x16_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_XOR_DEFINED) \ + && (defined(VINT8x16_XOR_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_xor(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_XOR_DEFINED #endif - -#if !defined(VINT8x32_NOT_DEFINED) && defined(VINT8x16_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_NOT_DEFINED) \ + && (defined(VINT8x16_NOT_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_not(vint8x32 vec) +{ + vec.dbl[0] = vint8x16_not(vec.dbl[0]); + vec1.dbl[1] = vint8x16_not(vec.dbl[1]); + return vec; +} # define VINT8x32_NOT_DEFINED #endif - -#if !defined(VINT8x32_CMPLT_DEFINED) && defined(VINT8x16_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_CMPLT_DEFINED) \ + && (defined(VINT8x16_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_cmplt(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_CMPLT_DEFINED #endif - -#if !defined(VINT8x32_CMPEQ_DEFINED) && defined(VINT8x16_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_CMPEQ_DEFINED) \ + && (defined(VINT8x16_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_cmpeq(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_CMPEQ_DEFINED #endif - -#if !defined(VINT8x32_CMPGT_DEFINED) && defined(VINT8x16_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_CMPGT_DEFINED) \ + && (defined(VINT8x16_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_cmpgt(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_CMPGT_DEFINED #endif - -#if !defined(VINT8x32_CMPLE_DEFINED) && defined(VINT8x16_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_CMPLE_DEFINED) \ + && (defined(VINT8x16_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_cmple(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_CMPLE_DEFINED #endif - -#if !defined(VINT8x32_CMPGE_DEFINED) && defined(VINT8x16_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_CMPGE_DEFINED) \ + && (defined(VINT8x16_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_cmpge(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_CMPGE_DEFINED #endif - -#if !defined(VINT8x32_MIN_DEFINED) && defined(VINT8x16_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_MIN_DEFINED) \ + && (defined(VINT8x16_MIN_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_min(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_MIN_DEFINED #endif - -#if !defined(VINT8x32_MAX_DEFINED) && defined(VINT8x16_MAX_DEFINED) 
-VEC_DOUBLE_MAX(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_MAX_DEFINED) \ + && (defined(VINT8x16_MAX_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_max(vint8x32 vec1, vint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_MAX_DEFINED #endif - -#if !defined(VINT8x32_RSHIFT_DEFINED) && defined(VINT8x16_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_RSHIFT_DEFINED) \ + && (defined(VINT8x16_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_rshift(vint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_RSHIFT_DEFINED #endif - -#if !defined(VINT8x32_LRSHIFT_DEFINED) && defined(VINT8x16_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_LRSHIFT_DEFINED) \ + && (defined(VINT8x16_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_lrshift(vint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_LRSHIFT_DEFINED #endif - -#if !defined(VINT8x32_LSHIFT_DEFINED) && defined(VINT8x16_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 8, 32, 16) +#if !defined(VINT8x32_LSHIFT_DEFINED) \ + && (defined(VINT8x16_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x32 vint8x32_lshift(vint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vint8x16_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x16_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x32_LSHIFT_DEFINED #endif - - - -/* vint8x32 */ - -#if !defined(VUINT8x32_SPLAT_DEFINED) && defined(VUINT8x16_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 8, 32, 16) +#if !defined(VUINT8x32_SPLAT_DEFINED) \ + && (defined(VUINT8x16_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_splat(vec_uint8 x) +{ + vuint8x32 vec; + vec.dbl[0] = vuint8x16_splat(x); + vec.dbl[1] = vuint8x16_splat(x); + return vec; +} # define VUINT8x32_SPLAT_DEFINED #endif - -#if !defined(VUINT8x32_LOAD_ALIGNED_DEFINED) && defined(VUINT8x16_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 8, 32, 16) +#if !defined(VUINT8x32_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT8x16_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_load_aligned(const vec_uint8 x[32]) +{ + vuint8x32 vec; + vec.dbl[0] = vuint8x16_load_aligned(x); + vec.dbl[1] = vuint8x16_load_aligned(x + 16); + return vec; +} # define VUINT8x32_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT8x32_LOAD_DEFINED) && defined(VUINT8x16_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 8, 32, 16) +#if !defined(VUINT8x32_LOAD_DEFINED) \ + && (defined(VUINT8x16_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_load(const vec_uint8 x[32]) +{ + vuint8x32 vec; + vec.dbl[0] = vuint8x16_load(x); + vec.dbl[1] = vuint8x16_load(x + 16); + return vec; +} # define VUINT8x32_LOAD_DEFINED #endif - -#if !defined(VUINT8x32_STORE_ALIGNED_DEFINED) && defined(VUINT8x16_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 8, 32, 16) +#if !defined(VUINT8x32_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT8x16_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint8x32_store_aligned(vuint8x32 vec, vec_uint8 x[32]) +{ + vuint8x16_store_aligned(vec.dbl[0], x); + vuint8x16_store_aligned(vec.dbl[1], x + 16); +} # define VUINT8x32_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT8x32_STORE_DEFINED) && defined(VUINT8x16_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 8, 32, 
16) +#if !defined(VUINT8x32_STORE_DEFINED) \ + && (defined(VUINT8x16_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint8x32_store(vuint8x32 vec, vec_uint8 x[32]) +{ + vuint8x16_store(vec.dbl[0], x); + vuint8x16_store(vec.dbl[1], x + 16); +} # define VUINT8x32_STORE_DEFINED #endif - -#if !defined(VUINT8x32_ADD_DEFINED) && defined(VUINT8x16_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 8, 32, 16) +#if !defined(VUINT8x32_ADD_DEFINED) \ + && (defined(VUINT8x16_ADD_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_add(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_ADD_DEFINED #endif - -#if !defined(VUINT8x32_SUB_DEFINED) && defined(VUINT8x16_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 8, 32, 16) +#if !defined(VUINT8x32_SUB_DEFINED) \ + && (defined(VUINT8x16_SUB_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_sub(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_SUB_DEFINED #endif - -#if !defined(VUINT8x32_MUL_DEFINED) && defined(VUINT8x16_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 8, 32, 16) +#if !defined(VUINT8x32_MUL_DEFINED) \ + && (defined(VUINT8x16_MUL_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_mul(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_MUL_DEFINED #endif - -#if !defined(VUINT8x32_DIV_DEFINED) && defined(VUINT8x16_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 8, 32, 16) +#if !defined(VUINT8x32_DIV_DEFINED) \ + && (defined(VUINT8x16_DIV_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_div(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_DIV_DEFINED #endif - -#if !defined(VUINT8x32_MOD_DEFINED) && defined(VUINT8x16_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 8, 32, 16) +#if !defined(VUINT8x32_MOD_DEFINED) \ + && (defined(VUINT8x16_MOD_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_mod(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_MOD_DEFINED #endif - -#if !defined(VUINT8x32_AVG_DEFINED) && defined(VUINT8x16_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 8, 32, 16) +#if !defined(VUINT8x32_AVG_DEFINED) \ + && (defined(VUINT8x16_AVG_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_avg(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_AVG_DEFINED #endif - -#if !defined(VUINT8x32_AND_DEFINED) && defined(VUINT8x16_AND_DEFINED) -VEC_DOUBLE_AND(u, 8, 32, 16) +#if !defined(VUINT8x32_AND_DEFINED) \ + && (defined(VUINT8x16_AND_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_and(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_AND_DEFINED #endif - -#if !defined(VUINT8x32_OR_DEFINED) && defined(VUINT8x16_OR_DEFINED) -VEC_DOUBLE_OR(u, 8, 32, 16) +#if !defined(VUINT8x32_OR_DEFINED) \ + && (defined(VUINT8x16_OR_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_or(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_or(vec1.dbl[0], vec2.dbl[0]); + 
vec1.dbl[1] = vuint8x16_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_OR_DEFINED #endif - -#if !defined(VUINT8x32_XOR_DEFINED) && defined(VUINT8x16_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 8, 32, 16) +#if !defined(VUINT8x32_XOR_DEFINED) \ + && (defined(VUINT8x16_XOR_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_xor(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_XOR_DEFINED #endif - -#if !defined(VUINT8x32_NOT_DEFINED) && defined(VUINT8x16_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 8, 32, 16) +#if !defined(VUINT8x32_NOT_DEFINED) \ + && (defined(VUINT8x16_NOT_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_not(vuint8x32 vec) +{ + vec.dbl[0] = vuint8x16_not(vec.dbl[0]); + vec1.dbl[1] = vuint8x16_not(vec.dbl[1]); + return vec; +} # define VUINT8x32_NOT_DEFINED #endif - -#if !defined(VUINT8x32_CMPLT_DEFINED) && defined(VUINT8x16_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 8, 32, 16) +#if !defined(VUINT8x32_CMPLT_DEFINED) \ + && (defined(VUINT8x16_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmplt(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_CMPLT_DEFINED #endif - -#if !defined(VUINT8x32_CMPEQ_DEFINED) && defined(VUINT8x16_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 8, 32, 16) +#if !defined(VUINT8x32_CMPEQ_DEFINED) \ + && (defined(VUINT8x16_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpeq(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_CMPEQ_DEFINED #endif - -#if !defined(VUINT8x32_CMPGT_DEFINED) && defined(VUINT8x16_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 8, 32, 16) +#if !defined(VUINT8x32_CMPGT_DEFINED) \ + && (defined(VUINT8x16_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpgt(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_CMPGT_DEFINED #endif - -#if !defined(VUINT8x32_CMPLE_DEFINED) && defined(VUINT8x16_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 8, 32, 16) +#if !defined(VUINT8x32_CMPLE_DEFINED) \ + && (defined(VUINT8x16_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmple(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_CMPLE_DEFINED #endif - -#if !defined(VUINT8x32_CMPGE_DEFINED) && defined(VUINT8x16_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 8, 32, 16) +#if !defined(VUINT8x32_CMPGE_DEFINED) \ + && (defined(VUINT8x16_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpge(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_CMPGE_DEFINED #endif - -#if !defined(VUINT8x32_MIN_DEFINED) && defined(VUINT8x16_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 8, 32, 16) +#if !defined(VUINT8x32_MIN_DEFINED) \ + && (defined(VUINT8x16_MIN_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_min(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_MIN_DEFINED #endif - -#if 
!defined(VUINT8x32_MAX_DEFINED) && defined(VUINT8x16_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 8, 32, 16) +#if !defined(VUINT8x32_MAX_DEFINED) \ + && (defined(VUINT8x16_MAX_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_max(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_MAX_DEFINED #endif - -#if !defined(VUINT8x32_RSHIFT_DEFINED) && defined(VUINT8x16_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 8, 32, 16) +#if !defined(VUINT8x32_RSHIFT_DEFINED) \ + && (defined(VUINT8x16_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_rshift(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_RSHIFT_DEFINED #endif - -#if !defined(VUINT8x32_LRSHIFT_DEFINED) && defined(VUINT8x16_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 8, 32, 16) +#if !defined(VUINT8x32_LRSHIFT_DEFINED) \ + && (defined(VUINT8x16_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_lrshift(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_LRSHIFT_DEFINED #endif - -#if !defined(VUINT8x32_LSHIFT_DEFINED) && defined(VUINT8x16_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 8, 32, 16) +#if !defined(VUINT8x32_LSHIFT_DEFINED) \ + && (defined(VUINT8x16_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_lshift(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.dbl[0] = vuint8x16_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x16_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x32_LSHIFT_DEFINED #endif - - - -/* vuint8x64 */ - -#if !defined(VINT8x64_SPLAT_DEFINED) && defined(VINT8x32_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_SPLAT_DEFINED) \ + && (defined(VINT8x32_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_splat(vec_int8 x) +{ + vint8x64 vec; + vec.dbl[0] = vint8x32_splat(x); + vec.dbl[1] = vint8x32_splat(x); + return vec; +} # define VINT8x64_SPLAT_DEFINED #endif - -#if !defined(VINT8x64_LOAD_ALIGNED_DEFINED) && defined(VINT8x32_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT8x32_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_load_aligned(const vec_int8 x[64]) +{ + vint8x64 vec; + vec.dbl[0] = vint8x32_load_aligned(x); + vec.dbl[1] = vint8x32_load_aligned(x + 32); + return vec; +} # define VINT8x64_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT8x64_LOAD_DEFINED) && defined(VINT8x32_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_LOAD_DEFINED) \ + && (defined(VINT8x32_LOAD_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_load(const vec_int8 x[64]) +{ + vint8x64 vec; + vec.dbl[0] = vint8x32_load(x); + vec.dbl[1] = vint8x32_load(x + 32); + return vec; +} # define VINT8x64_LOAD_DEFINED #endif - -#if !defined(VINT8x64_STORE_ALIGNED_DEFINED) && defined(VINT8x32_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_STORE_ALIGNED_DEFINED) \ + && (defined(VINT8x32_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint8x64_store_aligned(vint8x64 vec, vec_int8 x[64]) +{ + vint8x32_store_aligned(vec.dbl[0], x); + vint8x32_store_aligned(vec.dbl[1], x + 32); +} # define VINT8x64_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT8x64_STORE_DEFINED) 
&& defined(VINT8x32_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_STORE_DEFINED) \ + && (defined(VINT8x32_STORE_DEFINED)) +VEC_FUNC_IMPL void vint8x64_store(vint8x64 vec, vec_int8 x[64]) +{ + vint8x32_store(vec.dbl[0], x); + vint8x32_store(vec.dbl[1], x + 32); +} # define VINT8x64_STORE_DEFINED #endif - -#if !defined(VINT8x64_ADD_DEFINED) && defined(VINT8x32_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_ADD_DEFINED) \ + && (defined(VINT8x32_ADD_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_add(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_ADD_DEFINED #endif - -#if !defined(VINT8x64_SUB_DEFINED) && defined(VINT8x32_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_SUB_DEFINED) \ + && (defined(VINT8x32_SUB_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_sub(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_SUB_DEFINED #endif - -#if !defined(VINT8x64_MUL_DEFINED) && defined(VINT8x32_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_MUL_DEFINED) \ + && (defined(VINT8x32_MUL_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_mul(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_MUL_DEFINED #endif - -#if !defined(VINT8x64_DIV_DEFINED) && defined(VINT8x32_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_DIV_DEFINED) \ + && (defined(VINT8x32_DIV_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_div(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_DIV_DEFINED #endif - -#if !defined(VINT8x64_MOD_DEFINED) && defined(VINT8x32_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_MOD_DEFINED) \ + && (defined(VINT8x32_MOD_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_mod(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_MOD_DEFINED #endif - -#if !defined(VINT8x64_AVG_DEFINED) && defined(VINT8x32_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_AVG_DEFINED) \ + && (defined(VINT8x32_AVG_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_avg(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_AVG_DEFINED #endif - -#if !defined(VINT8x64_AND_DEFINED) && defined(VINT8x32_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_AND_DEFINED) \ + && (defined(VINT8x32_AND_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_and(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_AND_DEFINED #endif - -#if !defined(VINT8x64_OR_DEFINED) && defined(VINT8x32_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_OR_DEFINED) \ + && (defined(VINT8x32_OR_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_or(vint8x64 vec1, 
vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_OR_DEFINED #endif - -#if !defined(VINT8x64_XOR_DEFINED) && defined(VINT8x32_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_XOR_DEFINED) \ + && (defined(VINT8x32_XOR_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_xor(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_XOR_DEFINED #endif - -#if !defined(VINT8x64_NOT_DEFINED) && defined(VINT8x32_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_NOT_DEFINED) \ + && (defined(VINT8x32_NOT_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_not(vint8x64 vec) +{ + vec.dbl[0] = vint8x32_not(vec.dbl[0]); + vec1.dbl[1] = vint8x32_not(vec.dbl[1]); + return vec; +} # define VINT8x64_NOT_DEFINED #endif - -#if !defined(VINT8x64_CMPLT_DEFINED) && defined(VINT8x32_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_CMPLT_DEFINED) \ + && (defined(VINT8x32_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_cmplt(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_CMPLT_DEFINED #endif - -#if !defined(VINT8x64_CMPEQ_DEFINED) && defined(VINT8x32_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_CMPEQ_DEFINED) \ + && (defined(VINT8x32_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_cmpeq(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_CMPEQ_DEFINED #endif - -#if !defined(VINT8x64_CMPGT_DEFINED) && defined(VINT8x32_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_CMPGT_DEFINED) \ + && (defined(VINT8x32_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_cmpgt(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_CMPGT_DEFINED #endif - -#if !defined(VINT8x64_CMPLE_DEFINED) && defined(VINT8x32_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_CMPLE_DEFINED) \ + && (defined(VINT8x32_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_cmple(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_CMPLE_DEFINED #endif - -#if !defined(VINT8x64_CMPGE_DEFINED) && defined(VINT8x32_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_CMPGE_DEFINED) \ + && (defined(VINT8x32_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_cmpge(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_CMPGE_DEFINED #endif - -#if !defined(VINT8x64_MIN_DEFINED) && defined(VINT8x32_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_MIN_DEFINED) \ + && (defined(VINT8x32_MIN_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_min(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = 
vint8x32_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_MIN_DEFINED #endif - -#if !defined(VINT8x64_MAX_DEFINED) && defined(VINT8x32_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_MAX_DEFINED) \ + && (defined(VINT8x32_MAX_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_max(vint8x64 vec1, vint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_MAX_DEFINED #endif - -#if !defined(VINT8x64_RSHIFT_DEFINED) && defined(VINT8x32_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_RSHIFT_DEFINED) \ + && (defined(VINT8x32_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_rshift(vint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_RSHIFT_DEFINED #endif - -#if !defined(VINT8x64_LRSHIFT_DEFINED) && defined(VINT8x32_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_LRSHIFT_DEFINED) \ + && (defined(VINT8x32_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_lrshift(vint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_LRSHIFT_DEFINED #endif - -#if !defined(VINT8x64_LSHIFT_DEFINED) && defined(VINT8x32_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 8, 64, 32) +#if !defined(VINT8x64_LSHIFT_DEFINED) \ + && (defined(VINT8x32_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint8x64 vint8x64_lshift(vint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vint8x32_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint8x32_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT8x64_LSHIFT_DEFINED #endif - - - -/* vint8x64 */ - -#if !defined(VUINT8x64_SPLAT_DEFINED) && defined(VUINT8x32_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 8, 64, 32) +#if !defined(VUINT8x64_SPLAT_DEFINED) \ + && (defined(VUINT8x32_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_splat(vec_uint8 x) +{ + vuint8x64 vec; + vec.dbl[0] = vuint8x32_splat(x); + vec.dbl[1] = vuint8x32_splat(x); + return vec; +} # define VUINT8x64_SPLAT_DEFINED #endif - -#if !defined(VUINT8x64_LOAD_ALIGNED_DEFINED) && defined(VUINT8x32_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 8, 64, 32) +#if !defined(VUINT8x64_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT8x32_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_load_aligned(const vec_uint8 x[64]) +{ + vuint8x64 vec; + vec.dbl[0] = vuint8x32_load_aligned(x); + vec.dbl[1] = vuint8x32_load_aligned(x + 32); + return vec; +} # define VUINT8x64_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT8x64_LOAD_DEFINED) && defined(VUINT8x32_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 8, 64, 32) +#if !defined(VUINT8x64_LOAD_DEFINED) \ + && (defined(VUINT8x32_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_load(const vec_uint8 x[64]) +{ + vuint8x64 vec; + vec.dbl[0] = vuint8x32_load(x); + vec.dbl[1] = vuint8x32_load(x + 32); + return vec; +} # define VUINT8x64_LOAD_DEFINED #endif - -#if !defined(VUINT8x64_STORE_ALIGNED_DEFINED) && defined(VUINT8x32_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 8, 64, 32) +#if !defined(VUINT8x64_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT8x32_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint8x64_store_aligned(vuint8x64 vec, vec_uint8 x[64]) +{ + vuint8x32_store_aligned(vec.dbl[0], x); + 
vuint8x32_store_aligned(vec.dbl[1], x + 32); +} # define VUINT8x64_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT8x64_STORE_DEFINED) && defined(VUINT8x32_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 8, 64, 32) +#if !defined(VUINT8x64_STORE_DEFINED) \ + && (defined(VUINT8x32_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint8x64_store(vuint8x64 vec, vec_uint8 x[64]) +{ + vuint8x32_store(vec.dbl[0], x); + vuint8x32_store(vec.dbl[1], x + 32); +} # define VUINT8x64_STORE_DEFINED #endif - -#if !defined(VUINT8x64_ADD_DEFINED) && defined(VUINT8x32_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 8, 64, 32) +#if !defined(VUINT8x64_ADD_DEFINED) \ + && (defined(VUINT8x32_ADD_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_add(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_ADD_DEFINED #endif - -#if !defined(VUINT8x64_SUB_DEFINED) && defined(VUINT8x32_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 8, 64, 32) +#if !defined(VUINT8x64_SUB_DEFINED) \ + && (defined(VUINT8x32_SUB_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_sub(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_SUB_DEFINED #endif - -#if !defined(VUINT8x64_MUL_DEFINED) && defined(VUINT8x32_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 8, 64, 32) +#if !defined(VUINT8x64_MUL_DEFINED) \ + && (defined(VUINT8x32_MUL_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_mul(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_MUL_DEFINED #endif - -#if !defined(VUINT8x64_DIV_DEFINED) && defined(VUINT8x32_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 8, 64, 32) +#if !defined(VUINT8x64_DIV_DEFINED) \ + && (defined(VUINT8x32_DIV_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_div(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_DIV_DEFINED #endif - -#if !defined(VUINT8x64_MOD_DEFINED) && defined(VUINT8x32_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 8, 64, 32) +#if !defined(VUINT8x64_MOD_DEFINED) \ + && (defined(VUINT8x32_MOD_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_mod(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_MOD_DEFINED #endif - -#if !defined(VUINT8x64_AVG_DEFINED) && defined(VUINT8x32_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 8, 64, 32) +#if !defined(VUINT8x64_AVG_DEFINED) \ + && (defined(VUINT8x32_AVG_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_avg(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_AVG_DEFINED #endif - -#if !defined(VUINT8x64_AND_DEFINED) && defined(VUINT8x32_AND_DEFINED) -VEC_DOUBLE_AND(u, 8, 64, 32) +#if !defined(VUINT8x64_AND_DEFINED) \ + && (defined(VUINT8x32_AND_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_and(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_AND_DEFINED #endif - -#if !defined(VUINT8x64_OR_DEFINED) && defined(VUINT8x32_OR_DEFINED) -VEC_DOUBLE_OR(u, 8, 64, 32) +#if 
!defined(VUINT8x64_OR_DEFINED) \ + && (defined(VUINT8x32_OR_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_or(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_OR_DEFINED #endif - -#if !defined(VUINT8x64_XOR_DEFINED) && defined(VUINT8x32_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 8, 64, 32) +#if !defined(VUINT8x64_XOR_DEFINED) \ + && (defined(VUINT8x32_XOR_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_xor(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_XOR_DEFINED #endif - -#if !defined(VUINT8x64_NOT_DEFINED) && defined(VUINT8x32_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 8, 64, 32) +#if !defined(VUINT8x64_NOT_DEFINED) \ + && (defined(VUINT8x32_NOT_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_not(vuint8x64 vec) +{ + vec.dbl[0] = vuint8x32_not(vec.dbl[0]); + vec1.dbl[1] = vuint8x32_not(vec.dbl[1]); + return vec; +} # define VUINT8x64_NOT_DEFINED #endif - -#if !defined(VUINT8x64_CMPLT_DEFINED) && defined(VUINT8x32_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 8, 64, 32) +#if !defined(VUINT8x64_CMPLT_DEFINED) \ + && (defined(VUINT8x32_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmplt(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_CMPLT_DEFINED #endif - -#if !defined(VUINT8x64_CMPEQ_DEFINED) && defined(VUINT8x32_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 8, 64, 32) +#if !defined(VUINT8x64_CMPEQ_DEFINED) \ + && (defined(VUINT8x32_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpeq(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_CMPEQ_DEFINED #endif - -#if !defined(VUINT8x64_CMPGT_DEFINED) && defined(VUINT8x32_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 8, 64, 32) +#if !defined(VUINT8x64_CMPGT_DEFINED) \ + && (defined(VUINT8x32_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpgt(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_CMPGT_DEFINED #endif - -#if !defined(VUINT8x64_CMPLE_DEFINED) && defined(VUINT8x32_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 8, 64, 32) +#if !defined(VUINT8x64_CMPLE_DEFINED) \ + && (defined(VUINT8x32_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmple(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_CMPLE_DEFINED #endif - -#if !defined(VUINT8x64_CMPGE_DEFINED) && defined(VUINT8x32_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 8, 64, 32) +#if !defined(VUINT8x64_CMPGE_DEFINED) \ + && (defined(VUINT8x32_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpge(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_CMPGE_DEFINED #endif - -#if !defined(VUINT8x64_MIN_DEFINED) && defined(VUINT8x32_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 8, 64, 32) +#if !defined(VUINT8x64_MIN_DEFINED) \ + && (defined(VUINT8x32_MIN_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_min(vuint8x64 
vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_MIN_DEFINED #endif - -#if !defined(VUINT8x64_MAX_DEFINED) && defined(VUINT8x32_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 8, 64, 32) +#if !defined(VUINT8x64_MAX_DEFINED) \ + && (defined(VUINT8x32_MAX_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_max(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_MAX_DEFINED #endif - -#if !defined(VUINT8x64_RSHIFT_DEFINED) && defined(VUINT8x32_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 8, 64, 32) +#if !defined(VUINT8x64_RSHIFT_DEFINED) \ + && (defined(VUINT8x32_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_rshift(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_RSHIFT_DEFINED #endif - -#if !defined(VUINT8x64_LRSHIFT_DEFINED) && defined(VUINT8x32_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 8, 64, 32) +#if !defined(VUINT8x64_LRSHIFT_DEFINED) \ + && (defined(VUINT8x32_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_lrshift(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_LRSHIFT_DEFINED #endif - -#if !defined(VUINT8x64_LSHIFT_DEFINED) && defined(VUINT8x32_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 8, 64, 32) +#if !defined(VUINT8x64_LSHIFT_DEFINED) \ + && (defined(VUINT8x32_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_lshift(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.dbl[0] = vuint8x32_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint8x32_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT8x64_LSHIFT_DEFINED #endif - - - -/* vuint16x4 */ - -#if !defined(VINT16x4_SPLAT_DEFINED) && defined(VINT16x2_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 16, 4, 2) +#if !defined(VINT16x2_SPLAT_DEFINED) \ + && (defined(VINT16x1_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_splat(vec_int16 x) +{ + vint16x2 vec; + vec.dbl[0] = vint16x1_splat(x); + vec.dbl[1] = vint16x1_splat(x); + return vec; +} +# define VINT16x2_SPLAT_DEFINED +#endif +#if !defined(VINT16x2_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT16x1_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_load_aligned(const vec_int16 x[2]) +{ + vint16x2 vec; + vec.dbl[0] = vint16x1_load_aligned(x); + vec.dbl[1] = vint16x1_load_aligned(x + 1); + return vec; +} +# define VINT16x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT16x2_LOAD_DEFINED) \ + && (defined(VINT16x1_LOAD_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_load(const vec_int16 x[2]) +{ + vint16x2 vec; + vec.dbl[0] = vint16x1_load(x); + vec.dbl[1] = vint16x1_load(x + 1); + return vec; +} +# define VINT16x2_LOAD_DEFINED +#endif +#if !defined(VINT16x2_STORE_ALIGNED_DEFINED) \ + && (defined(VINT16x1_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint16x2_store_aligned(vint16x2 vec, vec_int16 x[2]) +{ + vint16x1_store_aligned(vec.dbl[0], x); + vint16x1_store_aligned(vec.dbl[1], x + 1); +} +# define VINT16x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT16x2_STORE_DEFINED) \ + && (defined(VINT16x1_STORE_DEFINED)) +VEC_FUNC_IMPL void vint16x2_store(vint16x2 vec, vec_int16 x[2]) +{ + vint16x1_store(vec.dbl[0], x); + vint16x1_store(vec.dbl[1], x + 1); +} +# define 
VINT16x2_STORE_DEFINED +#endif +#if !defined(VINT16x2_ADD_DEFINED) \ + && (defined(VINT16x1_ADD_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_add(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_ADD_DEFINED +#endif +#if !defined(VINT16x2_SUB_DEFINED) \ + && (defined(VINT16x1_SUB_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_sub(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_SUB_DEFINED +#endif +#if !defined(VINT16x2_MUL_DEFINED) \ + && (defined(VINT16x1_MUL_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_mul(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_MUL_DEFINED +#endif +#if !defined(VINT16x2_DIV_DEFINED) \ + && (defined(VINT16x1_DIV_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_div(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_DIV_DEFINED +#endif +#if !defined(VINT16x2_MOD_DEFINED) \ + && (defined(VINT16x1_MOD_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_mod(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_MOD_DEFINED +#endif +#if !defined(VINT16x2_AVG_DEFINED) \ + && (defined(VINT16x1_AVG_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_avg(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_AVG_DEFINED +#endif +#if !defined(VINT16x2_AND_DEFINED) \ + && (defined(VINT16x1_AND_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_and(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_AND_DEFINED +#endif +#if !defined(VINT16x2_OR_DEFINED) \ + && (defined(VINT16x1_OR_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_or(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_OR_DEFINED +#endif +#if !defined(VINT16x2_XOR_DEFINED) \ + && (defined(VINT16x1_XOR_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_xor(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_XOR_DEFINED +#endif +#if !defined(VINT16x2_NOT_DEFINED) \ + && (defined(VINT16x1_NOT_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_not(vint16x2 vec) +{ + vec.dbl[0] = vint16x1_not(vec.dbl[0]); + vec1.dbl[1] = vint16x1_not(vec.dbl[1]); + return vec; +} +# define VINT16x2_NOT_DEFINED +#endif +#if !defined(VINT16x2_CMPLT_DEFINED) \ + && (defined(VINT16x1_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_cmplt(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_CMPLT_DEFINED +#endif +#if !defined(VINT16x2_CMPEQ_DEFINED) \ + && (defined(VINT16x1_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint16x2 
vint16x2_cmpeq(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_CMPEQ_DEFINED +#endif +#if !defined(VINT16x2_CMPGT_DEFINED) \ + && (defined(VINT16x1_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_cmpgt(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_CMPGT_DEFINED +#endif +#if !defined(VINT16x2_CMPLE_DEFINED) \ + && (defined(VINT16x1_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_cmple(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_CMPLE_DEFINED +#endif +#if !defined(VINT16x2_CMPGE_DEFINED) \ + && (defined(VINT16x1_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_cmpge(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_CMPGE_DEFINED +#endif +#if !defined(VINT16x2_MIN_DEFINED) \ + && (defined(VINT16x1_MIN_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_min(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_MIN_DEFINED +#endif +#if !defined(VINT16x2_MAX_DEFINED) \ + && (defined(VINT16x1_MAX_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_max(vint16x2 vec1, vint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_MAX_DEFINED +#endif +#if !defined(VINT16x2_RSHIFT_DEFINED) \ + && (defined(VINT16x1_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_rshift(vint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_RSHIFT_DEFINED +#endif +#if !defined(VINT16x2_LRSHIFT_DEFINED) \ + && (defined(VINT16x1_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_lrshift(vint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_LRSHIFT_DEFINED +#endif +#if !defined(VINT16x2_LSHIFT_DEFINED) \ + && (defined(VINT16x1_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x2 vint16x2_lshift(vint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vint16x1_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x1_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT16x2_LSHIFT_DEFINED +#endif +#if !defined(VUINT16x2_SPLAT_DEFINED) \ + && (defined(VUINT16x1_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_splat(vec_uint16 x) +{ + vuint16x2 vec; + vec.dbl[0] = vuint16x1_splat(x); + vec.dbl[1] = vuint16x1_splat(x); + return vec; +} +# define VUINT16x2_SPLAT_DEFINED +#endif +#if !defined(VUINT16x2_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT16x1_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_load_aligned(const vec_uint16 x[2]) +{ + vuint16x2 vec; + vec.dbl[0] = vuint16x1_load_aligned(x); + vec.dbl[1] = vuint16x1_load_aligned(x + 1); + return vec; +} +# define VUINT16x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT16x2_LOAD_DEFINED) \ + && (defined(VUINT16x1_LOAD_DEFINED)) +VEC_FUNC_IMPL 
vuint16x2 vuint16x2_load(const vec_uint16 x[2]) +{ + vuint16x2 vec; + vec.dbl[0] = vuint16x1_load(x); + vec.dbl[1] = vuint16x1_load(x + 1); + return vec; +} +# define VUINT16x2_LOAD_DEFINED +#endif +#if !defined(VUINT16x2_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT16x1_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint16x2_store_aligned(vuint16x2 vec, vec_uint16 x[2]) +{ + vuint16x1_store_aligned(vec.dbl[0], x); + vuint16x1_store_aligned(vec.dbl[1], x + 1); +} +# define VUINT16x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT16x2_STORE_DEFINED) \ + && (defined(VUINT16x1_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint16x2_store(vuint16x2 vec, vec_uint16 x[2]) +{ + vuint16x1_store(vec.dbl[0], x); + vuint16x1_store(vec.dbl[1], x + 1); +} +# define VUINT16x2_STORE_DEFINED +#endif +#if !defined(VUINT16x2_ADD_DEFINED) \ + && (defined(VUINT16x1_ADD_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_add(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_ADD_DEFINED +#endif +#if !defined(VUINT16x2_SUB_DEFINED) \ + && (defined(VUINT16x1_SUB_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_sub(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_SUB_DEFINED +#endif +#if !defined(VUINT16x2_MUL_DEFINED) \ + && (defined(VUINT16x1_MUL_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_mul(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_MUL_DEFINED +#endif +#if !defined(VUINT16x2_DIV_DEFINED) \ + && (defined(VUINT16x1_DIV_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_div(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_DIV_DEFINED +#endif +#if !defined(VUINT16x2_MOD_DEFINED) \ + && (defined(VUINT16x1_MOD_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_mod(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_MOD_DEFINED +#endif +#if !defined(VUINT16x2_AVG_DEFINED) \ + && (defined(VUINT16x1_AVG_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_avg(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_AVG_DEFINED +#endif +#if !defined(VUINT16x2_AND_DEFINED) \ + && (defined(VUINT16x1_AND_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_and(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_AND_DEFINED +#endif +#if !defined(VUINT16x2_OR_DEFINED) \ + && (defined(VUINT16x1_OR_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_or(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_OR_DEFINED +#endif +#if !defined(VUINT16x2_XOR_DEFINED) \ + && (defined(VUINT16x1_XOR_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_xor(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = 
vuint16x1_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_XOR_DEFINED +#endif +#if !defined(VUINT16x2_NOT_DEFINED) \ + && (defined(VUINT16x1_NOT_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_not(vuint16x2 vec) +{ + vec.dbl[0] = vuint16x1_not(vec.dbl[0]); + vec1.dbl[1] = vuint16x1_not(vec.dbl[1]); + return vec; +} +# define VUINT16x2_NOT_DEFINED +#endif +#if !defined(VUINT16x2_CMPLT_DEFINED) \ + && (defined(VUINT16x1_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmplt(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_CMPLT_DEFINED +#endif +#if !defined(VUINT16x2_CMPEQ_DEFINED) \ + && (defined(VUINT16x1_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpeq(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_CMPEQ_DEFINED +#endif +#if !defined(VUINT16x2_CMPGT_DEFINED) \ + && (defined(VUINT16x1_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpgt(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_CMPGT_DEFINED +#endif +#if !defined(VUINT16x2_CMPLE_DEFINED) \ + && (defined(VUINT16x1_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmple(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_CMPLE_DEFINED +#endif +#if !defined(VUINT16x2_CMPGE_DEFINED) \ + && (defined(VUINT16x1_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpge(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_CMPGE_DEFINED +#endif +#if !defined(VUINT16x2_MIN_DEFINED) \ + && (defined(VUINT16x1_MIN_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_min(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_MIN_DEFINED +#endif +#if !defined(VUINT16x2_MAX_DEFINED) \ + && (defined(VUINT16x1_MAX_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_max(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_MAX_DEFINED +#endif +#if !defined(VUINT16x2_RSHIFT_DEFINED) \ + && (defined(VUINT16x1_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_rshift(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_RSHIFT_DEFINED +#endif +#if !defined(VUINT16x2_LRSHIFT_DEFINED) \ + && (defined(VUINT16x1_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_lrshift(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_LRSHIFT_DEFINED +#endif +#if !defined(VUINT16x2_LSHIFT_DEFINED) \ + && (defined(VUINT16x1_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x2 
vuint16x2_lshift(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.dbl[0] = vuint16x1_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x1_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT16x2_LSHIFT_DEFINED +#endif +#if !defined(VINT16x4_SPLAT_DEFINED) \ + && (defined(VINT16x2_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_splat(vec_int16 x) +{ + vint16x4 vec; + vec.dbl[0] = vint16x2_splat(x); + vec.dbl[1] = vint16x2_splat(x); + return vec; +} # define VINT16x4_SPLAT_DEFINED #endif - -#if !defined(VINT16x4_LOAD_ALIGNED_DEFINED) && defined(VINT16x2_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT16x2_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_load_aligned(const vec_int16 x[4]) +{ + vint16x4 vec; + vec.dbl[0] = vint16x2_load_aligned(x); + vec.dbl[1] = vint16x2_load_aligned(x + 2); + return vec; +} # define VINT16x4_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT16x4_LOAD_DEFINED) && defined(VINT16x2_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_LOAD_DEFINED) \ + && (defined(VINT16x2_LOAD_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_load(const vec_int16 x[4]) +{ + vint16x4 vec; + vec.dbl[0] = vint16x2_load(x); + vec.dbl[1] = vint16x2_load(x + 2); + return vec; +} # define VINT16x4_LOAD_DEFINED #endif - -#if !defined(VINT16x4_STORE_ALIGNED_DEFINED) && defined(VINT16x2_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_STORE_ALIGNED_DEFINED) \ + && (defined(VINT16x2_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint16x4_store_aligned(vint16x4 vec, vec_int16 x[4]) +{ + vint16x2_store_aligned(vec.dbl[0], x); + vint16x2_store_aligned(vec.dbl[1], x + 2); +} # define VINT16x4_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT16x4_STORE_DEFINED) && defined(VINT16x2_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_STORE_DEFINED) \ + && (defined(VINT16x2_STORE_DEFINED)) +VEC_FUNC_IMPL void vint16x4_store(vint16x4 vec, vec_int16 x[4]) +{ + vint16x2_store(vec.dbl[0], x); + vint16x2_store(vec.dbl[1], x + 2); +} # define VINT16x4_STORE_DEFINED #endif - -#if !defined(VINT16x4_ADD_DEFINED) && defined(VINT16x2_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_ADD_DEFINED) \ + && (defined(VINT16x2_ADD_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_add(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_ADD_DEFINED #endif - -#if !defined(VINT16x4_SUB_DEFINED) && defined(VINT16x2_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_SUB_DEFINED) \ + && (defined(VINT16x2_SUB_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_sub(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_SUB_DEFINED #endif - -#if !defined(VINT16x4_MUL_DEFINED) && defined(VINT16x2_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_MUL_DEFINED) \ + && (defined(VINT16x2_MUL_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_mul(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_MUL_DEFINED #endif - -#if !defined(VINT16x4_DIV_DEFINED) && defined(VINT16x2_DIV_DEFINED) 
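A note on the pattern in this hunk, since the generated functions now
appear fully expanded instead of behind the old VEC_DOUBLE_* macros:
every "doubled" type stores two half-width vectors in dbl[0] and
dbl[1], each operation simply forwards to the half-width operation on
both halves, and loads/stores advance the pointer by half the lane
count (e.g. x + 2 for a 4-lane type built from 2-lane halves). The
preprocessor guards follow one convention: a fallback is emitted only
when the wide operation has not already been provided by a native
implementation and the half-width building block exists, and it then
defines the matching *_DEFINED macro so any later fallback is skipped.
As a rough sketch of the layout this relies on (the real union
declarations live in the generated type headers, so treat the members
shown here as an assumption rather than the literal definition):

    /* sketch only: a wide vector carries its two half-width halves
     * in a dbl[] member; the real union also has backend-specific
     * members for the native implementations */
    typedef union {
        vint16x2 dbl[2];
    } vint16x4;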
-VEC_DOUBLE_DIV(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_DIV_DEFINED) \ + && (defined(VINT16x2_DIV_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_div(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_DIV_DEFINED #endif - -#if !defined(VINT16x4_MOD_DEFINED) && defined(VINT16x2_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_MOD_DEFINED) \ + && (defined(VINT16x2_MOD_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_mod(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_MOD_DEFINED #endif - -#if !defined(VINT16x4_AVG_DEFINED) && defined(VINT16x2_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_AVG_DEFINED) \ + && (defined(VINT16x2_AVG_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_avg(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_AVG_DEFINED #endif - -#if !defined(VINT16x4_AND_DEFINED) && defined(VINT16x2_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_AND_DEFINED) \ + && (defined(VINT16x2_AND_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_and(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_AND_DEFINED #endif - -#if !defined(VINT16x4_OR_DEFINED) && defined(VINT16x2_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_OR_DEFINED) \ + && (defined(VINT16x2_OR_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_or(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_OR_DEFINED #endif - -#if !defined(VINT16x4_XOR_DEFINED) && defined(VINT16x2_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_XOR_DEFINED) \ + && (defined(VINT16x2_XOR_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_xor(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_XOR_DEFINED #endif - -#if !defined(VINT16x4_NOT_DEFINED) && defined(VINT16x2_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_NOT_DEFINED) \ + && (defined(VINT16x2_NOT_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_not(vint16x4 vec) +{ + vec.dbl[0] = vint16x2_not(vec.dbl[0]); + vec1.dbl[1] = vint16x2_not(vec.dbl[1]); + return vec; +} # define VINT16x4_NOT_DEFINED #endif - -#if !defined(VINT16x4_CMPLT_DEFINED) && defined(VINT16x2_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_CMPLT_DEFINED) \ + && (defined(VINT16x2_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_cmplt(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_CMPLT_DEFINED #endif - -#if !defined(VINT16x4_CMPEQ_DEFINED) && defined(VINT16x2_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_CMPEQ_DEFINED) \ + && (defined(VINT16x2_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_cmpeq(vint16x4 vec1, vint16x4 vec2) +{ + 
vec1.dbl[0] = vint16x2_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_CMPEQ_DEFINED #endif - -#if !defined(VINT16x4_CMPGT_DEFINED) && defined(VINT16x2_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_CMPGT_DEFINED) \ + && (defined(VINT16x2_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_cmpgt(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_CMPGT_DEFINED #endif - -#if !defined(VINT16x4_CMPLE_DEFINED) && defined(VINT16x2_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_CMPLE_DEFINED) \ + && (defined(VINT16x2_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_cmple(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_CMPLE_DEFINED #endif - -#if !defined(VINT16x4_CMPGE_DEFINED) && defined(VINT16x2_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_CMPGE_DEFINED) \ + && (defined(VINT16x2_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_cmpge(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_CMPGE_DEFINED #endif - -#if !defined(VINT16x4_MIN_DEFINED) && defined(VINT16x2_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_MIN_DEFINED) \ + && (defined(VINT16x2_MIN_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_min(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_MIN_DEFINED #endif - -#if !defined(VINT16x4_MAX_DEFINED) && defined(VINT16x2_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_MAX_DEFINED) \ + && (defined(VINT16x2_MAX_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_max(vint16x4 vec1, vint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_MAX_DEFINED #endif - -#if !defined(VINT16x4_RSHIFT_DEFINED) && defined(VINT16x2_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_RSHIFT_DEFINED) \ + && (defined(VINT16x2_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_rshift(vint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_RSHIFT_DEFINED #endif - -#if !defined(VINT16x4_LRSHIFT_DEFINED) && defined(VINT16x2_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_LRSHIFT_DEFINED) \ + && (defined(VINT16x2_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_lrshift(vint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vint16x2_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_LRSHIFT_DEFINED #endif - -#if !defined(VINT16x4_LSHIFT_DEFINED) && defined(VINT16x2_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 16, 4, 2) +#if !defined(VINT16x4_LSHIFT_DEFINED) \ + && (defined(VINT16x2_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x4 vint16x4_lshift(vint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = 
vint16x2_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x2_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x4_LSHIFT_DEFINED #endif - - - -/* vint16x4 */ - -#if !defined(VUINT16x4_SPLAT_DEFINED) && defined(VUINT16x2_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 16, 4, 2) +#if !defined(VUINT16x4_SPLAT_DEFINED) \ + && (defined(VUINT16x2_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_splat(vec_uint16 x) +{ + vuint16x4 vec; + vec.dbl[0] = vuint16x2_splat(x); + vec.dbl[1] = vuint16x2_splat(x); + return vec; +} # define VUINT16x4_SPLAT_DEFINED #endif - -#if !defined(VUINT16x4_LOAD_ALIGNED_DEFINED) && defined(VUINT16x2_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 16, 4, 2) +#if !defined(VUINT16x4_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT16x2_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_load_aligned(const vec_uint16 x[4]) +{ + vuint16x4 vec; + vec.dbl[0] = vuint16x2_load_aligned(x); + vec.dbl[1] = vuint16x2_load_aligned(x + 2); + return vec; +} # define VUINT16x4_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT16x4_LOAD_DEFINED) && defined(VUINT16x2_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 16, 4, 2) +#if !defined(VUINT16x4_LOAD_DEFINED) \ + && (defined(VUINT16x2_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_load(const vec_uint16 x[4]) +{ + vuint16x4 vec; + vec.dbl[0] = vuint16x2_load(x); + vec.dbl[1] = vuint16x2_load(x + 2); + return vec; +} # define VUINT16x4_LOAD_DEFINED #endif - -#if !defined(VUINT16x4_STORE_ALIGNED_DEFINED) && defined(VUINT16x2_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 16, 4, 2) +#if !defined(VUINT16x4_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT16x2_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint16x4_store_aligned(vuint16x4 vec, vec_uint16 x[4]) +{ + vuint16x2_store_aligned(vec.dbl[0], x); + vuint16x2_store_aligned(vec.dbl[1], x + 2); +} # define VUINT16x4_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT16x4_STORE_DEFINED) && defined(VUINT16x2_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 16, 4, 2) +#if !defined(VUINT16x4_STORE_DEFINED) \ + && (defined(VUINT16x2_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint16x4_store(vuint16x4 vec, vec_uint16 x[4]) +{ + vuint16x2_store(vec.dbl[0], x); + vuint16x2_store(vec.dbl[1], x + 2); +} # define VUINT16x4_STORE_DEFINED #endif - -#if !defined(VUINT16x4_ADD_DEFINED) && defined(VUINT16x2_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 16, 4, 2) +#if !defined(VUINT16x4_ADD_DEFINED) \ + && (defined(VUINT16x2_ADD_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_add(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_ADD_DEFINED #endif - -#if !defined(VUINT16x4_SUB_DEFINED) && defined(VUINT16x2_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 16, 4, 2) +#if !defined(VUINT16x4_SUB_DEFINED) \ + && (defined(VUINT16x2_SUB_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_sub(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_SUB_DEFINED #endif - -#if !defined(VUINT16x4_MUL_DEFINED) && defined(VUINT16x2_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 16, 4, 2) +#if !defined(VUINT16x4_MUL_DEFINED) \ + && (defined(VUINT16x2_MUL_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_mul(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_MUL_DEFINED #endif - -#if 
!defined(VUINT16x4_DIV_DEFINED) && defined(VUINT16x2_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 16, 4, 2) +#if !defined(VUINT16x4_DIV_DEFINED) \ + && (defined(VUINT16x2_DIV_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_div(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_DIV_DEFINED #endif - -#if !defined(VUINT16x4_MOD_DEFINED) && defined(VUINT16x2_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 16, 4, 2) +#if !defined(VUINT16x4_MOD_DEFINED) \ + && (defined(VUINT16x2_MOD_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_mod(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_MOD_DEFINED #endif - -#if !defined(VUINT16x4_AVG_DEFINED) && defined(VUINT16x2_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 16, 4, 2) +#if !defined(VUINT16x4_AVG_DEFINED) \ + && (defined(VUINT16x2_AVG_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_avg(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_AVG_DEFINED #endif - -#if !defined(VUINT16x4_AND_DEFINED) && defined(VUINT16x2_AND_DEFINED) -VEC_DOUBLE_AND(u, 16, 4, 2) +#if !defined(VUINT16x4_AND_DEFINED) \ + && (defined(VUINT16x2_AND_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_and(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_AND_DEFINED #endif - -#if !defined(VUINT16x4_OR_DEFINED) && defined(VUINT16x2_OR_DEFINED) -VEC_DOUBLE_OR(u, 16, 4, 2) +#if !defined(VUINT16x4_OR_DEFINED) \ + && (defined(VUINT16x2_OR_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_or(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_OR_DEFINED #endif - -#if !defined(VUINT16x4_XOR_DEFINED) && defined(VUINT16x2_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 16, 4, 2) +#if !defined(VUINT16x4_XOR_DEFINED) \ + && (defined(VUINT16x2_XOR_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_xor(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_XOR_DEFINED #endif - -#if !defined(VUINT16x4_NOT_DEFINED) && defined(VUINT16x2_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 16, 4, 2) +#if !defined(VUINT16x4_NOT_DEFINED) \ + && (defined(VUINT16x2_NOT_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_not(vuint16x4 vec) +{ + vec.dbl[0] = vuint16x2_not(vec.dbl[0]); + vec1.dbl[1] = vuint16x2_not(vec.dbl[1]); + return vec; +} # define VUINT16x4_NOT_DEFINED #endif - -#if !defined(VUINT16x4_CMPLT_DEFINED) && defined(VUINT16x2_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 16, 4, 2) +#if !defined(VUINT16x4_CMPLT_DEFINED) \ + && (defined(VUINT16x2_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmplt(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_CMPLT_DEFINED #endif - -#if !defined(VUINT16x4_CMPEQ_DEFINED) && defined(VUINT16x2_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 16, 4, 2) +#if !defined(VUINT16x4_CMPEQ_DEFINED) \ + && (defined(VUINT16x2_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint16x4 
vuint16x4_cmpeq(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_CMPEQ_DEFINED #endif - -#if !defined(VUINT16x4_CMPGT_DEFINED) && defined(VUINT16x2_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 16, 4, 2) +#if !defined(VUINT16x4_CMPGT_DEFINED) \ + && (defined(VUINT16x2_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpgt(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_CMPGT_DEFINED #endif - -#if !defined(VUINT16x4_CMPLE_DEFINED) && defined(VUINT16x2_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 16, 4, 2) +#if !defined(VUINT16x4_CMPLE_DEFINED) \ + && (defined(VUINT16x2_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmple(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_CMPLE_DEFINED #endif - -#if !defined(VUINT16x4_CMPGE_DEFINED) && defined(VUINT16x2_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 16, 4, 2) +#if !defined(VUINT16x4_CMPGE_DEFINED) \ + && (defined(VUINT16x2_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpge(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_CMPGE_DEFINED #endif - -#if !defined(VUINT16x4_MIN_DEFINED) && defined(VUINT16x2_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 16, 4, 2) +#if !defined(VUINT16x4_MIN_DEFINED) \ + && (defined(VUINT16x2_MIN_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_min(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_MIN_DEFINED #endif - -#if !defined(VUINT16x4_MAX_DEFINED) && defined(VUINT16x2_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 16, 4, 2) +#if !defined(VUINT16x4_MAX_DEFINED) \ + && (defined(VUINT16x2_MAX_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_max(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_MAX_DEFINED #endif - -#if !defined(VUINT16x4_RSHIFT_DEFINED) && defined(VUINT16x2_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 16, 4, 2) +#if !defined(VUINT16x4_RSHIFT_DEFINED) \ + && (defined(VUINT16x2_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_rshift(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_RSHIFT_DEFINED #endif - -#if !defined(VUINT16x4_LRSHIFT_DEFINED) && defined(VUINT16x2_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 16, 4, 2) +#if !defined(VUINT16x4_LRSHIFT_DEFINED) \ + && (defined(VUINT16x2_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_lrshift(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_LRSHIFT_DEFINED #endif - -#if !defined(VUINT16x4_LSHIFT_DEFINED) && defined(VUINT16x2_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 16, 4, 2) +#if !defined(VUINT16x4_LSHIFT_DEFINED) \ + && (defined(VUINT16x2_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_lshift(vuint16x4 
vec1, vuint16x4 vec2) +{ + vec1.dbl[0] = vuint16x2_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x2_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x4_LSHIFT_DEFINED #endif - - - -/* vuint16x8 */ - -#if !defined(VINT16x8_SPLAT_DEFINED) && defined(VINT16x4_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_SPLAT_DEFINED) \ + && (defined(VINT16x4_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_splat(vec_int16 x) +{ + vint16x8 vec; + vec.dbl[0] = vint16x4_splat(x); + vec.dbl[1] = vint16x4_splat(x); + return vec; +} # define VINT16x8_SPLAT_DEFINED #endif - -#if !defined(VINT16x8_LOAD_ALIGNED_DEFINED) && defined(VINT16x4_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT16x4_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_load_aligned(const vec_int16 x[8]) +{ + vint16x8 vec; + vec.dbl[0] = vint16x4_load_aligned(x); + vec.dbl[1] = vint16x4_load_aligned(x + 4); + return vec; +} # define VINT16x8_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT16x8_LOAD_DEFINED) && defined(VINT16x4_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_LOAD_DEFINED) \ + && (defined(VINT16x4_LOAD_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_load(const vec_int16 x[8]) +{ + vint16x8 vec; + vec.dbl[0] = vint16x4_load(x); + vec.dbl[1] = vint16x4_load(x + 4); + return vec; +} # define VINT16x8_LOAD_DEFINED #endif - -#if !defined(VINT16x8_STORE_ALIGNED_DEFINED) && defined(VINT16x4_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_STORE_ALIGNED_DEFINED) \ + && (defined(VINT16x4_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 x[8]) +{ + vint16x4_store_aligned(vec.dbl[0], x); + vint16x4_store_aligned(vec.dbl[1], x + 4); +} # define VINT16x8_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT16x8_STORE_DEFINED) && defined(VINT16x4_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_STORE_DEFINED) \ + && (defined(VINT16x4_STORE_DEFINED)) +VEC_FUNC_IMPL void vint16x8_store(vint16x8 vec, vec_int16 x[8]) +{ + vint16x4_store(vec.dbl[0], x); + vint16x4_store(vec.dbl[1], x + 4); +} # define VINT16x8_STORE_DEFINED #endif - -#if !defined(VINT16x8_ADD_DEFINED) && defined(VINT16x4_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_ADD_DEFINED) \ + && (defined(VINT16x4_ADD_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_add(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_ADD_DEFINED #endif - -#if !defined(VINT16x8_SUB_DEFINED) && defined(VINT16x4_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_SUB_DEFINED) \ + && (defined(VINT16x4_SUB_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_sub(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_SUB_DEFINED #endif - -#if !defined(VINT16x8_MUL_DEFINED) && defined(VINT16x4_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_MUL_DEFINED) \ + && (defined(VINT16x4_MUL_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_mul(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define 
VINT16x8_MUL_DEFINED #endif - -#if !defined(VINT16x8_DIV_DEFINED) && defined(VINT16x4_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_DIV_DEFINED) \ + && (defined(VINT16x4_DIV_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_div(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_DIV_DEFINED #endif - -#if !defined(VINT16x8_MOD_DEFINED) && defined(VINT16x4_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_MOD_DEFINED) \ + && (defined(VINT16x4_MOD_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_mod(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_MOD_DEFINED #endif - -#if !defined(VINT16x8_AVG_DEFINED) && defined(VINT16x4_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_AVG_DEFINED) \ + && (defined(VINT16x4_AVG_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_AVG_DEFINED #endif - -#if !defined(VINT16x8_AND_DEFINED) && defined(VINT16x4_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_AND_DEFINED) \ + && (defined(VINT16x4_AND_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_AND_DEFINED #endif - -#if !defined(VINT16x8_OR_DEFINED) && defined(VINT16x4_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_OR_DEFINED) \ + && (defined(VINT16x4_OR_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_OR_DEFINED #endif - -#if !defined(VINT16x8_XOR_DEFINED) && defined(VINT16x4_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_XOR_DEFINED) \ + && (defined(VINT16x4_XOR_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_XOR_DEFINED #endif - -#if !defined(VINT16x8_NOT_DEFINED) && defined(VINT16x4_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_NOT_DEFINED) \ + && (defined(VINT16x4_NOT_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_not(vint16x8 vec) +{ + vec.dbl[0] = vint16x4_not(vec.dbl[0]); + vec1.dbl[1] = vint16x4_not(vec.dbl[1]); + return vec; +} # define VINT16x8_NOT_DEFINED #endif - -#if !defined(VINT16x8_CMPLT_DEFINED) && defined(VINT16x4_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_CMPLT_DEFINED) \ + && (defined(VINT16x4_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_CMPLT_DEFINED #endif - -#if !defined(VINT16x8_CMPEQ_DEFINED) && defined(VINT16x4_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_CMPEQ_DEFINED) \ + && 
(defined(VINT16x4_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_CMPEQ_DEFINED #endif - -#if !defined(VINT16x8_CMPGT_DEFINED) && defined(VINT16x4_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_CMPGT_DEFINED) \ + && (defined(VINT16x4_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_CMPGT_DEFINED #endif - -#if !defined(VINT16x8_CMPLE_DEFINED) && defined(VINT16x4_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_CMPLE_DEFINED) \ + && (defined(VINT16x4_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_cmple(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_CMPLE_DEFINED #endif - -#if !defined(VINT16x8_CMPGE_DEFINED) && defined(VINT16x4_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_CMPGE_DEFINED) \ + && (defined(VINT16x4_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_cmpge(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_CMPGE_DEFINED #endif - -#if !defined(VINT16x8_MIN_DEFINED) && defined(VINT16x4_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_MIN_DEFINED) \ + && (defined(VINT16x4_MIN_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_MIN_DEFINED #endif - -#if !defined(VINT16x8_MAX_DEFINED) && defined(VINT16x4_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_MAX_DEFINED) \ + && (defined(VINT16x4_MAX_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_MAX_DEFINED #endif - -#if !defined(VINT16x8_RSHIFT_DEFINED) && defined(VINT16x4_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_RSHIFT_DEFINED) \ + && (defined(VINT16x4_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_RSHIFT_DEFINED #endif - -#if !defined(VINT16x8_LRSHIFT_DEFINED) && defined(VINT16x4_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_LRSHIFT_DEFINED) \ + && (defined(VINT16x4_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_lrshift(vint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_LRSHIFT_DEFINED #endif - -#if !defined(VINT16x8_LSHIFT_DEFINED) && defined(VINT16x4_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 16, 8, 4) +#if !defined(VINT16x8_LSHIFT_DEFINED) \ + && 
(defined(VINT16x4_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vint16x4_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x4_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x8_LSHIFT_DEFINED #endif - - - -/* vint16x8 */ - -#if !defined(VUINT16x8_SPLAT_DEFINED) && defined(VUINT16x4_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 16, 8, 4) +#if !defined(VUINT16x8_SPLAT_DEFINED) \ + && (defined(VUINT16x4_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_splat(vec_uint16 x) +{ + vuint16x8 vec; + vec.dbl[0] = vuint16x4_splat(x); + vec.dbl[1] = vuint16x4_splat(x); + return vec; +} # define VUINT16x8_SPLAT_DEFINED #endif - -#if !defined(VUINT16x8_LOAD_ALIGNED_DEFINED) && defined(VUINT16x4_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 16, 8, 4) +#if !defined(VUINT16x8_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT16x4_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_load_aligned(const vec_uint16 x[8]) +{ + vuint16x8 vec; + vec.dbl[0] = vuint16x4_load_aligned(x); + vec.dbl[1] = vuint16x4_load_aligned(x + 4); + return vec; +} # define VUINT16x8_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT16x8_LOAD_DEFINED) && defined(VUINT16x4_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 16, 8, 4) +#if !defined(VUINT16x8_LOAD_DEFINED) \ + && (defined(VUINT16x4_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_load(const vec_uint16 x[8]) +{ + vuint16x8 vec; + vec.dbl[0] = vuint16x4_load(x); + vec.dbl[1] = vuint16x4_load(x + 4); + return vec; +} # define VUINT16x8_LOAD_DEFINED #endif - -#if !defined(VUINT16x8_STORE_ALIGNED_DEFINED) && defined(VUINT16x4_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 16, 8, 4) +#if !defined(VUINT16x8_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT16x4_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 x[8]) +{ + vuint16x4_store_aligned(vec.dbl[0], x); + vuint16x4_store_aligned(vec.dbl[1], x + 4); +} # define VUINT16x8_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT16x8_STORE_DEFINED) && defined(VUINT16x4_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 16, 8, 4) +#if !defined(VUINT16x8_STORE_DEFINED) \ + && (defined(VUINT16x4_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint16x8_store(vuint16x8 vec, vec_uint16 x[8]) +{ + vuint16x4_store(vec.dbl[0], x); + vuint16x4_store(vec.dbl[1], x + 4); +} # define VUINT16x8_STORE_DEFINED #endif - -#if !defined(VUINT16x8_ADD_DEFINED) && defined(VUINT16x4_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 16, 8, 4) +#if !defined(VUINT16x8_ADD_DEFINED) \ + && (defined(VUINT16x4_ADD_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_add(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_ADD_DEFINED #endif - -#if !defined(VUINT16x8_SUB_DEFINED) && defined(VUINT16x4_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 16, 8, 4) +#if !defined(VUINT16x8_SUB_DEFINED) \ + && (defined(VUINT16x4_SUB_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_sub(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_SUB_DEFINED #endif - -#if !defined(VUINT16x8_MUL_DEFINED) && defined(VUINT16x4_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 16, 8, 4) +#if !defined(VUINT16x8_MUL_DEFINED) \ + && (defined(VUINT16x4_MUL_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_mul(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = 
vuint16x4_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_MUL_DEFINED #endif - -#if !defined(VUINT16x8_DIV_DEFINED) && defined(VUINT16x4_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 16, 8, 4) +#if !defined(VUINT16x8_DIV_DEFINED) \ + && (defined(VUINT16x4_DIV_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_div(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_DIV_DEFINED #endif - -#if !defined(VUINT16x8_MOD_DEFINED) && defined(VUINT16x4_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 16, 8, 4) +#if !defined(VUINT16x8_MOD_DEFINED) \ + && (defined(VUINT16x4_MOD_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_mod(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_MOD_DEFINED #endif - -#if !defined(VUINT16x8_AVG_DEFINED) && defined(VUINT16x4_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 16, 8, 4) +#if !defined(VUINT16x8_AVG_DEFINED) \ + && (defined(VUINT16x4_AVG_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_AVG_DEFINED #endif - -#if !defined(VUINT16x8_AND_DEFINED) && defined(VUINT16x4_AND_DEFINED) -VEC_DOUBLE_AND(u, 16, 8, 4) +#if !defined(VUINT16x8_AND_DEFINED) \ + && (defined(VUINT16x4_AND_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_and(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_AND_DEFINED #endif - -#if !defined(VUINT16x8_OR_DEFINED) && defined(VUINT16x4_OR_DEFINED) -VEC_DOUBLE_OR(u, 16, 8, 4) +#if !defined(VUINT16x8_OR_DEFINED) \ + && (defined(VUINT16x4_OR_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_or(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_OR_DEFINED #endif - -#if !defined(VUINT16x8_XOR_DEFINED) && defined(VUINT16x4_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 16, 8, 4) +#if !defined(VUINT16x8_XOR_DEFINED) \ + && (defined(VUINT16x4_XOR_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_xor(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_XOR_DEFINED #endif - -#if !defined(VUINT16x8_NOT_DEFINED) && defined(VUINT16x4_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 16, 8, 4) +#if !defined(VUINT16x8_NOT_DEFINED) \ + && (defined(VUINT16x4_NOT_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_not(vuint16x8 vec) +{ + vec.dbl[0] = vuint16x4_not(vec.dbl[0]); + vec1.dbl[1] = vuint16x4_not(vec.dbl[1]); + return vec; +} # define VUINT16x8_NOT_DEFINED #endif - -#if !defined(VUINT16x8_CMPLT_DEFINED) && defined(VUINT16x4_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 16, 8, 4) +#if !defined(VUINT16x8_CMPLT_DEFINED) \ + && (defined(VUINT16x4_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmplt(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_CMPLT_DEFINED #endif - -#if !defined(VUINT16x8_CMPEQ_DEFINED) && defined(VUINT16x4_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 16, 8, 4) 
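One apparent slip in the generated fallbacks throughout this file: the
*_not functions assign their second half to vec1.dbl[1] even though
their only parameter is named vec, so these blocks would not compile if
one of them were ever enabled. Presumably the generator intended the
same forwarding shape as every other unary case, roughly:

    /* presumed intent of the _not fallbacks (vec, not vec1) */
    VEC_FUNC_IMPL vuint16x8 vuint16x8_not(vuint16x8 vec)
    {
        vec.dbl[0] = vuint16x4_not(vec.dbl[0]);
        vec.dbl[1] = vuint16x4_not(vec.dbl[1]);
        return vec;
    }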
+#if !defined(VUINT16x8_CMPEQ_DEFINED) \ + && (defined(VUINT16x4_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpeq(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_CMPEQ_DEFINED #endif - -#if !defined(VUINT16x8_CMPGT_DEFINED) && defined(VUINT16x4_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 16, 8, 4) +#if !defined(VUINT16x8_CMPGT_DEFINED) \ + && (defined(VUINT16x4_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpgt(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_CMPGT_DEFINED #endif - -#if !defined(VUINT16x8_CMPLE_DEFINED) && defined(VUINT16x4_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 16, 8, 4) +#if !defined(VUINT16x8_CMPLE_DEFINED) \ + && (defined(VUINT16x4_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmple(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_CMPLE_DEFINED #endif - -#if !defined(VUINT16x8_CMPGE_DEFINED) && defined(VUINT16x4_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 16, 8, 4) +#if !defined(VUINT16x8_CMPGE_DEFINED) \ + && (defined(VUINT16x4_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpge(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_CMPGE_DEFINED #endif - -#if !defined(VUINT16x8_MIN_DEFINED) && defined(VUINT16x4_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 16, 8, 4) +#if !defined(VUINT16x8_MIN_DEFINED) \ + && (defined(VUINT16x4_MIN_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_min(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_MIN_DEFINED #endif - -#if !defined(VUINT16x8_MAX_DEFINED) && defined(VUINT16x4_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 16, 8, 4) +#if !defined(VUINT16x8_MAX_DEFINED) \ + && (defined(VUINT16x4_MAX_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_max(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_MAX_DEFINED #endif - -#if !defined(VUINT16x8_RSHIFT_DEFINED) && defined(VUINT16x4_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 16, 8, 4) +#if !defined(VUINT16x8_RSHIFT_DEFINED) \ + && (defined(VUINT16x4_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_rshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_RSHIFT_DEFINED #endif - -#if !defined(VUINT16x8_LRSHIFT_DEFINED) && defined(VUINT16x4_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 16, 8, 4) +#if !defined(VUINT16x8_LRSHIFT_DEFINED) \ + && (defined(VUINT16x4_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_LRSHIFT_DEFINED #endif - -#if !defined(VUINT16x8_LSHIFT_DEFINED) && defined(VUINT16x4_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 16, 8, 4) +#if 
!defined(VUINT16x8_LSHIFT_DEFINED) \ + && (defined(VUINT16x4_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.dbl[0] = vuint16x4_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x4_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x8_LSHIFT_DEFINED #endif - - - -/* vuint16x16 */ - -#if !defined(VINT16x16_SPLAT_DEFINED) && defined(VINT16x8_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_SPLAT_DEFINED) \ + && (defined(VINT16x8_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_splat(vec_int16 x) +{ + vint16x16 vec; + vec.dbl[0] = vint16x8_splat(x); + vec.dbl[1] = vint16x8_splat(x); + return vec; +} # define VINT16x16_SPLAT_DEFINED #endif - -#if !defined(VINT16x16_LOAD_ALIGNED_DEFINED) && defined(VINT16x8_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT16x8_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_load_aligned(const vec_int16 x[16]) +{ + vint16x16 vec; + vec.dbl[0] = vint16x8_load_aligned(x); + vec.dbl[1] = vint16x8_load_aligned(x + 8); + return vec; +} # define VINT16x16_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT16x16_LOAD_DEFINED) && defined(VINT16x8_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_LOAD_DEFINED) \ + && (defined(VINT16x8_LOAD_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_load(const vec_int16 x[16]) +{ + vint16x16 vec; + vec.dbl[0] = vint16x8_load(x); + vec.dbl[1] = vint16x8_load(x + 8); + return vec; +} # define VINT16x16_LOAD_DEFINED #endif - -#if !defined(VINT16x16_STORE_ALIGNED_DEFINED) && defined(VINT16x8_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_STORE_ALIGNED_DEFINED) \ + && (defined(VINT16x8_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint16x16_store_aligned(vint16x16 vec, vec_int16 x[16]) +{ + vint16x8_store_aligned(vec.dbl[0], x); + vint16x8_store_aligned(vec.dbl[1], x + 8); +} # define VINT16x16_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT16x16_STORE_DEFINED) && defined(VINT16x8_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_STORE_DEFINED) \ + && (defined(VINT16x8_STORE_DEFINED)) +VEC_FUNC_IMPL void vint16x16_store(vint16x16 vec, vec_int16 x[16]) +{ + vint16x8_store(vec.dbl[0], x); + vint16x8_store(vec.dbl[1], x + 8); +} # define VINT16x16_STORE_DEFINED #endif - -#if !defined(VINT16x16_ADD_DEFINED) && defined(VINT16x8_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_ADD_DEFINED) \ + && (defined(VINT16x8_ADD_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_add(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_ADD_DEFINED #endif - -#if !defined(VINT16x16_SUB_DEFINED) && defined(VINT16x8_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_SUB_DEFINED) \ + && (defined(VINT16x8_SUB_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_sub(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_SUB_DEFINED #endif - -#if !defined(VINT16x16_MUL_DEFINED) && defined(VINT16x8_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_MUL_DEFINED) \ + && (defined(VINT16x8_MUL_DEFINED)) +VEC_FUNC_IMPL 
vint16x16 vint16x16_mul(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_MUL_DEFINED #endif - -#if !defined(VINT16x16_DIV_DEFINED) && defined(VINT16x8_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_DIV_DEFINED) \ + && (defined(VINT16x8_DIV_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_div(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_DIV_DEFINED #endif - -#if !defined(VINT16x16_MOD_DEFINED) && defined(VINT16x8_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_MOD_DEFINED) \ + && (defined(VINT16x8_MOD_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_mod(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_MOD_DEFINED #endif - -#if !defined(VINT16x16_AVG_DEFINED) && defined(VINT16x8_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_AVG_DEFINED) \ + && (defined(VINT16x8_AVG_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_avg(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_AVG_DEFINED #endif - -#if !defined(VINT16x16_AND_DEFINED) && defined(VINT16x8_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_AND_DEFINED) \ + && (defined(VINT16x8_AND_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_and(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_AND_DEFINED #endif - -#if !defined(VINT16x16_OR_DEFINED) && defined(VINT16x8_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_OR_DEFINED) \ + && (defined(VINT16x8_OR_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_or(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_OR_DEFINED #endif - -#if !defined(VINT16x16_XOR_DEFINED) && defined(VINT16x8_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_XOR_DEFINED) \ + && (defined(VINT16x8_XOR_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_xor(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_XOR_DEFINED #endif - -#if !defined(VINT16x16_NOT_DEFINED) && defined(VINT16x8_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_NOT_DEFINED) \ + && (defined(VINT16x8_NOT_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_not(vint16x16 vec) +{ + vec.dbl[0] = vint16x8_not(vec.dbl[0]); + vec1.dbl[1] = vint16x8_not(vec.dbl[1]); + return vec; +} # define VINT16x16_NOT_DEFINED #endif - -#if !defined(VINT16x16_CMPLT_DEFINED) && defined(VINT16x8_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_CMPLT_DEFINED) \ + && (defined(VINT16x8_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_cmplt(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = 
vint16x8_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_CMPLT_DEFINED #endif - -#if !defined(VINT16x16_CMPEQ_DEFINED) && defined(VINT16x8_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_CMPEQ_DEFINED) \ + && (defined(VINT16x8_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_cmpeq(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_CMPEQ_DEFINED #endif - -#if !defined(VINT16x16_CMPGT_DEFINED) && defined(VINT16x8_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_CMPGT_DEFINED) \ + && (defined(VINT16x8_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_cmpgt(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_CMPGT_DEFINED #endif - -#if !defined(VINT16x16_CMPLE_DEFINED) && defined(VINT16x8_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_CMPLE_DEFINED) \ + && (defined(VINT16x8_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_cmple(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_CMPLE_DEFINED #endif - -#if !defined(VINT16x16_CMPGE_DEFINED) && defined(VINT16x8_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_CMPGE_DEFINED) \ + && (defined(VINT16x8_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_cmpge(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_CMPGE_DEFINED #endif - -#if !defined(VINT16x16_MIN_DEFINED) && defined(VINT16x8_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_MIN_DEFINED) \ + && (defined(VINT16x8_MIN_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_min(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_MIN_DEFINED #endif - -#if !defined(VINT16x16_MAX_DEFINED) && defined(VINT16x8_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_MAX_DEFINED) \ + && (defined(VINT16x8_MAX_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_max(vint16x16 vec1, vint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_MAX_DEFINED #endif - -#if !defined(VINT16x16_RSHIFT_DEFINED) && defined(VINT16x8_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_RSHIFT_DEFINED) \ + && (defined(VINT16x8_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_rshift(vint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_RSHIFT_DEFINED #endif - -#if !defined(VINT16x16_LRSHIFT_DEFINED) && defined(VINT16x8_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_LRSHIFT_DEFINED) \ + && (defined(VINT16x8_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_lrshift(vint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = 
vint16x8_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_LRSHIFT_DEFINED #endif - -#if !defined(VINT16x16_LSHIFT_DEFINED) && defined(VINT16x8_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 16, 16, 8) +#if !defined(VINT16x16_LSHIFT_DEFINED) \ + && (defined(VINT16x8_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x16 vint16x16_lshift(vint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vint16x8_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x8_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x16_LSHIFT_DEFINED #endif - - - -/* vint16x16 */ - -#if !defined(VUINT16x16_SPLAT_DEFINED) && defined(VUINT16x8_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 16, 16, 8) +#if !defined(VUINT16x16_SPLAT_DEFINED) \ + && (defined(VUINT16x8_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_splat(vec_uint16 x) +{ + vuint16x16 vec; + vec.dbl[0] = vuint16x8_splat(x); + vec.dbl[1] = vuint16x8_splat(x); + return vec; +} # define VUINT16x16_SPLAT_DEFINED #endif - -#if !defined(VUINT16x16_LOAD_ALIGNED_DEFINED) && defined(VUINT16x8_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 16, 16, 8) +#if !defined(VUINT16x16_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT16x8_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_load_aligned(const vec_uint16 x[16]) +{ + vuint16x16 vec; + vec.dbl[0] = vuint16x8_load_aligned(x); + vec.dbl[1] = vuint16x8_load_aligned(x + 8); + return vec; +} # define VUINT16x16_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT16x16_LOAD_DEFINED) && defined(VUINT16x8_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 16, 16, 8) +#if !defined(VUINT16x16_LOAD_DEFINED) \ + && (defined(VUINT16x8_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_load(const vec_uint16 x[16]) +{ + vuint16x16 vec; + vec.dbl[0] = vuint16x8_load(x); + vec.dbl[1] = vuint16x8_load(x + 8); + return vec; +} # define VUINT16x16_LOAD_DEFINED #endif - -#if !defined(VUINT16x16_STORE_ALIGNED_DEFINED) && defined(VUINT16x8_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 16, 16, 8) +#if !defined(VUINT16x16_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT16x8_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint16x16_store_aligned(vuint16x16 vec, vec_uint16 x[16]) +{ + vuint16x8_store_aligned(vec.dbl[0], x); + vuint16x8_store_aligned(vec.dbl[1], x + 8); +} # define VUINT16x16_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT16x16_STORE_DEFINED) && defined(VUINT16x8_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 16, 16, 8) +#if !defined(VUINT16x16_STORE_DEFINED) \ + && (defined(VUINT16x8_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint16x16_store(vuint16x16 vec, vec_uint16 x[16]) +{ + vuint16x8_store(vec.dbl[0], x); + vuint16x8_store(vec.dbl[1], x + 8); +} # define VUINT16x16_STORE_DEFINED #endif - -#if !defined(VUINT16x16_ADD_DEFINED) && defined(VUINT16x8_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 16, 16, 8) +#if !defined(VUINT16x16_ADD_DEFINED) \ + && (defined(VUINT16x8_ADD_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_add(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_ADD_DEFINED #endif - -#if !defined(VUINT16x16_SUB_DEFINED) && defined(VUINT16x8_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 16, 16, 8) +#if !defined(VUINT16x16_SUB_DEFINED) \ + && (defined(VUINT16x8_SUB_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_sub(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_sub(vec1.dbl[1], 
vec2.dbl[1]); + return vec1; +} # define VUINT16x16_SUB_DEFINED #endif - -#if !defined(VUINT16x16_MUL_DEFINED) && defined(VUINT16x8_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 16, 16, 8) +#if !defined(VUINT16x16_MUL_DEFINED) \ + && (defined(VUINT16x8_MUL_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_mul(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_MUL_DEFINED #endif - -#if !defined(VUINT16x16_DIV_DEFINED) && defined(VUINT16x8_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 16, 16, 8) +#if !defined(VUINT16x16_DIV_DEFINED) \ + && (defined(VUINT16x8_DIV_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_div(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_DIV_DEFINED #endif - -#if !defined(VUINT16x16_MOD_DEFINED) && defined(VUINT16x8_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 16, 16, 8) +#if !defined(VUINT16x16_MOD_DEFINED) \ + && (defined(VUINT16x8_MOD_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_mod(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_MOD_DEFINED #endif - -#if !defined(VUINT16x16_AVG_DEFINED) && defined(VUINT16x8_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 16, 16, 8) +#if !defined(VUINT16x16_AVG_DEFINED) \ + && (defined(VUINT16x8_AVG_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_avg(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_AVG_DEFINED #endif - -#if !defined(VUINT16x16_AND_DEFINED) && defined(VUINT16x8_AND_DEFINED) -VEC_DOUBLE_AND(u, 16, 16, 8) +#if !defined(VUINT16x16_AND_DEFINED) \ + && (defined(VUINT16x8_AND_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_and(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_AND_DEFINED #endif - -#if !defined(VUINT16x16_OR_DEFINED) && defined(VUINT16x8_OR_DEFINED) -VEC_DOUBLE_OR(u, 16, 16, 8) +#if !defined(VUINT16x16_OR_DEFINED) \ + && (defined(VUINT16x8_OR_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_or(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_OR_DEFINED #endif - -#if !defined(VUINT16x16_XOR_DEFINED) && defined(VUINT16x8_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 16, 16, 8) +#if !defined(VUINT16x16_XOR_DEFINED) \ + && (defined(VUINT16x8_XOR_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_xor(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_XOR_DEFINED #endif - -#if !defined(VUINT16x16_NOT_DEFINED) && defined(VUINT16x8_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 16, 16, 8) +#if !defined(VUINT16x16_NOT_DEFINED) \ + && (defined(VUINT16x8_NOT_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_not(vuint16x16 vec) +{ + vec.dbl[0] = vuint16x8_not(vec.dbl[0]); + vec1.dbl[1] = vuint16x8_not(vec.dbl[1]); + return vec; +} # define VUINT16x16_NOT_DEFINED #endif - -#if !defined(VUINT16x16_CMPLT_DEFINED) && defined(VUINT16x8_CMPLT_DEFINED) 
-VEC_DOUBLE_CMPLT(u, 16, 16, 8) +#if !defined(VUINT16x16_CMPLT_DEFINED) \ + && (defined(VUINT16x8_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmplt(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_CMPLT_DEFINED #endif - -#if !defined(VUINT16x16_CMPEQ_DEFINED) && defined(VUINT16x8_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 16, 16, 8) +#if !defined(VUINT16x16_CMPEQ_DEFINED) \ + && (defined(VUINT16x8_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpeq(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_CMPEQ_DEFINED #endif - -#if !defined(VUINT16x16_CMPGT_DEFINED) && defined(VUINT16x8_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 16, 16, 8) +#if !defined(VUINT16x16_CMPGT_DEFINED) \ + && (defined(VUINT16x8_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpgt(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_CMPGT_DEFINED #endif - -#if !defined(VUINT16x16_CMPLE_DEFINED) && defined(VUINT16x8_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 16, 16, 8) +#if !defined(VUINT16x16_CMPLE_DEFINED) \ + && (defined(VUINT16x8_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmple(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_CMPLE_DEFINED #endif - -#if !defined(VUINT16x16_CMPGE_DEFINED) && defined(VUINT16x8_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 16, 16, 8) +#if !defined(VUINT16x16_CMPGE_DEFINED) \ + && (defined(VUINT16x8_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpge(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_CMPGE_DEFINED #endif - -#if !defined(VUINT16x16_MIN_DEFINED) && defined(VUINT16x8_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 16, 16, 8) +#if !defined(VUINT16x16_MIN_DEFINED) \ + && (defined(VUINT16x8_MIN_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_min(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_MIN_DEFINED #endif - -#if !defined(VUINT16x16_MAX_DEFINED) && defined(VUINT16x8_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 16, 16, 8) +#if !defined(VUINT16x16_MAX_DEFINED) \ + && (defined(VUINT16x8_MAX_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_max(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_MAX_DEFINED #endif - -#if !defined(VUINT16x16_RSHIFT_DEFINED) && defined(VUINT16x8_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 16, 16, 8) +#if !defined(VUINT16x16_RSHIFT_DEFINED) \ + && (defined(VUINT16x8_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_rshift(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_RSHIFT_DEFINED #endif - -#if !defined(VUINT16x16_LRSHIFT_DEFINED) && 
defined(VUINT16x8_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 16, 16, 8) +#if !defined(VUINT16x16_LRSHIFT_DEFINED) \ + && (defined(VUINT16x8_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_lrshift(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_LRSHIFT_DEFINED #endif - -#if !defined(VUINT16x16_LSHIFT_DEFINED) && defined(VUINT16x8_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 16, 16, 8) +#if !defined(VUINT16x16_LSHIFT_DEFINED) \ + && (defined(VUINT16x8_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_lshift(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.dbl[0] = vuint16x8_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x8_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x16_LSHIFT_DEFINED #endif - - - -/* vuint16x32 */ - -#if !defined(VINT16x32_SPLAT_DEFINED) && defined(VINT16x16_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_SPLAT_DEFINED) \ + && (defined(VINT16x16_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_splat(vec_int16 x) +{ + vint16x32 vec; + vec.dbl[0] = vint16x16_splat(x); + vec.dbl[1] = vint16x16_splat(x); + return vec; +} # define VINT16x32_SPLAT_DEFINED #endif - -#if !defined(VINT16x32_LOAD_ALIGNED_DEFINED) && defined(VINT16x16_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT16x16_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_load_aligned(const vec_int16 x[32]) +{ + vint16x32 vec; + vec.dbl[0] = vint16x16_load_aligned(x); + vec.dbl[1] = vint16x16_load_aligned(x + 16); + return vec; +} # define VINT16x32_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT16x32_LOAD_DEFINED) && defined(VINT16x16_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_LOAD_DEFINED) \ + && (defined(VINT16x16_LOAD_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_load(const vec_int16 x[32]) +{ + vint16x32 vec; + vec.dbl[0] = vint16x16_load(x); + vec.dbl[1] = vint16x16_load(x + 16); + return vec; +} # define VINT16x32_LOAD_DEFINED #endif - -#if !defined(VINT16x32_STORE_ALIGNED_DEFINED) && defined(VINT16x16_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_STORE_ALIGNED_DEFINED) \ + && (defined(VINT16x16_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint16x32_store_aligned(vint16x32 vec, vec_int16 x[32]) +{ + vint16x16_store_aligned(vec.dbl[0], x); + vint16x16_store_aligned(vec.dbl[1], x + 16); +} # define VINT16x32_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT16x32_STORE_DEFINED) && defined(VINT16x16_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_STORE_DEFINED) \ + && (defined(VINT16x16_STORE_DEFINED)) +VEC_FUNC_IMPL void vint16x32_store(vint16x32 vec, vec_int16 x[32]) +{ + vint16x16_store(vec.dbl[0], x); + vint16x16_store(vec.dbl[1], x + 16); +} # define VINT16x32_STORE_DEFINED #endif - -#if !defined(VINT16x32_ADD_DEFINED) && defined(VINT16x16_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_ADD_DEFINED) \ + && (defined(VINT16x16_ADD_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_add(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_ADD_DEFINED #endif - -#if !defined(VINT16x32_SUB_DEFINED) && 
defined(VINT16x16_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_SUB_DEFINED) \ + && (defined(VINT16x16_SUB_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_sub(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_SUB_DEFINED #endif - -#if !defined(VINT16x32_MUL_DEFINED) && defined(VINT16x16_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_MUL_DEFINED) \ + && (defined(VINT16x16_MUL_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_mul(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_MUL_DEFINED #endif - -#if !defined(VINT16x32_DIV_DEFINED) && defined(VINT16x16_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_DIV_DEFINED) \ + && (defined(VINT16x16_DIV_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_div(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_DIV_DEFINED #endif - -#if !defined(VINT16x32_MOD_DEFINED) && defined(VINT16x16_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_MOD_DEFINED) \ + && (defined(VINT16x16_MOD_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_mod(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_MOD_DEFINED #endif - -#if !defined(VINT16x32_AVG_DEFINED) && defined(VINT16x16_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_AVG_DEFINED) \ + && (defined(VINT16x16_AVG_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_avg(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_AVG_DEFINED #endif - -#if !defined(VINT16x32_AND_DEFINED) && defined(VINT16x16_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_AND_DEFINED) \ + && (defined(VINT16x16_AND_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_and(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_AND_DEFINED #endif - -#if !defined(VINT16x32_OR_DEFINED) && defined(VINT16x16_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_OR_DEFINED) \ + && (defined(VINT16x16_OR_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_or(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_OR_DEFINED #endif - -#if !defined(VINT16x32_XOR_DEFINED) && defined(VINT16x16_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_XOR_DEFINED) \ + && (defined(VINT16x16_XOR_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_xor(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_XOR_DEFINED #endif - -#if !defined(VINT16x32_NOT_DEFINED) && defined(VINT16x16_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 16, 
32, 16) +#if !defined(VINT16x32_NOT_DEFINED) \ + && (defined(VINT16x16_NOT_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_not(vint16x32 vec) +{ + vec.dbl[0] = vint16x16_not(vec.dbl[0]); + vec1.dbl[1] = vint16x16_not(vec.dbl[1]); + return vec; +} # define VINT16x32_NOT_DEFINED #endif - -#if !defined(VINT16x32_CMPLT_DEFINED) && defined(VINT16x16_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_CMPLT_DEFINED) \ + && (defined(VINT16x16_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_cmplt(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_CMPLT_DEFINED #endif - -#if !defined(VINT16x32_CMPEQ_DEFINED) && defined(VINT16x16_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_CMPEQ_DEFINED) \ + && (defined(VINT16x16_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_cmpeq(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_CMPEQ_DEFINED #endif - -#if !defined(VINT16x32_CMPGT_DEFINED) && defined(VINT16x16_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_CMPGT_DEFINED) \ + && (defined(VINT16x16_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_cmpgt(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_CMPGT_DEFINED #endif - -#if !defined(VINT16x32_CMPLE_DEFINED) && defined(VINT16x16_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_CMPLE_DEFINED) \ + && (defined(VINT16x16_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_cmple(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_CMPLE_DEFINED #endif - -#if !defined(VINT16x32_CMPGE_DEFINED) && defined(VINT16x16_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_CMPGE_DEFINED) \ + && (defined(VINT16x16_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_cmpge(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_CMPGE_DEFINED #endif - -#if !defined(VINT16x32_MIN_DEFINED) && defined(VINT16x16_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_MIN_DEFINED) \ + && (defined(VINT16x16_MIN_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_min(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_MIN_DEFINED #endif - -#if !defined(VINT16x32_MAX_DEFINED) && defined(VINT16x16_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_MAX_DEFINED) \ + && (defined(VINT16x16_MAX_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_max(vint16x32 vec1, vint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_MAX_DEFINED #endif - -#if !defined(VINT16x32_RSHIFT_DEFINED) && defined(VINT16x16_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 16, 32, 
16) +#if !defined(VINT16x32_RSHIFT_DEFINED) \ + && (defined(VINT16x16_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_rshift(vint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_RSHIFT_DEFINED #endif - -#if !defined(VINT16x32_LRSHIFT_DEFINED) && defined(VINT16x16_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_LRSHIFT_DEFINED) \ + && (defined(VINT16x16_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_lrshift(vint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_LRSHIFT_DEFINED #endif - -#if !defined(VINT16x32_LSHIFT_DEFINED) && defined(VINT16x16_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 16, 32, 16) +#if !defined(VINT16x32_LSHIFT_DEFINED) \ + && (defined(VINT16x16_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint16x32 vint16x32_lshift(vint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vint16x16_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint16x16_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT16x32_LSHIFT_DEFINED #endif - - - -/* vint16x32 */ - -#if !defined(VUINT16x32_SPLAT_DEFINED) && defined(VUINT16x16_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 16, 32, 16) +#if !defined(VUINT16x32_SPLAT_DEFINED) \ + && (defined(VUINT16x16_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_splat(vec_uint16 x) +{ + vuint16x32 vec; + vec.dbl[0] = vuint16x16_splat(x); + vec.dbl[1] = vuint16x16_splat(x); + return vec; +} # define VUINT16x32_SPLAT_DEFINED #endif - -#if !defined(VUINT16x32_LOAD_ALIGNED_DEFINED) && defined(VUINT16x16_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 16, 32, 16) +#if !defined(VUINT16x32_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT16x16_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_load_aligned(const vec_uint16 x[32]) +{ + vuint16x32 vec; + vec.dbl[0] = vuint16x16_load_aligned(x); + vec.dbl[1] = vuint16x16_load_aligned(x + 16); + return vec; +} # define VUINT16x32_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT16x32_LOAD_DEFINED) && defined(VUINT16x16_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 16, 32, 16) +#if !defined(VUINT16x32_LOAD_DEFINED) \ + && (defined(VUINT16x16_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_load(const vec_uint16 x[32]) +{ + vuint16x32 vec; + vec.dbl[0] = vuint16x16_load(x); + vec.dbl[1] = vuint16x16_load(x + 16); + return vec; +} # define VUINT16x32_LOAD_DEFINED #endif - -#if !defined(VUINT16x32_STORE_ALIGNED_DEFINED) && defined(VUINT16x16_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 16, 32, 16) +#if !defined(VUINT16x32_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT16x16_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint16x32_store_aligned(vuint16x32 vec, vec_uint16 x[32]) +{ + vuint16x16_store_aligned(vec.dbl[0], x); + vuint16x16_store_aligned(vec.dbl[1], x + 16); +} # define VUINT16x32_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT16x32_STORE_DEFINED) && defined(VUINT16x16_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 16, 32, 16) +#if !defined(VUINT16x32_STORE_DEFINED) \ + && (defined(VUINT16x16_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint16x32_store(vuint16x32 vec, vec_uint16 x[32]) +{ + vuint16x16_store(vec.dbl[0], x); + vuint16x16_store(vec.dbl[1], x + 16); +} # define VUINT16x32_STORE_DEFINED #endif - -#if !defined(VUINT16x32_ADD_DEFINED) && defined(VUINT16x16_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 16, 32, 16) 
+#if !defined(VUINT16x32_ADD_DEFINED) \ + && (defined(VUINT16x16_ADD_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_add(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_ADD_DEFINED #endif - -#if !defined(VUINT16x32_SUB_DEFINED) && defined(VUINT16x16_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 16, 32, 16) +#if !defined(VUINT16x32_SUB_DEFINED) \ + && (defined(VUINT16x16_SUB_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_sub(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_SUB_DEFINED #endif - -#if !defined(VUINT16x32_MUL_DEFINED) && defined(VUINT16x16_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 16, 32, 16) +#if !defined(VUINT16x32_MUL_DEFINED) \ + && (defined(VUINT16x16_MUL_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_mul(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_MUL_DEFINED #endif - -#if !defined(VUINT16x32_DIV_DEFINED) && defined(VUINT16x16_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 16, 32, 16) +#if !defined(VUINT16x32_DIV_DEFINED) \ + && (defined(VUINT16x16_DIV_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_div(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_DIV_DEFINED #endif - -#if !defined(VUINT16x32_MOD_DEFINED) && defined(VUINT16x16_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 16, 32, 16) +#if !defined(VUINT16x32_MOD_DEFINED) \ + && (defined(VUINT16x16_MOD_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_mod(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_MOD_DEFINED #endif - -#if !defined(VUINT16x32_AVG_DEFINED) && defined(VUINT16x16_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 16, 32, 16) +#if !defined(VUINT16x32_AVG_DEFINED) \ + && (defined(VUINT16x16_AVG_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_avg(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_AVG_DEFINED #endif - -#if !defined(VUINT16x32_AND_DEFINED) && defined(VUINT16x16_AND_DEFINED) -VEC_DOUBLE_AND(u, 16, 32, 16) +#if !defined(VUINT16x32_AND_DEFINED) \ + && (defined(VUINT16x16_AND_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_and(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_AND_DEFINED #endif - -#if !defined(VUINT16x32_OR_DEFINED) && defined(VUINT16x16_OR_DEFINED) -VEC_DOUBLE_OR(u, 16, 32, 16) +#if !defined(VUINT16x32_OR_DEFINED) \ + && (defined(VUINT16x16_OR_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_or(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_OR_DEFINED #endif - -#if !defined(VUINT16x32_XOR_DEFINED) && defined(VUINT16x16_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 16, 32, 16) +#if !defined(VUINT16x32_XOR_DEFINED) \ + && 
(defined(VUINT16x16_XOR_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_xor(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_XOR_DEFINED #endif - -#if !defined(VUINT16x32_NOT_DEFINED) && defined(VUINT16x16_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 16, 32, 16) +#if !defined(VUINT16x32_NOT_DEFINED) \ + && (defined(VUINT16x16_NOT_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_not(vuint16x32 vec) +{ + vec.dbl[0] = vuint16x16_not(vec.dbl[0]); + vec1.dbl[1] = vuint16x16_not(vec.dbl[1]); + return vec; +} # define VUINT16x32_NOT_DEFINED #endif - -#if !defined(VUINT16x32_CMPLT_DEFINED) && defined(VUINT16x16_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 16, 32, 16) +#if !defined(VUINT16x32_CMPLT_DEFINED) \ + && (defined(VUINT16x16_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmplt(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_CMPLT_DEFINED #endif - -#if !defined(VUINT16x32_CMPEQ_DEFINED) && defined(VUINT16x16_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 16, 32, 16) +#if !defined(VUINT16x32_CMPEQ_DEFINED) \ + && (defined(VUINT16x16_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpeq(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_CMPEQ_DEFINED #endif - -#if !defined(VUINT16x32_CMPGT_DEFINED) && defined(VUINT16x16_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 16, 32, 16) +#if !defined(VUINT16x32_CMPGT_DEFINED) \ + && (defined(VUINT16x16_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpgt(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_CMPGT_DEFINED #endif - -#if !defined(VUINT16x32_CMPLE_DEFINED) && defined(VUINT16x16_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 16, 32, 16) +#if !defined(VUINT16x32_CMPLE_DEFINED) \ + && (defined(VUINT16x16_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmple(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_CMPLE_DEFINED #endif - -#if !defined(VUINT16x32_CMPGE_DEFINED) && defined(VUINT16x16_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 16, 32, 16) +#if !defined(VUINT16x32_CMPGE_DEFINED) \ + && (defined(VUINT16x16_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpge(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_CMPGE_DEFINED #endif - -#if !defined(VUINT16x32_MIN_DEFINED) && defined(VUINT16x16_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 16, 32, 16) +#if !defined(VUINT16x32_MIN_DEFINED) \ + && (defined(VUINT16x16_MIN_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_min(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_MIN_DEFINED #endif - -#if !defined(VUINT16x32_MAX_DEFINED) && defined(VUINT16x16_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 16, 32, 16) +#if !defined(VUINT16x32_MAX_DEFINED) \ + && 
(defined(VUINT16x16_MAX_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_max(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_MAX_DEFINED #endif - -#if !defined(VUINT16x32_RSHIFT_DEFINED) && defined(VUINT16x16_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 16, 32, 16) +#if !defined(VUINT16x32_RSHIFT_DEFINED) \ + && (defined(VUINT16x16_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_rshift(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_RSHIFT_DEFINED #endif - -#if !defined(VUINT16x32_LRSHIFT_DEFINED) && defined(VUINT16x16_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 16, 32, 16) +#if !defined(VUINT16x32_LRSHIFT_DEFINED) \ + && (defined(VUINT16x16_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_lrshift(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_LRSHIFT_DEFINED #endif - -#if !defined(VUINT16x32_LSHIFT_DEFINED) && defined(VUINT16x16_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 16, 32, 16) +#if !defined(VUINT16x32_LSHIFT_DEFINED) \ + && (defined(VUINT16x16_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_lshift(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.dbl[0] = vuint16x16_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint16x16_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT16x32_LSHIFT_DEFINED #endif - - - -/* vuint32x4 */ - -#if !defined(VINT32x4_SPLAT_DEFINED) && defined(VINT32x2_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 32, 4, 2) +#if !defined(VINT32x2_SPLAT_DEFINED) \ + && (defined(VINT32x1_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_splat(vec_int32 x) +{ + vint32x2 vec; + vec.dbl[0] = vint32x1_splat(x); + vec.dbl[1] = vint32x1_splat(x); + return vec; +} +# define VINT32x2_SPLAT_DEFINED +#endif +#if !defined(VINT32x2_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT32x1_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_load_aligned(const vec_int32 x[2]) +{ + vint32x2 vec; + vec.dbl[0] = vint32x1_load_aligned(x); + vec.dbl[1] = vint32x1_load_aligned(x + 1); + return vec; +} +# define VINT32x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT32x2_LOAD_DEFINED) \ + && (defined(VINT32x1_LOAD_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_load(const vec_int32 x[2]) +{ + vint32x2 vec; + vec.dbl[0] = vint32x1_load(x); + vec.dbl[1] = vint32x1_load(x + 1); + return vec; +} +# define VINT32x2_LOAD_DEFINED +#endif +#if !defined(VINT32x2_STORE_ALIGNED_DEFINED) \ + && (defined(VINT32x1_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint32x2_store_aligned(vint32x2 vec, vec_int32 x[2]) +{ + vint32x1_store_aligned(vec.dbl[0], x); + vint32x1_store_aligned(vec.dbl[1], x + 1); +} +# define VINT32x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT32x2_STORE_DEFINED) \ + && (defined(VINT32x1_STORE_DEFINED)) +VEC_FUNC_IMPL void vint32x2_store(vint32x2 vec, vec_int32 x[2]) +{ + vint32x1_store(vec.dbl[0], x); + vint32x1_store(vec.dbl[1], x + 1); +} +# define VINT32x2_STORE_DEFINED +#endif +#if !defined(VINT32x2_ADD_DEFINED) \ + && (defined(VINT32x1_ADD_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_add(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_add(vec1.dbl[1], vec2.dbl[1]); + return 
vec1; +} +# define VINT32x2_ADD_DEFINED +#endif +#if !defined(VINT32x2_SUB_DEFINED) \ + && (defined(VINT32x1_SUB_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_sub(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_SUB_DEFINED +#endif +#if !defined(VINT32x2_MUL_DEFINED) \ + && (defined(VINT32x1_MUL_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_mul(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_MUL_DEFINED +#endif +#if !defined(VINT32x2_DIV_DEFINED) \ + && (defined(VINT32x1_DIV_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_div(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_DIV_DEFINED +#endif +#if !defined(VINT32x2_MOD_DEFINED) \ + && (defined(VINT32x1_MOD_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_mod(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_MOD_DEFINED +#endif +#if !defined(VINT32x2_AVG_DEFINED) \ + && (defined(VINT32x1_AVG_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_avg(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_AVG_DEFINED +#endif +#if !defined(VINT32x2_AND_DEFINED) \ + && (defined(VINT32x1_AND_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_and(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_AND_DEFINED +#endif +#if !defined(VINT32x2_OR_DEFINED) \ + && (defined(VINT32x1_OR_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_or(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_OR_DEFINED +#endif +#if !defined(VINT32x2_XOR_DEFINED) \ + && (defined(VINT32x1_XOR_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_xor(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_XOR_DEFINED +#endif +#if !defined(VINT32x2_NOT_DEFINED) \ + && (defined(VINT32x1_NOT_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_not(vint32x2 vec) +{ + vec.dbl[0] = vint32x1_not(vec.dbl[0]); + vec1.dbl[1] = vint32x1_not(vec.dbl[1]); + return vec; +} +# define VINT32x2_NOT_DEFINED +#endif +#if !defined(VINT32x2_CMPLT_DEFINED) \ + && (defined(VINT32x1_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_cmplt(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_CMPLT_DEFINED +#endif +#if !defined(VINT32x2_CMPEQ_DEFINED) \ + && (defined(VINT32x1_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_cmpeq(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_CMPEQ_DEFINED +#endif +#if !defined(VINT32x2_CMPGT_DEFINED) \ + && (defined(VINT32x1_CMPGT_DEFINED)) 
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpgt(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_CMPGT_DEFINED +#endif +#if !defined(VINT32x2_CMPLE_DEFINED) \ + && (defined(VINT32x1_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_cmple(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_CMPLE_DEFINED +#endif +#if !defined(VINT32x2_CMPGE_DEFINED) \ + && (defined(VINT32x1_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_cmpge(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_CMPGE_DEFINED +#endif +#if !defined(VINT32x2_MIN_DEFINED) \ + && (defined(VINT32x1_MIN_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_min(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_MIN_DEFINED +#endif +#if !defined(VINT32x2_MAX_DEFINED) \ + && (defined(VINT32x1_MAX_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_max(vint32x2 vec1, vint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_MAX_DEFINED +#endif +#if !defined(VINT32x2_RSHIFT_DEFINED) \ + && (defined(VINT32x1_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_rshift(vint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_RSHIFT_DEFINED +#endif +#if !defined(VINT32x2_LRSHIFT_DEFINED) \ + && (defined(VINT32x1_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_lrshift(vint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_LRSHIFT_DEFINED +#endif +#if !defined(VINT32x2_LSHIFT_DEFINED) \ + && (defined(VINT32x1_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint32x2 vint32x2_lshift(vint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vint32x1_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x1_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT32x2_LSHIFT_DEFINED +#endif +#if !defined(VUINT32x2_SPLAT_DEFINED) \ + && (defined(VUINT32x1_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_splat(vec_uint32 x) +{ + vuint32x2 vec; + vec.dbl[0] = vuint32x1_splat(x); + vec.dbl[1] = vuint32x1_splat(x); + return vec; +} +# define VUINT32x2_SPLAT_DEFINED +#endif +#if !defined(VUINT32x2_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT32x1_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_load_aligned(const vec_uint32 x[2]) +{ + vuint32x2 vec; + vec.dbl[0] = vuint32x1_load_aligned(x); + vec.dbl[1] = vuint32x1_load_aligned(x + 1); + return vec; +} +# define VUINT32x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT32x2_LOAD_DEFINED) \ + && (defined(VUINT32x1_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_load(const vec_uint32 x[2]) +{ + vuint32x2 vec; + vec.dbl[0] = vuint32x1_load(x); + vec.dbl[1] = vuint32x1_load(x + 1); + return vec; +} +# define VUINT32x2_LOAD_DEFINED +#endif +#if !defined(VUINT32x2_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT32x1_STORE_ALIGNED_DEFINED)) 
+VEC_FUNC_IMPL void vuint32x2_store_aligned(vuint32x2 vec, vec_uint32 x[2]) +{ + vuint32x1_store_aligned(vec.dbl[0], x); + vuint32x1_store_aligned(vec.dbl[1], x + 1); +} +# define VUINT32x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT32x2_STORE_DEFINED) \ + && (defined(VUINT32x1_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint32x2_store(vuint32x2 vec, vec_uint32 x[2]) +{ + vuint32x1_store(vec.dbl[0], x); + vuint32x1_store(vec.dbl[1], x + 1); +} +# define VUINT32x2_STORE_DEFINED +#endif +#if !defined(VUINT32x2_ADD_DEFINED) \ + && (defined(VUINT32x1_ADD_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_add(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_ADD_DEFINED +#endif +#if !defined(VUINT32x2_SUB_DEFINED) \ + && (defined(VUINT32x1_SUB_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_sub(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_SUB_DEFINED +#endif +#if !defined(VUINT32x2_MUL_DEFINED) \ + && (defined(VUINT32x1_MUL_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_mul(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_MUL_DEFINED +#endif +#if !defined(VUINT32x2_DIV_DEFINED) \ + && (defined(VUINT32x1_DIV_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_div(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_DIV_DEFINED +#endif +#if !defined(VUINT32x2_MOD_DEFINED) \ + && (defined(VUINT32x1_MOD_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_mod(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_MOD_DEFINED +#endif +#if !defined(VUINT32x2_AVG_DEFINED) \ + && (defined(VUINT32x1_AVG_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_avg(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_AVG_DEFINED +#endif +#if !defined(VUINT32x2_AND_DEFINED) \ + && (defined(VUINT32x1_AND_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_and(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_AND_DEFINED +#endif +#if !defined(VUINT32x2_OR_DEFINED) \ + && (defined(VUINT32x1_OR_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_or(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_OR_DEFINED +#endif +#if !defined(VUINT32x2_XOR_DEFINED) \ + && (defined(VUINT32x1_XOR_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_xor(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_XOR_DEFINED +#endif +#if !defined(VUINT32x2_NOT_DEFINED) \ + && (defined(VUINT32x1_NOT_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_not(vuint32x2 vec) +{ + vec.dbl[0] = 
vuint32x1_not(vec.dbl[0]); + vec1.dbl[1] = vuint32x1_not(vec.dbl[1]); + return vec; +} +# define VUINT32x2_NOT_DEFINED +#endif +#if !defined(VUINT32x2_CMPLT_DEFINED) \ + && (defined(VUINT32x1_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmplt(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_CMPLT_DEFINED +#endif +#if !defined(VUINT32x2_CMPEQ_DEFINED) \ + && (defined(VUINT32x1_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpeq(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_CMPEQ_DEFINED +#endif +#if !defined(VUINT32x2_CMPGT_DEFINED) \ + && (defined(VUINT32x1_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpgt(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_CMPGT_DEFINED +#endif +#if !defined(VUINT32x2_CMPLE_DEFINED) \ + && (defined(VUINT32x1_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmple(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_CMPLE_DEFINED +#endif +#if !defined(VUINT32x2_CMPGE_DEFINED) \ + && (defined(VUINT32x1_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpge(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_CMPGE_DEFINED +#endif +#if !defined(VUINT32x2_MIN_DEFINED) \ + && (defined(VUINT32x1_MIN_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_min(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_MIN_DEFINED +#endif +#if !defined(VUINT32x2_MAX_DEFINED) \ + && (defined(VUINT32x1_MAX_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_max(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_MAX_DEFINED +#endif +#if !defined(VUINT32x2_RSHIFT_DEFINED) \ + && (defined(VUINT32x1_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_rshift(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_RSHIFT_DEFINED +#endif +#if !defined(VUINT32x2_LRSHIFT_DEFINED) \ + && (defined(VUINT32x1_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_lrshift(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_LRSHIFT_DEFINED +#endif +#if !defined(VUINT32x2_LSHIFT_DEFINED) \ + && (defined(VUINT32x1_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_lshift(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.dbl[0] = vuint32x1_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x1_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT32x2_LSHIFT_DEFINED +#endif +#if !defined(VINT32x4_SPLAT_DEFINED) \ + && 
(defined(VINT32x2_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_splat(vec_int32 x) +{ + vint32x4 vec; + vec.dbl[0] = vint32x2_splat(x); + vec.dbl[1] = vint32x2_splat(x); + return vec; +} # define VINT32x4_SPLAT_DEFINED #endif - -#if !defined(VINT32x4_LOAD_ALIGNED_DEFINED) && defined(VINT32x2_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT32x2_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_load_aligned(const vec_int32 x[4]) +{ + vint32x4 vec; + vec.dbl[0] = vint32x2_load_aligned(x); + vec.dbl[1] = vint32x2_load_aligned(x + 2); + return vec; +} # define VINT32x4_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT32x4_LOAD_DEFINED) && defined(VINT32x2_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_LOAD_DEFINED) \ + && (defined(VINT32x2_LOAD_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_load(const vec_int32 x[4]) +{ + vint32x4 vec; + vec.dbl[0] = vint32x2_load(x); + vec.dbl[1] = vint32x2_load(x + 2); + return vec; +} # define VINT32x4_LOAD_DEFINED #endif - -#if !defined(VINT32x4_STORE_ALIGNED_DEFINED) && defined(VINT32x2_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_STORE_ALIGNED_DEFINED) \ + && (defined(VINT32x2_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 x[4]) +{ + vint32x2_store_aligned(vec.dbl[0], x); + vint32x2_store_aligned(vec.dbl[1], x + 2); +} # define VINT32x4_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT32x4_STORE_DEFINED) && defined(VINT32x2_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_STORE_DEFINED) \ + && (defined(VINT32x2_STORE_DEFINED)) +VEC_FUNC_IMPL void vint32x4_store(vint32x4 vec, vec_int32 x[4]) +{ + vint32x2_store(vec.dbl[0], x); + vint32x2_store(vec.dbl[1], x + 2); +} # define VINT32x4_STORE_DEFINED #endif - -#if !defined(VINT32x4_ADD_DEFINED) && defined(VINT32x2_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_ADD_DEFINED) \ + && (defined(VINT32x2_ADD_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_ADD_DEFINED #endif - -#if !defined(VINT32x4_SUB_DEFINED) && defined(VINT32x2_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_SUB_DEFINED) \ + && (defined(VINT32x2_SUB_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_sub(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_SUB_DEFINED #endif - -#if !defined(VINT32x4_MUL_DEFINED) && defined(VINT32x2_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_MUL_DEFINED) \ + && (defined(VINT32x2_MUL_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_mul(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_MUL_DEFINED #endif - -#if !defined(VINT32x4_DIV_DEFINED) && defined(VINT32x2_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_DIV_DEFINED) \ + && (defined(VINT32x2_DIV_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_div(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_div(vec1.dbl[1], 
vec2.dbl[1]); + return vec1; +} # define VINT32x4_DIV_DEFINED #endif - -#if !defined(VINT32x4_MOD_DEFINED) && defined(VINT32x2_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_MOD_DEFINED) \ + && (defined(VINT32x2_MOD_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_mod(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_MOD_DEFINED #endif - -#if !defined(VINT32x4_AVG_DEFINED) && defined(VINT32x2_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_AVG_DEFINED) \ + && (defined(VINT32x2_AVG_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_AVG_DEFINED #endif - -#if !defined(VINT32x4_AND_DEFINED) && defined(VINT32x2_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_AND_DEFINED) \ + && (defined(VINT32x2_AND_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_AND_DEFINED #endif - -#if !defined(VINT32x4_OR_DEFINED) && defined(VINT32x2_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_OR_DEFINED) \ + && (defined(VINT32x2_OR_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_OR_DEFINED #endif - -#if !defined(VINT32x4_XOR_DEFINED) && defined(VINT32x2_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_XOR_DEFINED) \ + && (defined(VINT32x2_XOR_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_XOR_DEFINED #endif - -#if !defined(VINT32x4_NOT_DEFINED) && defined(VINT32x2_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_NOT_DEFINED) \ + && (defined(VINT32x2_NOT_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_not(vint32x4 vec) +{ + vec.dbl[0] = vint32x2_not(vec.dbl[0]); + vec1.dbl[1] = vint32x2_not(vec.dbl[1]); + return vec; +} # define VINT32x4_NOT_DEFINED #endif - -#if !defined(VINT32x4_CMPLT_DEFINED) && defined(VINT32x2_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_CMPLT_DEFINED) \ + && (defined(VINT32x2_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_CMPLT_DEFINED #endif - -#if !defined(VINT32x4_CMPEQ_DEFINED) && defined(VINT32x2_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_CMPEQ_DEFINED) \ + && (defined(VINT32x2_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_CMPEQ_DEFINED #endif - -#if !defined(VINT32x4_CMPGT_DEFINED) && defined(VINT32x2_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 32, 
4, 2) +#if !defined(VINT32x4_CMPGT_DEFINED) \ + && (defined(VINT32x2_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_CMPGT_DEFINED #endif - -#if !defined(VINT32x4_CMPLE_DEFINED) && defined(VINT32x2_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_CMPLE_DEFINED) \ + && (defined(VINT32x2_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_cmple(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_CMPLE_DEFINED #endif - -#if !defined(VINT32x4_CMPGE_DEFINED) && defined(VINT32x2_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_CMPGE_DEFINED) \ + && (defined(VINT32x2_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_cmpge(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_CMPGE_DEFINED #endif - -#if !defined(VINT32x4_MIN_DEFINED) && defined(VINT32x2_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_MIN_DEFINED) \ + && (defined(VINT32x2_MIN_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_MIN_DEFINED #endif - -#if !defined(VINT32x4_MAX_DEFINED) && defined(VINT32x2_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_MAX_DEFINED) \ + && (defined(VINT32x2_MAX_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_MAX_DEFINED #endif - -#if !defined(VINT32x4_RSHIFT_DEFINED) && defined(VINT32x2_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_RSHIFT_DEFINED) \ + && (defined(VINT32x2_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_RSHIFT_DEFINED #endif - -#if !defined(VINT32x4_LRSHIFT_DEFINED) && defined(VINT32x2_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_LRSHIFT_DEFINED) \ + && (defined(VINT32x2_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_LRSHIFT_DEFINED #endif - -#if !defined(VINT32x4_LSHIFT_DEFINED) && defined(VINT32x2_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 32, 4, 2) +#if !defined(VINT32x4_LSHIFT_DEFINED) \ + && (defined(VINT32x2_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vint32x2_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x2_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x4_LSHIFT_DEFINED #endif - - - -/* vint32x4 */ - -#if !defined(VUINT32x4_SPLAT_DEFINED) && defined(VUINT32x2_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 32, 4, 
2) +#if !defined(VUINT32x4_SPLAT_DEFINED) \ + && (defined(VUINT32x2_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_splat(vec_uint32 x) +{ + vuint32x4 vec; + vec.dbl[0] = vuint32x2_splat(x); + vec.dbl[1] = vuint32x2_splat(x); + return vec; +} # define VUINT32x4_SPLAT_DEFINED #endif - -#if !defined(VUINT32x4_LOAD_ALIGNED_DEFINED) && defined(VUINT32x2_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 32, 4, 2) +#if !defined(VUINT32x4_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT32x2_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_load_aligned(const vec_uint32 x[4]) +{ + vuint32x4 vec; + vec.dbl[0] = vuint32x2_load_aligned(x); + vec.dbl[1] = vuint32x2_load_aligned(x + 2); + return vec; +} # define VUINT32x4_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT32x4_LOAD_DEFINED) && defined(VUINT32x2_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 32, 4, 2) +#if !defined(VUINT32x4_LOAD_DEFINED) \ + && (defined(VUINT32x2_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_load(const vec_uint32 x[4]) +{ + vuint32x4 vec; + vec.dbl[0] = vuint32x2_load(x); + vec.dbl[1] = vuint32x2_load(x + 2); + return vec; +} # define VUINT32x4_LOAD_DEFINED #endif - -#if !defined(VUINT32x4_STORE_ALIGNED_DEFINED) && defined(VUINT32x2_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 32, 4, 2) +#if !defined(VUINT32x4_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT32x2_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 x[4]) +{ + vuint32x2_store_aligned(vec.dbl[0], x); + vuint32x2_store_aligned(vec.dbl[1], x + 2); +} # define VUINT32x4_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT32x4_STORE_DEFINED) && defined(VUINT32x2_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 32, 4, 2) +#if !defined(VUINT32x4_STORE_DEFINED) \ + && (defined(VUINT32x2_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint32x4_store(vuint32x4 vec, vec_uint32 x[4]) +{ + vuint32x2_store(vec.dbl[0], x); + vuint32x2_store(vec.dbl[1], x + 2); +} # define VUINT32x4_STORE_DEFINED #endif - -#if !defined(VUINT32x4_ADD_DEFINED) && defined(VUINT32x2_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 32, 4, 2) +#if !defined(VUINT32x4_ADD_DEFINED) \ + && (defined(VUINT32x2_ADD_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_add(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_ADD_DEFINED #endif - -#if !defined(VUINT32x4_SUB_DEFINED) && defined(VUINT32x2_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 32, 4, 2) +#if !defined(VUINT32x4_SUB_DEFINED) \ + && (defined(VUINT32x2_SUB_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_sub(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_SUB_DEFINED #endif - -#if !defined(VUINT32x4_MUL_DEFINED) && defined(VUINT32x2_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 32, 4, 2) +#if !defined(VUINT32x4_MUL_DEFINED) \ + && (defined(VUINT32x2_MUL_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_mul(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_MUL_DEFINED #endif - -#if !defined(VUINT32x4_DIV_DEFINED) && defined(VUINT32x2_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 32, 4, 2) +#if !defined(VUINT32x4_DIV_DEFINED) \ + && (defined(VUINT32x2_DIV_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_div(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_div(vec1.dbl[0], vec2.dbl[0]); + 
vec1.dbl[1] = vuint32x2_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_DIV_DEFINED #endif - -#if !defined(VUINT32x4_MOD_DEFINED) && defined(VUINT32x2_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 32, 4, 2) +#if !defined(VUINT32x4_MOD_DEFINED) \ + && (defined(VUINT32x2_MOD_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_mod(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_MOD_DEFINED #endif - -#if !defined(VUINT32x4_AVG_DEFINED) && defined(VUINT32x2_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 32, 4, 2) +#if !defined(VUINT32x4_AVG_DEFINED) \ + && (defined(VUINT32x2_AVG_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_AVG_DEFINED #endif - -#if !defined(VUINT32x4_AND_DEFINED) && defined(VUINT32x2_AND_DEFINED) -VEC_DOUBLE_AND(u, 32, 4, 2) +#if !defined(VUINT32x4_AND_DEFINED) \ + && (defined(VUINT32x2_AND_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_and(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_AND_DEFINED #endif - -#if !defined(VUINT32x4_OR_DEFINED) && defined(VUINT32x2_OR_DEFINED) -VEC_DOUBLE_OR(u, 32, 4, 2) +#if !defined(VUINT32x4_OR_DEFINED) \ + && (defined(VUINT32x2_OR_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_or(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_OR_DEFINED #endif - -#if !defined(VUINT32x4_XOR_DEFINED) && defined(VUINT32x2_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 32, 4, 2) +#if !defined(VUINT32x4_XOR_DEFINED) \ + && (defined(VUINT32x2_XOR_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_xor(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_XOR_DEFINED #endif - -#if !defined(VUINT32x4_NOT_DEFINED) && defined(VUINT32x2_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 32, 4, 2) +#if !defined(VUINT32x4_NOT_DEFINED) \ + && (defined(VUINT32x2_NOT_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_not(vuint32x4 vec) +{ + vec.dbl[0] = vuint32x2_not(vec.dbl[0]); + vec1.dbl[1] = vuint32x2_not(vec.dbl[1]); + return vec; +} # define VUINT32x4_NOT_DEFINED #endif - -#if !defined(VUINT32x4_CMPLT_DEFINED) && defined(VUINT32x2_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 32, 4, 2) +#if !defined(VUINT32x4_CMPLT_DEFINED) \ + && (defined(VUINT32x2_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmplt(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_CMPLT_DEFINED #endif - -#if !defined(VUINT32x4_CMPEQ_DEFINED) && defined(VUINT32x2_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 32, 4, 2) +#if !defined(VUINT32x4_CMPEQ_DEFINED) \ + && (defined(VUINT32x2_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpeq(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_CMPEQ_DEFINED #endif - -#if !defined(VUINT32x4_CMPGT_DEFINED) && defined(VUINT32x2_CMPGT_DEFINED) 
-VEC_DOUBLE_CMPGT(u, 32, 4, 2) +#if !defined(VUINT32x4_CMPGT_DEFINED) \ + && (defined(VUINT32x2_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpgt(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_CMPGT_DEFINED #endif - -#if !defined(VUINT32x4_CMPLE_DEFINED) && defined(VUINT32x2_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 32, 4, 2) +#if !defined(VUINT32x4_CMPLE_DEFINED) \ + && (defined(VUINT32x2_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmple(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_CMPLE_DEFINED #endif - -#if !defined(VUINT32x4_CMPGE_DEFINED) && defined(VUINT32x2_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 32, 4, 2) +#if !defined(VUINT32x4_CMPGE_DEFINED) \ + && (defined(VUINT32x2_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpge(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_CMPGE_DEFINED #endif - -#if !defined(VUINT32x4_MIN_DEFINED) && defined(VUINT32x2_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 32, 4, 2) +#if !defined(VUINT32x4_MIN_DEFINED) \ + && (defined(VUINT32x2_MIN_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_min(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_MIN_DEFINED #endif - -#if !defined(VUINT32x4_MAX_DEFINED) && defined(VUINT32x2_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 32, 4, 2) +#if !defined(VUINT32x4_MAX_DEFINED) \ + && (defined(VUINT32x2_MAX_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_max(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_MAX_DEFINED #endif - -#if !defined(VUINT32x4_RSHIFT_DEFINED) && defined(VUINT32x2_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 32, 4, 2) +#if !defined(VUINT32x4_RSHIFT_DEFINED) \ + && (defined(VUINT32x2_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_RSHIFT_DEFINED #endif - -#if !defined(VUINT32x4_LRSHIFT_DEFINED) && defined(VUINT32x2_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 32, 4, 2) +#if !defined(VUINT32x4_LRSHIFT_DEFINED) \ + && (defined(VUINT32x2_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_LRSHIFT_DEFINED #endif - -#if !defined(VUINT32x4_LSHIFT_DEFINED) && defined(VUINT32x2_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 32, 4, 2) +#if !defined(VUINT32x4_LSHIFT_DEFINED) \ + && (defined(VUINT32x2_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.dbl[0] = vuint32x2_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x2_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x4_LSHIFT_DEFINED #endif - - - -/* vuint32x8 */ - -#if !defined(VINT32x8_SPLAT_DEFINED) && defined(VINT32x4_SPLAT_DEFINED) 
-VEC_DOUBLE_SPLAT(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_SPLAT_DEFINED) \ + && (defined(VINT32x4_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_splat(vec_int32 x) +{ + vint32x8 vec; + vec.dbl[0] = vint32x4_splat(x); + vec.dbl[1] = vint32x4_splat(x); + return vec; +} # define VINT32x8_SPLAT_DEFINED #endif - -#if !defined(VINT32x8_LOAD_ALIGNED_DEFINED) && defined(VINT32x4_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT32x4_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_load_aligned(const vec_int32 x[8]) +{ + vint32x8 vec; + vec.dbl[0] = vint32x4_load_aligned(x); + vec.dbl[1] = vint32x4_load_aligned(x + 4); + return vec; +} # define VINT32x8_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT32x8_LOAD_DEFINED) && defined(VINT32x4_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_LOAD_DEFINED) \ + && (defined(VINT32x4_LOAD_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_load(const vec_int32 x[8]) +{ + vint32x8 vec; + vec.dbl[0] = vint32x4_load(x); + vec.dbl[1] = vint32x4_load(x + 4); + return vec; +} # define VINT32x8_LOAD_DEFINED #endif - -#if !defined(VINT32x8_STORE_ALIGNED_DEFINED) && defined(VINT32x4_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_STORE_ALIGNED_DEFINED) \ + && (defined(VINT32x4_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint32x8_store_aligned(vint32x8 vec, vec_int32 x[8]) +{ + vint32x4_store_aligned(vec.dbl[0], x); + vint32x4_store_aligned(vec.dbl[1], x + 4); +} # define VINT32x8_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT32x8_STORE_DEFINED) && defined(VINT32x4_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_STORE_DEFINED) \ + && (defined(VINT32x4_STORE_DEFINED)) +VEC_FUNC_IMPL void vint32x8_store(vint32x8 vec, vec_int32 x[8]) +{ + vint32x4_store(vec.dbl[0], x); + vint32x4_store(vec.dbl[1], x + 4); +} # define VINT32x8_STORE_DEFINED #endif - -#if !defined(VINT32x8_ADD_DEFINED) && defined(VINT32x4_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_ADD_DEFINED) \ + && (defined(VINT32x4_ADD_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_add(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_ADD_DEFINED #endif - -#if !defined(VINT32x8_SUB_DEFINED) && defined(VINT32x4_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_SUB_DEFINED) \ + && (defined(VINT32x4_SUB_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_sub(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_SUB_DEFINED #endif - -#if !defined(VINT32x8_MUL_DEFINED) && defined(VINT32x4_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_MUL_DEFINED) \ + && (defined(VINT32x4_MUL_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_mul(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_MUL_DEFINED #endif - -#if !defined(VINT32x8_DIV_DEFINED) && defined(VINT32x4_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_DIV_DEFINED) \ + && (defined(VINT32x4_DIV_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_div(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = 
vint32x4_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_DIV_DEFINED #endif - -#if !defined(VINT32x8_MOD_DEFINED) && defined(VINT32x4_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_MOD_DEFINED) \ + && (defined(VINT32x4_MOD_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_mod(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_MOD_DEFINED #endif - -#if !defined(VINT32x8_AVG_DEFINED) && defined(VINT32x4_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_AVG_DEFINED) \ + && (defined(VINT32x4_AVG_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_avg(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_AVG_DEFINED #endif - -#if !defined(VINT32x8_AND_DEFINED) && defined(VINT32x4_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_AND_DEFINED) \ + && (defined(VINT32x4_AND_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_and(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_AND_DEFINED #endif - -#if !defined(VINT32x8_OR_DEFINED) && defined(VINT32x4_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_OR_DEFINED) \ + && (defined(VINT32x4_OR_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_or(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_OR_DEFINED #endif - -#if !defined(VINT32x8_XOR_DEFINED) && defined(VINT32x4_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_XOR_DEFINED) \ + && (defined(VINT32x4_XOR_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_xor(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_XOR_DEFINED #endif - -#if !defined(VINT32x8_NOT_DEFINED) && defined(VINT32x4_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_NOT_DEFINED) \ + && (defined(VINT32x4_NOT_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_not(vint32x8 vec) +{ + vec.dbl[0] = vint32x4_not(vec.dbl[0]); + vec1.dbl[1] = vint32x4_not(vec.dbl[1]); + return vec; +} # define VINT32x8_NOT_DEFINED #endif - -#if !defined(VINT32x8_CMPLT_DEFINED) && defined(VINT32x4_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_CMPLT_DEFINED) \ + && (defined(VINT32x4_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_cmplt(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_CMPLT_DEFINED #endif - -#if !defined(VINT32x8_CMPEQ_DEFINED) && defined(VINT32x4_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_CMPEQ_DEFINED) \ + && (defined(VINT32x4_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_cmpeq(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_CMPEQ_DEFINED #endif - -#if 
!defined(VINT32x8_CMPGT_DEFINED) && defined(VINT32x4_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_CMPGT_DEFINED) \ + && (defined(VINT32x4_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_cmpgt(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_CMPGT_DEFINED #endif - -#if !defined(VINT32x8_CMPLE_DEFINED) && defined(VINT32x4_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_CMPLE_DEFINED) \ + && (defined(VINT32x4_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_cmple(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_CMPLE_DEFINED #endif - -#if !defined(VINT32x8_CMPGE_DEFINED) && defined(VINT32x4_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_CMPGE_DEFINED) \ + && (defined(VINT32x4_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_cmpge(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_CMPGE_DEFINED #endif - -#if !defined(VINT32x8_MIN_DEFINED) && defined(VINT32x4_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_MIN_DEFINED) \ + && (defined(VINT32x4_MIN_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_min(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_MIN_DEFINED #endif - -#if !defined(VINT32x8_MAX_DEFINED) && defined(VINT32x4_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_MAX_DEFINED) \ + && (defined(VINT32x4_MAX_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_max(vint32x8 vec1, vint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_MAX_DEFINED #endif - -#if !defined(VINT32x8_RSHIFT_DEFINED) && defined(VINT32x4_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_RSHIFT_DEFINED) \ + && (defined(VINT32x4_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_rshift(vint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_RSHIFT_DEFINED #endif - -#if !defined(VINT32x8_LRSHIFT_DEFINED) && defined(VINT32x4_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_LRSHIFT_DEFINED) \ + && (defined(VINT32x4_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_lrshift(vint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_LRSHIFT_DEFINED #endif - -#if !defined(VINT32x8_LSHIFT_DEFINED) && defined(VINT32x4_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 32, 8, 4) +#if !defined(VINT32x8_LSHIFT_DEFINED) \ + && (defined(VINT32x4_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint32x8 vint32x8_lshift(vint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vint32x4_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x4_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x8_LSHIFT_DEFINED #endif - - - -/* vint32x8 */ 
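All of the expanded blocks in this header follow the same doubling pattern: a vector with 2N lanes keeps its lanes as two N-lane halves in the `.dbl` array, and every operation simply calls the N-lane version once per half, with loads and stores offsetting the source/destination pointer by N for the second half. The stand-alone sketch below illustrates the idea with plain C arrays; the `half32x4`/`pair32x8` types and the `pair_*`/`half_*` helpers are illustrative stand-ins invented for this example, not part of the vec API.

#include <stdio.h>

/* illustrative stand-in for a native 4-lane half */
typedef struct { int lanes[4]; } half32x4;

/* illustrative stand-in for the doubled type: two halves, like vec's .dbl[2] */
typedef struct { half32x4 dbl[2]; } pair32x8;

/* the half-width operation we already have */
static half32x4 half_add(half32x4 a, half32x4 b)
{
	int i;
	for (i = 0; i < 4; i++)
		a.lanes[i] += b.lanes[i];
	return a;
}

/* loads split the source array: x feeds dbl[0], x + 4 feeds dbl[1] */
static pair32x8 pair_load(const int x[8])
{
	pair32x8 v;
	int i;
	for (i = 0; i < 4; i++) {
		v.dbl[0].lanes[i] = x[i];
		v.dbl[1].lanes[i] = x[i + 4];
	}
	return v;
}

/* the doubled operation just forwards to the half-width one, once per half */
static pair32x8 pair_add(pair32x8 a, pair32x8 b)
{
	a.dbl[0] = half_add(a.dbl[0], b.dbl[0]);
	a.dbl[1] = half_add(a.dbl[1], b.dbl[1]);
	return a;
}

int main(void)
{
	int a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
	int b[8] = {8, 7, 6, 5, 4, 3, 2, 1};
	pair32x8 r = pair_add(pair_load(a), pair_load(b));
	int i;
	for (i = 0; i < 8; i++)
		printf("%d ", r.dbl[i / 4].lanes[i % 4]);
	printf("\n"); /* prints: 9 9 9 9 9 9 9 9 */
	return 0;
}

The same recursion applied twice more is what produces the 256- and 512-bit types from a native 128-bit implementation (x4 -> x8 -> x16), which is exactly the progression visible in the surrounding blocks.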
- -#if !defined(VUINT32x8_SPLAT_DEFINED) && defined(VUINT32x4_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 32, 8, 4) +#if !defined(VUINT32x8_SPLAT_DEFINED) \ + && (defined(VUINT32x4_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_splat(vec_uint32 x) +{ + vuint32x8 vec; + vec.dbl[0] = vuint32x4_splat(x); + vec.dbl[1] = vuint32x4_splat(x); + return vec; +} # define VUINT32x8_SPLAT_DEFINED #endif - -#if !defined(VUINT32x8_LOAD_ALIGNED_DEFINED) && defined(VUINT32x4_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 32, 8, 4) +#if !defined(VUINT32x8_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT32x4_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_load_aligned(const vec_uint32 x[8]) +{ + vuint32x8 vec; + vec.dbl[0] = vuint32x4_load_aligned(x); + vec.dbl[1] = vuint32x4_load_aligned(x + 4); + return vec; +} # define VUINT32x8_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT32x8_LOAD_DEFINED) && defined(VUINT32x4_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 32, 8, 4) +#if !defined(VUINT32x8_LOAD_DEFINED) \ + && (defined(VUINT32x4_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_load(const vec_uint32 x[8]) +{ + vuint32x8 vec; + vec.dbl[0] = vuint32x4_load(x); + vec.dbl[1] = vuint32x4_load(x + 4); + return vec; +} # define VUINT32x8_LOAD_DEFINED #endif - -#if !defined(VUINT32x8_STORE_ALIGNED_DEFINED) && defined(VUINT32x4_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 32, 8, 4) +#if !defined(VUINT32x8_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT32x4_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint32x8_store_aligned(vuint32x8 vec, vec_uint32 x[8]) +{ + vuint32x4_store_aligned(vec.dbl[0], x); + vuint32x4_store_aligned(vec.dbl[1], x + 4); +} # define VUINT32x8_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT32x8_STORE_DEFINED) && defined(VUINT32x4_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 32, 8, 4) +#if !defined(VUINT32x8_STORE_DEFINED) \ + && (defined(VUINT32x4_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint32x8_store(vuint32x8 vec, vec_uint32 x[8]) +{ + vuint32x4_store(vec.dbl[0], x); + vuint32x4_store(vec.dbl[1], x + 4); +} # define VUINT32x8_STORE_DEFINED #endif - -#if !defined(VUINT32x8_ADD_DEFINED) && defined(VUINT32x4_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 32, 8, 4) +#if !defined(VUINT32x8_ADD_DEFINED) \ + && (defined(VUINT32x4_ADD_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_add(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_ADD_DEFINED #endif - -#if !defined(VUINT32x8_SUB_DEFINED) && defined(VUINT32x4_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 32, 8, 4) +#if !defined(VUINT32x8_SUB_DEFINED) \ + && (defined(VUINT32x4_SUB_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_sub(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_SUB_DEFINED #endif - -#if !defined(VUINT32x8_MUL_DEFINED) && defined(VUINT32x4_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 32, 8, 4) +#if !defined(VUINT32x8_MUL_DEFINED) \ + && (defined(VUINT32x4_MUL_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_mul(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_MUL_DEFINED #endif - -#if !defined(VUINT32x8_DIV_DEFINED) && defined(VUINT32x4_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 32, 8, 4) +#if !defined(VUINT32x8_DIV_DEFINED) \ + && (defined(VUINT32x4_DIV_DEFINED)) +VEC_FUNC_IMPL vuint32x8 
vuint32x8_div(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_DIV_DEFINED #endif - -#if !defined(VUINT32x8_MOD_DEFINED) && defined(VUINT32x4_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 32, 8, 4) +#if !defined(VUINT32x8_MOD_DEFINED) \ + && (defined(VUINT32x4_MOD_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_mod(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_MOD_DEFINED #endif - -#if !defined(VUINT32x8_AVG_DEFINED) && defined(VUINT32x4_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 32, 8, 4) +#if !defined(VUINT32x8_AVG_DEFINED) \ + && (defined(VUINT32x4_AVG_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_avg(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_AVG_DEFINED #endif - -#if !defined(VUINT32x8_AND_DEFINED) && defined(VUINT32x4_AND_DEFINED) -VEC_DOUBLE_AND(u, 32, 8, 4) +#if !defined(VUINT32x8_AND_DEFINED) \ + && (defined(VUINT32x4_AND_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_and(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_AND_DEFINED #endif - -#if !defined(VUINT32x8_OR_DEFINED) && defined(VUINT32x4_OR_DEFINED) -VEC_DOUBLE_OR(u, 32, 8, 4) +#if !defined(VUINT32x8_OR_DEFINED) \ + && (defined(VUINT32x4_OR_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_or(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_OR_DEFINED #endif - -#if !defined(VUINT32x8_XOR_DEFINED) && defined(VUINT32x4_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 32, 8, 4) +#if !defined(VUINT32x8_XOR_DEFINED) \ + && (defined(VUINT32x4_XOR_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_xor(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_XOR_DEFINED #endif - -#if !defined(VUINT32x8_NOT_DEFINED) && defined(VUINT32x4_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 32, 8, 4) +#if !defined(VUINT32x8_NOT_DEFINED) \ + && (defined(VUINT32x4_NOT_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_not(vuint32x8 vec) +{ + vec.dbl[0] = vuint32x4_not(vec.dbl[0]); + vec1.dbl[1] = vuint32x4_not(vec.dbl[1]); + return vec; +} # define VUINT32x8_NOT_DEFINED #endif - -#if !defined(VUINT32x8_CMPLT_DEFINED) && defined(VUINT32x4_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 32, 8, 4) +#if !defined(VUINT32x8_CMPLT_DEFINED) \ + && (defined(VUINT32x4_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmplt(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_CMPLT_DEFINED #endif - -#if !defined(VUINT32x8_CMPEQ_DEFINED) && defined(VUINT32x4_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 32, 8, 4) +#if !defined(VUINT32x8_CMPEQ_DEFINED) \ + && (defined(VUINT32x4_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpeq(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define 
VUINT32x8_CMPEQ_DEFINED #endif - -#if !defined(VUINT32x8_CMPGT_DEFINED) && defined(VUINT32x4_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 32, 8, 4) +#if !defined(VUINT32x8_CMPGT_DEFINED) \ + && (defined(VUINT32x4_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpgt(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_CMPGT_DEFINED #endif - -#if !defined(VUINT32x8_CMPLE_DEFINED) && defined(VUINT32x4_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 32, 8, 4) +#if !defined(VUINT32x8_CMPLE_DEFINED) \ + && (defined(VUINT32x4_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmple(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_CMPLE_DEFINED #endif - -#if !defined(VUINT32x8_CMPGE_DEFINED) && defined(VUINT32x4_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 32, 8, 4) +#if !defined(VUINT32x8_CMPGE_DEFINED) \ + && (defined(VUINT32x4_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpge(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_CMPGE_DEFINED #endif - -#if !defined(VUINT32x8_MIN_DEFINED) && defined(VUINT32x4_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 32, 8, 4) +#if !defined(VUINT32x8_MIN_DEFINED) \ + && (defined(VUINT32x4_MIN_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_min(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_MIN_DEFINED #endif - -#if !defined(VUINT32x8_MAX_DEFINED) && defined(VUINT32x4_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 32, 8, 4) +#if !defined(VUINT32x8_MAX_DEFINED) \ + && (defined(VUINT32x4_MAX_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_max(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_MAX_DEFINED #endif - -#if !defined(VUINT32x8_RSHIFT_DEFINED) && defined(VUINT32x4_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 32, 8, 4) +#if !defined(VUINT32x8_RSHIFT_DEFINED) \ + && (defined(VUINT32x4_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_rshift(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_RSHIFT_DEFINED #endif - -#if !defined(VUINT32x8_LRSHIFT_DEFINED) && defined(VUINT32x4_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 32, 8, 4) +#if !defined(VUINT32x8_LRSHIFT_DEFINED) \ + && (defined(VUINT32x4_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_lrshift(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_LRSHIFT_DEFINED #endif - -#if !defined(VUINT32x8_LSHIFT_DEFINED) && defined(VUINT32x4_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 32, 8, 4) +#if !defined(VUINT32x8_LSHIFT_DEFINED) \ + && (defined(VUINT32x4_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_lshift(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.dbl[0] = vuint32x4_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x4_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x8_LSHIFT_DEFINED 
#endif - - - -/* vuint32x16 */ - -#if !defined(VINT32x16_SPLAT_DEFINED) && defined(VINT32x8_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_SPLAT_DEFINED) \ + && (defined(VINT32x8_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_splat(vec_int32 x) +{ + vint32x16 vec; + vec.dbl[0] = vint32x8_splat(x); + vec.dbl[1] = vint32x8_splat(x); + return vec; +} # define VINT32x16_SPLAT_DEFINED #endif - -#if !defined(VINT32x16_LOAD_ALIGNED_DEFINED) && defined(VINT32x8_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT32x8_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_load_aligned(const vec_int32 x[16]) +{ + vint32x16 vec; + vec.dbl[0] = vint32x8_load_aligned(x); + vec.dbl[1] = vint32x8_load_aligned(x + 8); + return vec; +} # define VINT32x16_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT32x16_LOAD_DEFINED) && defined(VINT32x8_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_LOAD_DEFINED) \ + && (defined(VINT32x8_LOAD_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_load(const vec_int32 x[16]) +{ + vint32x16 vec; + vec.dbl[0] = vint32x8_load(x); + vec.dbl[1] = vint32x8_load(x + 8); + return vec; +} # define VINT32x16_LOAD_DEFINED #endif - -#if !defined(VINT32x16_STORE_ALIGNED_DEFINED) && defined(VINT32x8_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_STORE_ALIGNED_DEFINED) \ + && (defined(VINT32x8_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint32x16_store_aligned(vint32x16 vec, vec_int32 x[16]) +{ + vint32x8_store_aligned(vec.dbl[0], x); + vint32x8_store_aligned(vec.dbl[1], x + 8); +} # define VINT32x16_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT32x16_STORE_DEFINED) && defined(VINT32x8_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_STORE_DEFINED) \ + && (defined(VINT32x8_STORE_DEFINED)) +VEC_FUNC_IMPL void vint32x16_store(vint32x16 vec, vec_int32 x[16]) +{ + vint32x8_store(vec.dbl[0], x); + vint32x8_store(vec.dbl[1], x + 8); +} # define VINT32x16_STORE_DEFINED #endif - -#if !defined(VINT32x16_ADD_DEFINED) && defined(VINT32x8_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_ADD_DEFINED) \ + && (defined(VINT32x8_ADD_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_add(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_ADD_DEFINED #endif - -#if !defined(VINT32x16_SUB_DEFINED) && defined(VINT32x8_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_SUB_DEFINED) \ + && (defined(VINT32x8_SUB_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_sub(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_SUB_DEFINED #endif - -#if !defined(VINT32x16_MUL_DEFINED) && defined(VINT32x8_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_MUL_DEFINED) \ + && (defined(VINT32x8_MUL_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_mul(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_MUL_DEFINED #endif - -#if !defined(VINT32x16_DIV_DEFINED) && defined(VINT32x8_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 32, 
16, 8) +#if !defined(VINT32x16_DIV_DEFINED) \ + && (defined(VINT32x8_DIV_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_div(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_DIV_DEFINED #endif - -#if !defined(VINT32x16_MOD_DEFINED) && defined(VINT32x8_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_MOD_DEFINED) \ + && (defined(VINT32x8_MOD_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_mod(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_MOD_DEFINED #endif - -#if !defined(VINT32x16_AVG_DEFINED) && defined(VINT32x8_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_AVG_DEFINED) \ + && (defined(VINT32x8_AVG_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_avg(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_AVG_DEFINED #endif - -#if !defined(VINT32x16_AND_DEFINED) && defined(VINT32x8_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_AND_DEFINED) \ + && (defined(VINT32x8_AND_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_and(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_AND_DEFINED #endif - -#if !defined(VINT32x16_OR_DEFINED) && defined(VINT32x8_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_OR_DEFINED) \ + && (defined(VINT32x8_OR_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_or(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_OR_DEFINED #endif - -#if !defined(VINT32x16_XOR_DEFINED) && defined(VINT32x8_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_XOR_DEFINED) \ + && (defined(VINT32x8_XOR_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_xor(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_XOR_DEFINED #endif - -#if !defined(VINT32x16_NOT_DEFINED) && defined(VINT32x8_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_NOT_DEFINED) \ + && (defined(VINT32x8_NOT_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_not(vint32x16 vec) +{ + vec.dbl[0] = vint32x8_not(vec.dbl[0]); + vec1.dbl[1] = vint32x8_not(vec.dbl[1]); + return vec; +} # define VINT32x16_NOT_DEFINED #endif - -#if !defined(VINT32x16_CMPLT_DEFINED) && defined(VINT32x8_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_CMPLT_DEFINED) \ + && (defined(VINT32x8_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_cmplt(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_CMPLT_DEFINED #endif - -#if !defined(VINT32x16_CMPEQ_DEFINED) && defined(VINT32x8_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_CMPEQ_DEFINED) \ + && (defined(VINT32x8_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint32x16 
vint32x16_cmpeq(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_CMPEQ_DEFINED #endif - -#if !defined(VINT32x16_CMPGT_DEFINED) && defined(VINT32x8_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_CMPGT_DEFINED) \ + && (defined(VINT32x8_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_cmpgt(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_CMPGT_DEFINED #endif - -#if !defined(VINT32x16_CMPLE_DEFINED) && defined(VINT32x8_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_CMPLE_DEFINED) \ + && (defined(VINT32x8_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_cmple(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_CMPLE_DEFINED #endif - -#if !defined(VINT32x16_CMPGE_DEFINED) && defined(VINT32x8_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_CMPGE_DEFINED) \ + && (defined(VINT32x8_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_cmpge(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_CMPGE_DEFINED #endif - -#if !defined(VINT32x16_MIN_DEFINED) && defined(VINT32x8_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_MIN_DEFINED) \ + && (defined(VINT32x8_MIN_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_min(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_MIN_DEFINED #endif - -#if !defined(VINT32x16_MAX_DEFINED) && defined(VINT32x8_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_MAX_DEFINED) \ + && (defined(VINT32x8_MAX_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_max(vint32x16 vec1, vint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_MAX_DEFINED #endif - -#if !defined(VINT32x16_RSHIFT_DEFINED) && defined(VINT32x8_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_RSHIFT_DEFINED) \ + && (defined(VINT32x8_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_rshift(vint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_RSHIFT_DEFINED #endif - -#if !defined(VINT32x16_LRSHIFT_DEFINED) && defined(VINT32x8_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_LRSHIFT_DEFINED) \ + && (defined(VINT32x8_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_lrshift(vint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_LRSHIFT_DEFINED #endif - -#if !defined(VINT32x16_LSHIFT_DEFINED) && defined(VINT32x8_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 32, 16, 8) +#if !defined(VINT32x16_LSHIFT_DEFINED) \ + && 
(defined(VINT32x8_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint32x16 vint32x16_lshift(vint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vint32x8_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint32x8_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT32x16_LSHIFT_DEFINED #endif - - - -/* vint32x16 */ - -#if !defined(VUINT32x16_SPLAT_DEFINED) && defined(VUINT32x8_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 32, 16, 8) +#if !defined(VUINT32x16_SPLAT_DEFINED) \ + && (defined(VUINT32x8_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_splat(vec_uint32 x) +{ + vuint32x16 vec; + vec.dbl[0] = vuint32x8_splat(x); + vec.dbl[1] = vuint32x8_splat(x); + return vec; +} # define VUINT32x16_SPLAT_DEFINED #endif - -#if !defined(VUINT32x16_LOAD_ALIGNED_DEFINED) && defined(VUINT32x8_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 32, 16, 8) +#if !defined(VUINT32x16_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT32x8_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_load_aligned(const vec_uint32 x[16]) +{ + vuint32x16 vec; + vec.dbl[0] = vuint32x8_load_aligned(x); + vec.dbl[1] = vuint32x8_load_aligned(x + 8); + return vec; +} # define VUINT32x16_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT32x16_LOAD_DEFINED) && defined(VUINT32x8_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 32, 16, 8) +#if !defined(VUINT32x16_LOAD_DEFINED) \ + && (defined(VUINT32x8_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_load(const vec_uint32 x[16]) +{ + vuint32x16 vec; + vec.dbl[0] = vuint32x8_load(x); + vec.dbl[1] = vuint32x8_load(x + 8); + return vec; +} # define VUINT32x16_LOAD_DEFINED #endif - -#if !defined(VUINT32x16_STORE_ALIGNED_DEFINED) && defined(VUINT32x8_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 32, 16, 8) +#if !defined(VUINT32x16_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT32x8_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint32x16_store_aligned(vuint32x16 vec, vec_uint32 x[16]) +{ + vuint32x8_store_aligned(vec.dbl[0], x); + vuint32x8_store_aligned(vec.dbl[1], x + 8); +} # define VUINT32x16_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT32x16_STORE_DEFINED) && defined(VUINT32x8_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 32, 16, 8) +#if !defined(VUINT32x16_STORE_DEFINED) \ + && (defined(VUINT32x8_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint32x16_store(vuint32x16 vec, vec_uint32 x[16]) +{ + vuint32x8_store(vec.dbl[0], x); + vuint32x8_store(vec.dbl[1], x + 8); +} # define VUINT32x16_STORE_DEFINED #endif - -#if !defined(VUINT32x16_ADD_DEFINED) && defined(VUINT32x8_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 32, 16, 8) +#if !defined(VUINT32x16_ADD_DEFINED) \ + && (defined(VUINT32x8_ADD_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_add(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_ADD_DEFINED #endif - -#if !defined(VUINT32x16_SUB_DEFINED) && defined(VUINT32x8_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 32, 16, 8) +#if !defined(VUINT32x16_SUB_DEFINED) \ + && (defined(VUINT32x8_SUB_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_sub(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_SUB_DEFINED #endif - -#if !defined(VUINT32x16_MUL_DEFINED) && defined(VUINT32x8_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 32, 16, 8) +#if !defined(VUINT32x16_MUL_DEFINED) \ + && (defined(VUINT32x8_MUL_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_mul(vuint32x16 vec1, vuint32x16 vec2) +{ + 
vec1.dbl[0] = vuint32x8_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_MUL_DEFINED #endif - -#if !defined(VUINT32x16_DIV_DEFINED) && defined(VUINT32x8_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 32, 16, 8) +#if !defined(VUINT32x16_DIV_DEFINED) \ + && (defined(VUINT32x8_DIV_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_div(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_DIV_DEFINED #endif - -#if !defined(VUINT32x16_MOD_DEFINED) && defined(VUINT32x8_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 32, 16, 8) +#if !defined(VUINT32x16_MOD_DEFINED) \ + && (defined(VUINT32x8_MOD_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_mod(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_MOD_DEFINED #endif - -#if !defined(VUINT32x16_AVG_DEFINED) && defined(VUINT32x8_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 32, 16, 8) +#if !defined(VUINT32x16_AVG_DEFINED) \ + && (defined(VUINT32x8_AVG_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_avg(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_AVG_DEFINED #endif - -#if !defined(VUINT32x16_AND_DEFINED) && defined(VUINT32x8_AND_DEFINED) -VEC_DOUBLE_AND(u, 32, 16, 8) +#if !defined(VUINT32x16_AND_DEFINED) \ + && (defined(VUINT32x8_AND_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_and(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_AND_DEFINED #endif - -#if !defined(VUINT32x16_OR_DEFINED) && defined(VUINT32x8_OR_DEFINED) -VEC_DOUBLE_OR(u, 32, 16, 8) +#if !defined(VUINT32x16_OR_DEFINED) \ + && (defined(VUINT32x8_OR_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_or(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_OR_DEFINED #endif - -#if !defined(VUINT32x16_XOR_DEFINED) && defined(VUINT32x8_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 32, 16, 8) +#if !defined(VUINT32x16_XOR_DEFINED) \ + && (defined(VUINT32x8_XOR_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_xor(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_XOR_DEFINED #endif - -#if !defined(VUINT32x16_NOT_DEFINED) && defined(VUINT32x8_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 32, 16, 8) +#if !defined(VUINT32x16_NOT_DEFINED) \ + && (defined(VUINT32x8_NOT_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_not(vuint32x16 vec) +{ + vec.dbl[0] = vuint32x8_not(vec.dbl[0]); + vec1.dbl[1] = vuint32x8_not(vec.dbl[1]); + return vec; +} # define VUINT32x16_NOT_DEFINED #endif - -#if !defined(VUINT32x16_CMPLT_DEFINED) && defined(VUINT32x8_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 32, 16, 8) +#if !defined(VUINT32x16_CMPLT_DEFINED) \ + && (defined(VUINT32x8_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmplt(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define 
VUINT32x16_CMPLT_DEFINED #endif - -#if !defined(VUINT32x16_CMPEQ_DEFINED) && defined(VUINT32x8_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 32, 16, 8) +#if !defined(VUINT32x16_CMPEQ_DEFINED) \ + && (defined(VUINT32x8_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpeq(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_CMPEQ_DEFINED #endif - -#if !defined(VUINT32x16_CMPGT_DEFINED) && defined(VUINT32x8_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 32, 16, 8) +#if !defined(VUINT32x16_CMPGT_DEFINED) \ + && (defined(VUINT32x8_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpgt(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_CMPGT_DEFINED #endif - -#if !defined(VUINT32x16_CMPLE_DEFINED) && defined(VUINT32x8_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 32, 16, 8) +#if !defined(VUINT32x16_CMPLE_DEFINED) \ + && (defined(VUINT32x8_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmple(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_CMPLE_DEFINED #endif - -#if !defined(VUINT32x16_CMPGE_DEFINED) && defined(VUINT32x8_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 32, 16, 8) +#if !defined(VUINT32x16_CMPGE_DEFINED) \ + && (defined(VUINT32x8_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpge(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_CMPGE_DEFINED #endif - -#if !defined(VUINT32x16_MIN_DEFINED) && defined(VUINT32x8_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 32, 16, 8) +#if !defined(VUINT32x16_MIN_DEFINED) \ + && (defined(VUINT32x8_MIN_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_min(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_MIN_DEFINED #endif - -#if !defined(VUINT32x16_MAX_DEFINED) && defined(VUINT32x8_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 32, 16, 8) +#if !defined(VUINT32x16_MAX_DEFINED) \ + && (defined(VUINT32x8_MAX_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_max(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_MAX_DEFINED #endif - -#if !defined(VUINT32x16_RSHIFT_DEFINED) && defined(VUINT32x8_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 32, 16, 8) +#if !defined(VUINT32x16_RSHIFT_DEFINED) \ + && (defined(VUINT32x8_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_rshift(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_RSHIFT_DEFINED #endif - -#if !defined(VUINT32x16_LRSHIFT_DEFINED) && defined(VUINT32x8_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 32, 16, 8) +#if !defined(VUINT32x16_LRSHIFT_DEFINED) \ + && (defined(VUINT32x8_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_lrshift(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_lrshift(vec1.dbl[1], vec2.dbl[1]); 
+ return vec1; +} # define VUINT32x16_LRSHIFT_DEFINED #endif - -#if !defined(VUINT32x16_LSHIFT_DEFINED) && defined(VUINT32x8_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 32, 16, 8) +#if !defined(VUINT32x16_LSHIFT_DEFINED) \ + && (defined(VUINT32x8_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_lshift(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.dbl[0] = vuint32x8_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint32x8_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT32x16_LSHIFT_DEFINED #endif - - - -/* vuint64x4 */ - -#if !defined(VINT64x4_SPLAT_DEFINED) && defined(VINT64x2_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 64, 4, 2) +#if !defined(VINT64x2_SPLAT_DEFINED) \ + && (defined(VINT64x1_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_splat(vec_int64 x) +{ + vint64x2 vec; + vec.dbl[0] = vint64x1_splat(x); + vec.dbl[1] = vint64x1_splat(x); + return vec; +} +# define VINT64x2_SPLAT_DEFINED +#endif +#if !defined(VINT64x2_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT64x1_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_load_aligned(const vec_int64 x[2]) +{ + vint64x2 vec; + vec.dbl[0] = vint64x1_load_aligned(x); + vec.dbl[1] = vint64x1_load_aligned(x + 1); + return vec; +} +# define VINT64x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT64x2_LOAD_DEFINED) \ + && (defined(VINT64x1_LOAD_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_load(const vec_int64 x[2]) +{ + vint64x2 vec; + vec.dbl[0] = vint64x1_load(x); + vec.dbl[1] = vint64x1_load(x + 1); + return vec; +} +# define VINT64x2_LOAD_DEFINED +#endif +#if !defined(VINT64x2_STORE_ALIGNED_DEFINED) \ + && (defined(VINT64x1_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint64x2_store_aligned(vint64x2 vec, vec_int64 x[2]) +{ + vint64x1_store_aligned(vec.dbl[0], x); + vint64x1_store_aligned(vec.dbl[1], x + 1); +} +# define VINT64x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT64x2_STORE_DEFINED) \ + && (defined(VINT64x1_STORE_DEFINED)) +VEC_FUNC_IMPL void vint64x2_store(vint64x2 vec, vec_int64 x[2]) +{ + vint64x1_store(vec.dbl[0], x); + vint64x1_store(vec.dbl[1], x + 1); +} +# define VINT64x2_STORE_DEFINED +#endif +#if !defined(VINT64x2_ADD_DEFINED) \ + && (defined(VINT64x1_ADD_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_add(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_ADD_DEFINED +#endif +#if !defined(VINT64x2_SUB_DEFINED) \ + && (defined(VINT64x1_SUB_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_sub(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_SUB_DEFINED +#endif +#if !defined(VINT64x2_MUL_DEFINED) \ + && (defined(VINT64x1_MUL_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_mul(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_MUL_DEFINED +#endif +#if !defined(VINT64x2_DIV_DEFINED) \ + && (defined(VINT64x1_DIV_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_div(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_DIV_DEFINED +#endif +#if !defined(VINT64x2_MOD_DEFINED) \ + && (defined(VINT64x1_MOD_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_mod(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = 
vint64x1_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_MOD_DEFINED +#endif +#if !defined(VINT64x2_AVG_DEFINED) \ + && (defined(VINT64x1_AVG_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_avg(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_AVG_DEFINED +#endif +#if !defined(VINT64x2_AND_DEFINED) \ + && (defined(VINT64x1_AND_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_and(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_AND_DEFINED +#endif +#if !defined(VINT64x2_OR_DEFINED) \ + && (defined(VINT64x1_OR_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_or(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_OR_DEFINED +#endif +#if !defined(VINT64x2_XOR_DEFINED) \ + && (defined(VINT64x1_XOR_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_xor(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_XOR_DEFINED +#endif +#if !defined(VINT64x2_NOT_DEFINED) \ + && (defined(VINT64x1_NOT_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_not(vint64x2 vec) +{ + vec.dbl[0] = vint64x1_not(vec.dbl[0]); + vec.dbl[1] = vint64x1_not(vec.dbl[1]); + return vec; +} +# define VINT64x2_NOT_DEFINED +#endif +#if !defined(VINT64x2_CMPLT_DEFINED) \ + && (defined(VINT64x1_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_cmplt(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_CMPLT_DEFINED +#endif +#if !defined(VINT64x2_CMPEQ_DEFINED) \ + && (defined(VINT64x1_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_cmpeq(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_CMPEQ_DEFINED +#endif +#if !defined(VINT64x2_CMPGT_DEFINED) \ + && (defined(VINT64x1_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_cmpgt(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_CMPGT_DEFINED +#endif +#if !defined(VINT64x2_CMPLE_DEFINED) \ + && (defined(VINT64x1_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_cmple(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_CMPLE_DEFINED +#endif +#if !defined(VINT64x2_CMPGE_DEFINED) \ + && (defined(VINT64x1_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_cmpge(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_CMPGE_DEFINED +#endif +#if !defined(VINT64x2_MIN_DEFINED) \ + && (defined(VINT64x1_MIN_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_min(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_min(vec1.dbl[1], 
vec2.dbl[1]); + return vec1; +} +# define VINT64x2_MIN_DEFINED +#endif +#if !defined(VINT64x2_MAX_DEFINED) \ + && (defined(VINT64x1_MAX_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_max(vint64x2 vec1, vint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_MAX_DEFINED +#endif +#if !defined(VINT64x2_RSHIFT_DEFINED) \ + && (defined(VINT64x1_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_rshift(vint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_RSHIFT_DEFINED +#endif +#if !defined(VINT64x2_LRSHIFT_DEFINED) \ + && (defined(VINT64x1_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_lrshift(vint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_LRSHIFT_DEFINED +#endif +#if !defined(VINT64x2_LSHIFT_DEFINED) \ + && (defined(VINT64x1_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint64x2 vint64x2_lshift(vint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vint64x1_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x1_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VINT64x2_LSHIFT_DEFINED +#endif +#if !defined(VUINT64x2_SPLAT_DEFINED) \ + && (defined(VUINT64x1_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_splat(vec_uint64 x) +{ + vuint64x2 vec; + vec.dbl[0] = vuint64x1_splat(x); + vec.dbl[1] = vuint64x1_splat(x); + return vec; +} +# define VUINT64x2_SPLAT_DEFINED +#endif +#if !defined(VUINT64x2_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT64x1_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_load_aligned(const vec_uint64 x[2]) +{ + vuint64x2 vec; + vec.dbl[0] = vuint64x1_load_aligned(x); + vec.dbl[1] = vuint64x1_load_aligned(x + 1); + return vec; +} +# define VUINT64x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT64x2_LOAD_DEFINED) \ + && (defined(VUINT64x1_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_load(const vec_uint64 x[2]) +{ + vuint64x2 vec; + vec.dbl[0] = vuint64x1_load(x); + vec.dbl[1] = vuint64x1_load(x + 1); + return vec; +} +# define VUINT64x2_LOAD_DEFINED +#endif +#if !defined(VUINT64x2_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT64x1_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint64x2_store_aligned(vuint64x2 vec, vec_uint64 x[2]) +{ + vuint64x1_store_aligned(vec.dbl[0], x); + vuint64x1_store_aligned(vec.dbl[1], x + 1); +} +# define VUINT64x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT64x2_STORE_DEFINED) \ + && (defined(VUINT64x1_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint64x2_store(vuint64x2 vec, vec_uint64 x[2]) +{ + vuint64x1_store(vec.dbl[0], x); + vuint64x1_store(vec.dbl[1], x + 1); +} +# define VUINT64x2_STORE_DEFINED +#endif +#if !defined(VUINT64x2_ADD_DEFINED) \ + && (defined(VUINT64x1_ADD_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_add(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_ADD_DEFINED +#endif +#if !defined(VUINT64x2_SUB_DEFINED) \ + && (defined(VUINT64x1_SUB_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_sub(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_SUB_DEFINED +#endif +#if 
!defined(VUINT64x2_MUL_DEFINED) \ + && (defined(VUINT64x1_MUL_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_mul(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_MUL_DEFINED +#endif +#if !defined(VUINT64x2_DIV_DEFINED) \ + && (defined(VUINT64x1_DIV_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_div(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_DIV_DEFINED +#endif +#if !defined(VUINT64x2_MOD_DEFINED) \ + && (defined(VUINT64x1_MOD_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_mod(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_MOD_DEFINED +#endif +#if !defined(VUINT64x2_AVG_DEFINED) \ + && (defined(VUINT64x1_AVG_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_avg(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_AVG_DEFINED +#endif +#if !defined(VUINT64x2_AND_DEFINED) \ + && (defined(VUINT64x1_AND_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_and(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_AND_DEFINED +#endif +#if !defined(VUINT64x2_OR_DEFINED) \ + && (defined(VUINT64x1_OR_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_or(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_OR_DEFINED +#endif +#if !defined(VUINT64x2_XOR_DEFINED) \ + && (defined(VUINT64x1_XOR_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_xor(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_XOR_DEFINED +#endif +#if !defined(VUINT64x2_NOT_DEFINED) \ + && (defined(VUINT64x1_NOT_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_not(vuint64x2 vec) +{ + vec.dbl[0] = vuint64x1_not(vec.dbl[0]); + vec.dbl[1] = vuint64x1_not(vec.dbl[1]); + return vec; +} +# define VUINT64x2_NOT_DEFINED +#endif +#if !defined(VUINT64x2_CMPLT_DEFINED) \ + && (defined(VUINT64x1_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmplt(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_CMPLT_DEFINED +#endif +#if !defined(VUINT64x2_CMPEQ_DEFINED) \ + && (defined(VUINT64x1_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpeq(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_CMPEQ_DEFINED +#endif +#if !defined(VUINT64x2_CMPGT_DEFINED) \ + && (defined(VUINT64x1_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpgt(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_CMPGT_DEFINED +#endif +#if 
!defined(VUINT64x2_CMPLE_DEFINED) \ + && (defined(VUINT64x1_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmple(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_CMPLE_DEFINED +#endif +#if !defined(VUINT64x2_CMPGE_DEFINED) \ + && (defined(VUINT64x1_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpge(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_CMPGE_DEFINED +#endif +#if !defined(VUINT64x2_MIN_DEFINED) \ + && (defined(VUINT64x1_MIN_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_min(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_MIN_DEFINED +#endif +#if !defined(VUINT64x2_MAX_DEFINED) \ + && (defined(VUINT64x1_MAX_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_max(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_MAX_DEFINED +#endif +#if !defined(VUINT64x2_RSHIFT_DEFINED) \ + && (defined(VUINT64x1_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_rshift(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_RSHIFT_DEFINED +#endif +#if !defined(VUINT64x2_LRSHIFT_DEFINED) \ + && (defined(VUINT64x1_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_lrshift(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_LRSHIFT_DEFINED +#endif +#if !defined(VUINT64x2_LSHIFT_DEFINED) \ + && (defined(VUINT64x1_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_lshift(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.dbl[0] = vuint64x1_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x1_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VUINT64x2_LSHIFT_DEFINED +#endif +#if !defined(VINT64x4_SPLAT_DEFINED) \ + && (defined(VINT64x2_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_splat(vec_int64 x) +{ + vint64x4 vec; + vec.dbl[0] = vint64x2_splat(x); + vec.dbl[1] = vint64x2_splat(x); + return vec; +} # define VINT64x4_SPLAT_DEFINED #endif - -#if !defined(VINT64x4_LOAD_ALIGNED_DEFINED) && defined(VINT64x2_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT64x2_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_load_aligned(const vec_int64 x[4]) +{ + vint64x4 vec; + vec.dbl[0] = vint64x2_load_aligned(x); + vec.dbl[1] = vint64x2_load_aligned(x + 2); + return vec; +} # define VINT64x4_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT64x4_LOAD_DEFINED) && defined(VINT64x2_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_LOAD_DEFINED) \ + && (defined(VINT64x2_LOAD_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_load(const vec_int64 x[4]) +{ + vint64x4 vec; + vec.dbl[0] = vint64x2_load(x); + vec.dbl[1] = vint64x2_load(x + 2); + return vec; +} # define VINT64x4_LOAD_DEFINED #endif - -#if !defined(VINT64x4_STORE_ALIGNED_DEFINED) && 
defined(VINT64x2_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_STORE_ALIGNED_DEFINED) \ + && (defined(VINT64x2_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint64x4_store_aligned(vint64x4 vec, vec_int64 x[4]) +{ + vint64x2_store_aligned(vec.dbl[0], x); + vint64x2_store_aligned(vec.dbl[1], x + 2); +} # define VINT64x4_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT64x4_STORE_DEFINED) && defined(VINT64x2_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_STORE_DEFINED) \ + && (defined(VINT64x2_STORE_DEFINED)) +VEC_FUNC_IMPL void vint64x4_store(vint64x4 vec, vec_int64 x[4]) +{ + vint64x2_store(vec.dbl[0], x); + vint64x2_store(vec.dbl[1], x + 2); +} # define VINT64x4_STORE_DEFINED #endif - -#if !defined(VINT64x4_ADD_DEFINED) && defined(VINT64x2_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_ADD_DEFINED) \ + && (defined(VINT64x2_ADD_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_add(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_ADD_DEFINED #endif - -#if !defined(VINT64x4_SUB_DEFINED) && defined(VINT64x2_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_SUB_DEFINED) \ + && (defined(VINT64x2_SUB_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_sub(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_SUB_DEFINED #endif - -#if !defined(VINT64x4_MUL_DEFINED) && defined(VINT64x2_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_MUL_DEFINED) \ + && (defined(VINT64x2_MUL_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_mul(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_MUL_DEFINED #endif - -#if !defined(VINT64x4_DIV_DEFINED) && defined(VINT64x2_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_DIV_DEFINED) \ + && (defined(VINT64x2_DIV_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_div(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_DIV_DEFINED #endif - -#if !defined(VINT64x4_MOD_DEFINED) && defined(VINT64x2_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_MOD_DEFINED) \ + && (defined(VINT64x2_MOD_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_mod(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_MOD_DEFINED #endif - -#if !defined(VINT64x4_AVG_DEFINED) && defined(VINT64x2_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_AVG_DEFINED) \ + && (defined(VINT64x2_AVG_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_avg(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_AVG_DEFINED #endif - -#if !defined(VINT64x4_AND_DEFINED) && defined(VINT64x2_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_AND_DEFINED) \ + && (defined(VINT64x2_AND_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_and(vint64x4 
vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_AND_DEFINED #endif - -#if !defined(VINT64x4_OR_DEFINED) && defined(VINT64x2_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_OR_DEFINED) \ + && (defined(VINT64x2_OR_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_or(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_OR_DEFINED #endif - -#if !defined(VINT64x4_XOR_DEFINED) && defined(VINT64x2_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_XOR_DEFINED) \ + && (defined(VINT64x2_XOR_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_xor(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_XOR_DEFINED #endif - -#if !defined(VINT64x4_NOT_DEFINED) && defined(VINT64x2_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_NOT_DEFINED) \ + && (defined(VINT64x2_NOT_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_not(vint64x4 vec) +{ + vec.dbl[0] = vint64x2_not(vec.dbl[0]); + vec.dbl[1] = vint64x2_not(vec.dbl[1]); + return vec; +} # define VINT64x4_NOT_DEFINED #endif - -#if !defined(VINT64x4_CMPLT_DEFINED) && defined(VINT64x2_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_CMPLT_DEFINED) \ + && (defined(VINT64x2_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_cmplt(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_CMPLT_DEFINED #endif - -#if !defined(VINT64x4_CMPEQ_DEFINED) && defined(VINT64x2_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_CMPEQ_DEFINED) \ + && (defined(VINT64x2_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_cmpeq(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_CMPEQ_DEFINED #endif - -#if !defined(VINT64x4_CMPGT_DEFINED) && defined(VINT64x2_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_CMPGT_DEFINED) \ + && (defined(VINT64x2_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_cmpgt(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_CMPGT_DEFINED #endif - -#if !defined(VINT64x4_CMPLE_DEFINED) && defined(VINT64x2_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_CMPLE_DEFINED) \ + && (defined(VINT64x2_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_cmple(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_CMPLE_DEFINED #endif - -#if !defined(VINT64x4_CMPGE_DEFINED) && defined(VINT64x2_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_CMPGE_DEFINED) \ + && (defined(VINT64x2_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_cmpge(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_cmpge(vec1.dbl[1], 
vec2.dbl[1]); + return vec1; +} # define VINT64x4_CMPGE_DEFINED #endif - -#if !defined(VINT64x4_MIN_DEFINED) && defined(VINT64x2_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_MIN_DEFINED) \ + && (defined(VINT64x2_MIN_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_min(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_MIN_DEFINED #endif - -#if !defined(VINT64x4_MAX_DEFINED) && defined(VINT64x2_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_MAX_DEFINED) \ + && (defined(VINT64x2_MAX_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_max(vint64x4 vec1, vint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_MAX_DEFINED #endif - -#if !defined(VINT64x4_RSHIFT_DEFINED) && defined(VINT64x2_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_RSHIFT_DEFINED) \ + && (defined(VINT64x2_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_rshift(vint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_RSHIFT_DEFINED #endif - -#if !defined(VINT64x4_LRSHIFT_DEFINED) && defined(VINT64x2_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_LRSHIFT_DEFINED) \ + && (defined(VINT64x2_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_lrshift(vint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_LRSHIFT_DEFINED #endif - -#if !defined(VINT64x4_LSHIFT_DEFINED) && defined(VINT64x2_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 64, 4, 2) +#if !defined(VINT64x4_LSHIFT_DEFINED) \ + && (defined(VINT64x2_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint64x4 vint64x4_lshift(vint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vint64x2_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x2_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x4_LSHIFT_DEFINED #endif - - - -/* vint64x4 */ - -#if !defined(VUINT64x4_SPLAT_DEFINED) && defined(VUINT64x2_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 64, 4, 2) +#if !defined(VUINT64x4_SPLAT_DEFINED) \ + && (defined(VUINT64x2_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_splat(vec_uint64 x) +{ + vuint64x4 vec; + vec.dbl[0] = vuint64x2_splat(x); + vec.dbl[1] = vuint64x2_splat(x); + return vec; +} # define VUINT64x4_SPLAT_DEFINED #endif - -#if !defined(VUINT64x4_LOAD_ALIGNED_DEFINED) && defined(VUINT64x2_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 64, 4, 2) +#if !defined(VUINT64x4_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT64x2_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_load_aligned(const vec_uint64 x[4]) +{ + vuint64x4 vec; + vec.dbl[0] = vuint64x2_load_aligned(x); + vec.dbl[1] = vuint64x2_load_aligned(x + 2); + return vec; +} # define VUINT64x4_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT64x4_LOAD_DEFINED) && defined(VUINT64x2_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 64, 4, 2) +#if !defined(VUINT64x4_LOAD_DEFINED) \ + && (defined(VUINT64x2_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_load(const vec_uint64 x[4]) +{ + vuint64x4 vec; + vec.dbl[0] = vuint64x2_load(x); + vec.dbl[1] = vuint64x2_load(x + 2); + return vec; +} # define VUINT64x4_LOAD_DEFINED 
#endif - -#if !defined(VUINT64x4_STORE_ALIGNED_DEFINED) && defined(VUINT64x2_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 64, 4, 2) +#if !defined(VUINT64x4_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT64x2_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint64x4_store_aligned(vuint64x4 vec, vec_uint64 x[4]) +{ + vuint64x2_store_aligned(vec.dbl[0], x); + vuint64x2_store_aligned(vec.dbl[1], x + 2); +} # define VUINT64x4_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT64x4_STORE_DEFINED) && defined(VUINT64x2_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 64, 4, 2) +#if !defined(VUINT64x4_STORE_DEFINED) \ + && (defined(VUINT64x2_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint64x4_store(vuint64x4 vec, vec_uint64 x[4]) +{ + vuint64x2_store(vec.dbl[0], x); + vuint64x2_store(vec.dbl[1], x + 2); +} # define VUINT64x4_STORE_DEFINED #endif - -#if !defined(VUINT64x4_ADD_DEFINED) && defined(VUINT64x2_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 64, 4, 2) +#if !defined(VUINT64x4_ADD_DEFINED) \ + && (defined(VUINT64x2_ADD_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_add(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_ADD_DEFINED #endif - -#if !defined(VUINT64x4_SUB_DEFINED) && defined(VUINT64x2_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 64, 4, 2) +#if !defined(VUINT64x4_SUB_DEFINED) \ + && (defined(VUINT64x2_SUB_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_sub(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_SUB_DEFINED #endif - -#if !defined(VUINT64x4_MUL_DEFINED) && defined(VUINT64x2_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 64, 4, 2) +#if !defined(VUINT64x4_MUL_DEFINED) \ + && (defined(VUINT64x2_MUL_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_mul(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_MUL_DEFINED #endif - -#if !defined(VUINT64x4_DIV_DEFINED) && defined(VUINT64x2_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 64, 4, 2) +#if !defined(VUINT64x4_DIV_DEFINED) \ + && (defined(VUINT64x2_DIV_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_div(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_DIV_DEFINED #endif - -#if !defined(VUINT64x4_MOD_DEFINED) && defined(VUINT64x2_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 64, 4, 2) +#if !defined(VUINT64x4_MOD_DEFINED) \ + && (defined(VUINT64x2_MOD_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_mod(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_MOD_DEFINED #endif - -#if !defined(VUINT64x4_AVG_DEFINED) && defined(VUINT64x2_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 64, 4, 2) +#if !defined(VUINT64x4_AVG_DEFINED) \ + && (defined(VUINT64x2_AVG_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_avg(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_AVG_DEFINED #endif - -#if !defined(VUINT64x4_AND_DEFINED) && defined(VUINT64x2_AND_DEFINED) -VEC_DOUBLE_AND(u, 64, 4, 2) +#if !defined(VUINT64x4_AND_DEFINED) \ + && (defined(VUINT64x2_AND_DEFINED)) 
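+/* Editorial note, not part of the original changeset: every "double"
+ * implementation in this header follows the same pattern -- the wider
+ * type carries two half-width vectors in its .dbl[] pair, and each
+ * operation is simply forwarded to both halves.  A minimal usage sketch
+ * built only from functions defined earlier in this hunk (array contents
+ * assumed to be initialized elsewhere):
+ *
+ *   vec_uint64 a[4], b[4], out[4];
+ *   vuint64x4 v = vuint64x4_add(vuint64x4_load(a), vuint64x4_load(b));
+ *   vuint64x4_store(v, out);
+ */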
+VEC_FUNC_IMPL vuint64x4 vuint64x4_and(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_AND_DEFINED #endif - -#if !defined(VUINT64x4_OR_DEFINED) && defined(VUINT64x2_OR_DEFINED) -VEC_DOUBLE_OR(u, 64, 4, 2) +#if !defined(VUINT64x4_OR_DEFINED) \ + && (defined(VUINT64x2_OR_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_or(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_OR_DEFINED #endif - -#if !defined(VUINT64x4_XOR_DEFINED) && defined(VUINT64x2_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 64, 4, 2) +#if !defined(VUINT64x4_XOR_DEFINED) \ + && (defined(VUINT64x2_XOR_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_xor(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_XOR_DEFINED #endif - -#if !defined(VUINT64x4_NOT_DEFINED) && defined(VUINT64x2_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 64, 4, 2) +#if !defined(VUINT64x4_NOT_DEFINED) \ + && (defined(VUINT64x2_NOT_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_not(vuint64x4 vec) +{ + vec.dbl[0] = vuint64x2_not(vec.dbl[0]); + vec.dbl[1] = vuint64x2_not(vec.dbl[1]); + return vec; +} # define VUINT64x4_NOT_DEFINED #endif - -#if !defined(VUINT64x4_CMPLT_DEFINED) && defined(VUINT64x2_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 64, 4, 2) +#if !defined(VUINT64x4_CMPLT_DEFINED) \ + && (defined(VUINT64x2_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmplt(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_CMPLT_DEFINED #endif - -#if !defined(VUINT64x4_CMPEQ_DEFINED) && defined(VUINT64x2_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 64, 4, 2) +#if !defined(VUINT64x4_CMPEQ_DEFINED) \ + && (defined(VUINT64x2_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpeq(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_CMPEQ_DEFINED #endif - -#if !defined(VUINT64x4_CMPGT_DEFINED) && defined(VUINT64x2_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 64, 4, 2) +#if !defined(VUINT64x4_CMPGT_DEFINED) \ + && (defined(VUINT64x2_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpgt(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_CMPGT_DEFINED #endif - -#if !defined(VUINT64x4_CMPLE_DEFINED) && defined(VUINT64x2_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 64, 4, 2) +#if !defined(VUINT64x4_CMPLE_DEFINED) \ + && (defined(VUINT64x2_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmple(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_CMPLE_DEFINED #endif - -#if !defined(VUINT64x4_CMPGE_DEFINED) && defined(VUINT64x2_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 64, 4, 2) +#if !defined(VUINT64x4_CMPGE_DEFINED) \ + && (defined(VUINT64x2_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpge(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_cmpge(vec1.dbl[0], vec2.dbl[0]); + 
vec1.dbl[1] = vuint64x2_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_CMPGE_DEFINED #endif - -#if !defined(VUINT64x4_MIN_DEFINED) && defined(VUINT64x2_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 64, 4, 2) +#if !defined(VUINT64x4_MIN_DEFINED) \ + && (defined(VUINT64x2_MIN_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_min(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_MIN_DEFINED #endif - -#if !defined(VUINT64x4_MAX_DEFINED) && defined(VUINT64x2_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 64, 4, 2) +#if !defined(VUINT64x4_MAX_DEFINED) \ + && (defined(VUINT64x2_MAX_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_max(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_MAX_DEFINED #endif - -#if !defined(VUINT64x4_RSHIFT_DEFINED) && defined(VUINT64x2_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 64, 4, 2) +#if !defined(VUINT64x4_RSHIFT_DEFINED) \ + && (defined(VUINT64x2_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_rshift(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_RSHIFT_DEFINED #endif - -#if !defined(VUINT64x4_LRSHIFT_DEFINED) && defined(VUINT64x2_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 64, 4, 2) +#if !defined(VUINT64x4_LRSHIFT_DEFINED) \ + && (defined(VUINT64x2_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_lrshift(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_LRSHIFT_DEFINED #endif - -#if !defined(VUINT64x4_LSHIFT_DEFINED) && defined(VUINT64x2_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 64, 4, 2) +#if !defined(VUINT64x4_LSHIFT_DEFINED) \ + && (defined(VUINT64x2_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_lshift(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.dbl[0] = vuint64x2_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x2_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x4_LSHIFT_DEFINED #endif - - - -/* vuint64x8 */ - -#if !defined(VINT64x8_SPLAT_DEFINED) && defined(VINT64x4_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_SPLAT_DEFINED) \ + && (defined(VINT64x4_SPLAT_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_splat(vec_int64 x) +{ + vint64x8 vec; + vec.dbl[0] = vint64x4_splat(x); + vec.dbl[1] = vint64x4_splat(x); + return vec; +} # define VINT64x8_SPLAT_DEFINED #endif - -#if !defined(VINT64x8_LOAD_ALIGNED_DEFINED) && defined(VINT64x4_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_LOAD_ALIGNED_DEFINED) \ + && (defined(VINT64x4_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_load_aligned(const vec_int64 x[8]) +{ + vint64x8 vec; + vec.dbl[0] = vint64x4_load_aligned(x); + vec.dbl[1] = vint64x4_load_aligned(x + 4); + return vec; +} # define VINT64x8_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VINT64x8_LOAD_DEFINED) && defined(VINT64x4_LOAD_DEFINED) -VEC_DOUBLE_LOAD(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_LOAD_DEFINED) \ + && (defined(VINT64x4_LOAD_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_load(const vec_int64 x[8]) +{ + vint64x8 vec; + vec.dbl[0] = vint64x4_load(x); + vec.dbl[1] = vint64x4_load(x + 4); + return 
vec; +} # define VINT64x8_LOAD_DEFINED #endif - -#if !defined(VINT64x8_STORE_ALIGNED_DEFINED) && defined(VINT64x4_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_STORE_ALIGNED_DEFINED) \ + && (defined(VINT64x4_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vint64x8_store_aligned(vint64x8 vec, vec_int64 x[8]) +{ + vint64x4_store_aligned(vec.dbl[0], x); + vint64x4_store_aligned(vec.dbl[1], x + 4); +} # define VINT64x8_STORE_ALIGNED_DEFINED #endif - -#if !defined(VINT64x8_STORE_DEFINED) && defined(VINT64x4_STORE_DEFINED) -VEC_DOUBLE_STORE(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_STORE_DEFINED) \ + && (defined(VINT64x4_STORE_DEFINED)) +VEC_FUNC_IMPL void vint64x8_store(vint64x8 vec, vec_int64 x[8]) +{ + vint64x4_store(vec.dbl[0], x); + vint64x4_store(vec.dbl[1], x + 4); +} # define VINT64x8_STORE_DEFINED #endif - -#if !defined(VINT64x8_ADD_DEFINED) && defined(VINT64x4_ADD_DEFINED) -VEC_DOUBLE_ADD(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_ADD_DEFINED) \ + && (defined(VINT64x4_ADD_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_add(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_ADD_DEFINED #endif - -#if !defined(VINT64x8_SUB_DEFINED) && defined(VINT64x4_SUB_DEFINED) -VEC_DOUBLE_SUB(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_SUB_DEFINED) \ + && (defined(VINT64x4_SUB_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_sub(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_SUB_DEFINED #endif - -#if !defined(VINT64x8_MUL_DEFINED) && defined(VINT64x4_MUL_DEFINED) -VEC_DOUBLE_MUL(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_MUL_DEFINED) \ + && (defined(VINT64x4_MUL_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_mul(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_MUL_DEFINED #endif - -#if !defined(VINT64x8_DIV_DEFINED) && defined(VINT64x4_DIV_DEFINED) -VEC_DOUBLE_DIV(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_DIV_DEFINED) \ + && (defined(VINT64x4_DIV_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_div(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_DIV_DEFINED #endif - -#if !defined(VINT64x8_MOD_DEFINED) && defined(VINT64x4_MOD_DEFINED) -VEC_DOUBLE_MOD(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_MOD_DEFINED) \ + && (defined(VINT64x4_MOD_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_mod(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_MOD_DEFINED #endif - -#if !defined(VINT64x8_AVG_DEFINED) && defined(VINT64x4_AVG_DEFINED) -VEC_DOUBLE_AVG(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_AVG_DEFINED) \ + && (defined(VINT64x4_AVG_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_avg(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_AVG_DEFINED #endif - -#if !defined(VINT64x8_AND_DEFINED) && defined(VINT64x4_AND_DEFINED) -VEC_DOUBLE_AND(/* nothing */, 64, 8, 4) +#if 
!defined(VINT64x8_AND_DEFINED) \ + && (defined(VINT64x4_AND_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_and(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_AND_DEFINED #endif - -#if !defined(VINT64x8_OR_DEFINED) && defined(VINT64x4_OR_DEFINED) -VEC_DOUBLE_OR(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_OR_DEFINED) \ + && (defined(VINT64x4_OR_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_or(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_OR_DEFINED #endif - -#if !defined(VINT64x8_XOR_DEFINED) && defined(VINT64x4_XOR_DEFINED) -VEC_DOUBLE_XOR(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_XOR_DEFINED) \ + && (defined(VINT64x4_XOR_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_xor(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_XOR_DEFINED #endif - -#if !defined(VINT64x8_NOT_DEFINED) && defined(VINT64x4_NOT_DEFINED) -VEC_DOUBLE_NOT(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_NOT_DEFINED) \ + && (defined(VINT64x4_NOT_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_not(vint64x8 vec) +{ + vec.dbl[0] = vint64x4_not(vec.dbl[0]); + vec.dbl[1] = vint64x4_not(vec.dbl[1]); + return vec; +} # define VINT64x8_NOT_DEFINED #endif - -#if !defined(VINT64x8_CMPLT_DEFINED) && defined(VINT64x4_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_CMPLT_DEFINED) \ + && (defined(VINT64x4_CMPLT_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_cmplt(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_CMPLT_DEFINED #endif - -#if !defined(VINT64x8_CMPEQ_DEFINED) && defined(VINT64x4_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_CMPEQ_DEFINED) \ + && (defined(VINT64x4_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_cmpeq(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_CMPEQ_DEFINED #endif - -#if !defined(VINT64x8_CMPGT_DEFINED) && defined(VINT64x4_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_CMPGT_DEFINED) \ + && (defined(VINT64x4_CMPGT_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_cmpgt(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_CMPGT_DEFINED #endif - -#if !defined(VINT64x8_CMPLE_DEFINED) && defined(VINT64x4_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_CMPLE_DEFINED) \ + && (defined(VINT64x4_CMPLE_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_cmple(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_CMPLE_DEFINED #endif - -#if !defined(VINT64x8_CMPGE_DEFINED) && defined(VINT64x4_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_CMPGE_DEFINED) \ + && (defined(VINT64x4_CMPGE_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_cmpge(vint64x8 vec1, vint64x8 
vec2) +{ + vec1.dbl[0] = vint64x4_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_CMPGE_DEFINED #endif - -#if !defined(VINT64x8_MIN_DEFINED) && defined(VINT64x4_MIN_DEFINED) -VEC_DOUBLE_MIN(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_MIN_DEFINED) \ + && (defined(VINT64x4_MIN_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_min(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_MIN_DEFINED #endif - -#if !defined(VINT64x8_MAX_DEFINED) && defined(VINT64x4_MAX_DEFINED) -VEC_DOUBLE_MAX(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_MAX_DEFINED) \ + && (defined(VINT64x4_MAX_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_max(vint64x8 vec1, vint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_MAX_DEFINED #endif - -#if !defined(VINT64x8_RSHIFT_DEFINED) && defined(VINT64x4_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_RSHIFT_DEFINED) \ + && (defined(VINT64x4_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_rshift(vint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_RSHIFT_DEFINED #endif - -#if !defined(VINT64x8_LRSHIFT_DEFINED) && defined(VINT64x4_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_LRSHIFT_DEFINED) \ + && (defined(VINT64x4_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_lrshift(vint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_LRSHIFT_DEFINED #endif - -#if !defined(VINT64x8_LSHIFT_DEFINED) && defined(VINT64x4_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(/* nothing */, 64, 8, 4) +#if !defined(VINT64x8_LSHIFT_DEFINED) \ + && (defined(VINT64x4_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vint64x8 vint64x8_lshift(vint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vint64x4_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vint64x4_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VINT64x8_LSHIFT_DEFINED #endif - - - -/* vint64x8 */ - -#if !defined(VUINT64x8_SPLAT_DEFINED) && defined(VUINT64x4_SPLAT_DEFINED) -VEC_DOUBLE_SPLAT(u, 64, 8, 4) +#if !defined(VUINT64x8_SPLAT_DEFINED) \ + && (defined(VUINT64x4_SPLAT_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_splat(vec_uint64 x) +{ + vuint64x8 vec; + vec.dbl[0] = vuint64x4_splat(x); + vec.dbl[1] = vuint64x4_splat(x); + return vec; +} # define VUINT64x8_SPLAT_DEFINED #endif - -#if !defined(VUINT64x8_LOAD_ALIGNED_DEFINED) && defined(VUINT64x4_LOAD_ALIGNED_DEFINED) -VEC_DOUBLE_LOAD_ALIGNED(u, 64, 8, 4) +#if !defined(VUINT64x8_LOAD_ALIGNED_DEFINED) \ + && (defined(VUINT64x4_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_load_aligned(const vec_uint64 x[8]) +{ + vuint64x8 vec; + vec.dbl[0] = vuint64x4_load_aligned(x); + vec.dbl[1] = vuint64x4_load_aligned(x + 4); + return vec; +} # define VUINT64x8_LOAD_ALIGNED_DEFINED #endif - -#if !defined(VUINT64x8_LOAD_DEFINED) && defined(VUINT64x4_LOAD_DEFINED) -VEC_DOUBLE_LOAD(u, 64, 8, 4) +#if !defined(VUINT64x8_LOAD_DEFINED) \ + && (defined(VUINT64x4_LOAD_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_load(const vec_uint64 x[8]) +{ + vuint64x8 vec; + vec.dbl[0] 
= vuint64x4_load(x); + vec.dbl[1] = vuint64x4_load(x + 4); + return vec; +} # define VUINT64x8_LOAD_DEFINED #endif - -#if !defined(VUINT64x8_STORE_ALIGNED_DEFINED) && defined(VUINT64x4_STORE_ALIGNED_DEFINED) -VEC_DOUBLE_STORE_ALIGNED(u, 64, 8, 4) +#if !defined(VUINT64x8_STORE_ALIGNED_DEFINED) \ + && (defined(VUINT64x4_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vuint64x8_store_aligned(vuint64x8 vec, vec_uint64 x[8]) +{ + vuint64x4_store_aligned(vec.dbl[0], x); + vuint64x4_store_aligned(vec.dbl[1], x + 4); +} # define VUINT64x8_STORE_ALIGNED_DEFINED #endif - -#if !defined(VUINT64x8_STORE_DEFINED) && defined(VUINT64x4_STORE_DEFINED) -VEC_DOUBLE_STORE(u, 64, 8, 4) +#if !defined(VUINT64x8_STORE_DEFINED) \ + && (defined(VUINT64x4_STORE_DEFINED)) +VEC_FUNC_IMPL void vuint64x8_store(vuint64x8 vec, vec_uint64 x[8]) +{ + vuint64x4_store(vec.dbl[0], x); + vuint64x4_store(vec.dbl[1], x + 4); +} # define VUINT64x8_STORE_DEFINED #endif - -#if !defined(VUINT64x8_ADD_DEFINED) && defined(VUINT64x4_ADD_DEFINED) -VEC_DOUBLE_ADD(u, 64, 8, 4) +#if !defined(VUINT64x8_ADD_DEFINED) \ + && (defined(VUINT64x4_ADD_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_add(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_ADD_DEFINED #endif - -#if !defined(VUINT64x8_SUB_DEFINED) && defined(VUINT64x4_SUB_DEFINED) -VEC_DOUBLE_SUB(u, 64, 8, 4) +#if !defined(VUINT64x8_SUB_DEFINED) \ + && (defined(VUINT64x4_SUB_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_sub(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_SUB_DEFINED #endif - -#if !defined(VUINT64x8_MUL_DEFINED) && defined(VUINT64x4_MUL_DEFINED) -VEC_DOUBLE_MUL(u, 64, 8, 4) +#if !defined(VUINT64x8_MUL_DEFINED) \ + && (defined(VUINT64x4_MUL_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_mul(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_MUL_DEFINED #endif - -#if !defined(VUINT64x8_DIV_DEFINED) && defined(VUINT64x4_DIV_DEFINED) -VEC_DOUBLE_DIV(u, 64, 8, 4) +#if !defined(VUINT64x8_DIV_DEFINED) \ + && (defined(VUINT64x4_DIV_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_div(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_DIV_DEFINED #endif - -#if !defined(VUINT64x8_MOD_DEFINED) && defined(VUINT64x4_MOD_DEFINED) -VEC_DOUBLE_MOD(u, 64, 8, 4) +#if !defined(VUINT64x8_MOD_DEFINED) \ + && (defined(VUINT64x4_MOD_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_mod(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_MOD_DEFINED #endif - -#if !defined(VUINT64x8_AVG_DEFINED) && defined(VUINT64x4_AVG_DEFINED) -VEC_DOUBLE_AVG(u, 64, 8, 4) +#if !defined(VUINT64x8_AVG_DEFINED) \ + && (defined(VUINT64x4_AVG_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_avg(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_AVG_DEFINED #endif - -#if !defined(VUINT64x8_AND_DEFINED) && defined(VUINT64x4_AND_DEFINED) 
-VEC_DOUBLE_AND(u, 64, 8, 4) +#if !defined(VUINT64x8_AND_DEFINED) \ + && (defined(VUINT64x4_AND_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_and(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_and(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_and(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_AND_DEFINED #endif - -#if !defined(VUINT64x8_OR_DEFINED) && defined(VUINT64x4_OR_DEFINED) -VEC_DOUBLE_OR(u, 64, 8, 4) +#if !defined(VUINT64x8_OR_DEFINED) \ + && (defined(VUINT64x4_OR_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_or(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_or(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_or(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_OR_DEFINED #endif - -#if !defined(VUINT64x8_XOR_DEFINED) && defined(VUINT64x4_XOR_DEFINED) -VEC_DOUBLE_XOR(u, 64, 8, 4) +#if !defined(VUINT64x8_XOR_DEFINED) \ + && (defined(VUINT64x4_XOR_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_xor(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_xor(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_xor(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_XOR_DEFINED #endif - -#if !defined(VUINT64x8_NOT_DEFINED) && defined(VUINT64x4_NOT_DEFINED) -VEC_DOUBLE_NOT(u, 64, 8, 4) +#if !defined(VUINT64x8_NOT_DEFINED) \ + && (defined(VUINT64x4_NOT_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_not(vuint64x8 vec) +{ + vec.dbl[0] = vuint64x4_not(vec.dbl[0]); + vec.dbl[1] = vuint64x4_not(vec.dbl[1]); + return vec; +} # define VUINT64x8_NOT_DEFINED #endif - -#if !defined(VUINT64x8_CMPLT_DEFINED) && defined(VUINT64x4_CMPLT_DEFINED) -VEC_DOUBLE_CMPLT(u, 64, 8, 4) +#if !defined(VUINT64x8_CMPLT_DEFINED) \ + && (defined(VUINT64x4_CMPLT_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmplt(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_CMPLT_DEFINED #endif - -#if !defined(VUINT64x8_CMPEQ_DEFINED) && defined(VUINT64x4_CMPEQ_DEFINED) -VEC_DOUBLE_CMPEQ(u, 64, 8, 4) +#if !defined(VUINT64x8_CMPEQ_DEFINED) \ + && (defined(VUINT64x4_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpeq(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_CMPEQ_DEFINED #endif - -#if !defined(VUINT64x8_CMPGT_DEFINED) && defined(VUINT64x4_CMPGT_DEFINED) -VEC_DOUBLE_CMPGT(u, 64, 8, 4) +#if !defined(VUINT64x8_CMPGT_DEFINED) \ + && (defined(VUINT64x4_CMPGT_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpgt(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_CMPGT_DEFINED #endif - -#if !defined(VUINT64x8_CMPLE_DEFINED) && defined(VUINT64x4_CMPLE_DEFINED) -VEC_DOUBLE_CMPLE(u, 64, 8, 4) +#if !defined(VUINT64x8_CMPLE_DEFINED) \ + && (defined(VUINT64x4_CMPLE_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmple(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_CMPLE_DEFINED #endif - -#if !defined(VUINT64x8_CMPGE_DEFINED) && defined(VUINT64x4_CMPGE_DEFINED) -VEC_DOUBLE_CMPGE(u, 64, 8, 4) +#if !defined(VUINT64x8_CMPGE_DEFINED) \ + && (defined(VUINT64x4_CMPGE_DEFINED)) +VEC_FUNC_IMPL vuint64x8 
vuint64x8_cmpge(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_CMPGE_DEFINED #endif - -#if !defined(VUINT64x8_MIN_DEFINED) && defined(VUINT64x4_MIN_DEFINED) -VEC_DOUBLE_MIN(u, 64, 8, 4) +#if !defined(VUINT64x8_MIN_DEFINED) \ + && (defined(VUINT64x4_MIN_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_min(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_MIN_DEFINED #endif - -#if !defined(VUINT64x8_MAX_DEFINED) && defined(VUINT64x4_MAX_DEFINED) -VEC_DOUBLE_MAX(u, 64, 8, 4) +#if !defined(VUINT64x8_MAX_DEFINED) \ + && (defined(VUINT64x4_MAX_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_max(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_MAX_DEFINED #endif - -#if !defined(VUINT64x8_RSHIFT_DEFINED) && defined(VUINT64x4_RSHIFT_DEFINED) -VEC_DOUBLE_RSHIFT(u, 64, 8, 4) +#if !defined(VUINT64x8_RSHIFT_DEFINED) \ + && (defined(VUINT64x4_RSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_rshift(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_rshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_rshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_RSHIFT_DEFINED #endif - -#if !defined(VUINT64x8_LRSHIFT_DEFINED) && defined(VUINT64x4_LRSHIFT_DEFINED) -VEC_DOUBLE_LRSHIFT(u, 64, 8, 4) +#if !defined(VUINT64x8_LRSHIFT_DEFINED) \ + && (defined(VUINT64x4_LRSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_lrshift(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_lrshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_lrshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_LRSHIFT_DEFINED #endif - -#if !defined(VUINT64x8_LSHIFT_DEFINED) && defined(VUINT64x4_LSHIFT_DEFINED) -VEC_DOUBLE_LSHIFT(u, 64, 8, 4) +#if !defined(VUINT64x8_LSHIFT_DEFINED) \ + && (defined(VUINT64x4_LSHIFT_DEFINED)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_lshift(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.dbl[0] = vuint64x4_lshift(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vuint64x4_lshift(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} # define VUINT64x8_LSHIFT_DEFINED #endif - - +#if !defined(VF32x2_SPLAT_DEFINED) \ + && (defined(VF32x1_SPLAT_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_splat(vec_f32 x) +{ + vf32x2 vec; + vec.dbl[0] = vf32x1_splat(x); + vec.dbl[1] = vf32x1_splat(x); + return vec; +} +# define VF32x2_SPLAT_DEFINED +#endif +#if !defined(VF32x2_LOAD_ALIGNED_DEFINED) \ + && (defined(VF32x1_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_load_aligned(const vec_f32 x[2]) +{ + vf32x2 vec; + vec.dbl[0] = vf32x1_load_aligned(x); + vec.dbl[1] = vf32x1_load_aligned(x + 1); + return vec; +} +# define VF32x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x2_LOAD_DEFINED) \ + && (defined(VF32x1_LOAD_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_load(const vec_f32 x[2]) +{ + vf32x2 vec; + vec.dbl[0] = vf32x1_load(x); + vec.dbl[1] = vf32x1_load(x + 1); + return vec; +} +# define VF32x2_LOAD_DEFINED +#endif +#if !defined(VF32x2_STORE_ALIGNED_DEFINED) \ + && (defined(VF32x1_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vf32x2_store_aligned(vf32x2 vec, vec_f32 x[2]) +{ + vf32x1_store_aligned(vec.dbl[0], x); + vf32x1_store_aligned(vec.dbl[1], x + 1); +} +# define 
VF32x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x2_STORE_DEFINED) \ + && (defined(VF32x1_STORE_DEFINED)) +VEC_FUNC_IMPL void vf32x2_store(vf32x2 vec, vec_f32 x[2]) +{ + vf32x1_store(vec.dbl[0], x); + vf32x1_store(vec.dbl[1], x + 1); +} +# define VF32x2_STORE_DEFINED +#endif +#if !defined(VF32x2_ADD_DEFINED) \ + && (defined(VF32x1_ADD_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_add(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x2_ADD_DEFINED +#endif +#if !defined(VF32x2_SUB_DEFINED) \ + && (defined(VF32x1_SUB_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_sub(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x2_SUB_DEFINED +#endif +#if !defined(VF32x2_MUL_DEFINED) \ + && (defined(VF32x1_MUL_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_mul(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x2_MUL_DEFINED +#endif +#if !defined(VF32x2_DIV_DEFINED) \ + && (defined(VF32x1_DIV_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_div(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x2_DIV_DEFINED +#endif +#if !defined(VF32x2_MOD_DEFINED) \ + && (defined(VF32x1_MOD_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_mod(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x2_MOD_DEFINED +#endif +#if !defined(VF32x2_AVG_DEFINED) \ + && (defined(VF32x1_AVG_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_avg(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x2_AVG_DEFINED +#endif +#if !defined(VF32x2_CMPLT_DEFINED) \ + && (defined(VF32x1_CMPLT_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_cmplt(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x2_CMPLT_DEFINED +#endif +#if !defined(VF32x2_CMPEQ_DEFINED) \ + && (defined(VF32x1_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_cmpeq(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x2_CMPEQ_DEFINED +#endif +#if !defined(VF32x2_CMPGT_DEFINED) \ + && (defined(VF32x1_CMPGT_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_cmpgt(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x2_CMPGT_DEFINED +#endif +#if !defined(VF32x2_CMPLE_DEFINED) \ + && (defined(VF32x1_CMPLE_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_cmple(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x2_CMPLE_DEFINED +#endif +#if !defined(VF32x2_CMPGE_DEFINED) \ + && (defined(VF32x1_CMPGE_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_cmpge(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_cmpge(vec1.dbl[1], vec2.dbl[1]); + return 
vec1; +} +# define VF32x2_CMPGE_DEFINED +#endif +#if !defined(VF32x2_MIN_DEFINED) \ + && (defined(VF32x1_MIN_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_min(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x2_MIN_DEFINED +#endif +#if !defined(VF32x2_MAX_DEFINED) \ + && (defined(VF32x1_MAX_DEFINED)) +VEC_FUNC_IMPL vf32x2 vf32x2_max(vf32x2 vec1, vf32x2 vec2) +{ + vec1.dbl[0] = vf32x1_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x1_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x2_MAX_DEFINED +#endif +#if !defined(VF32x4_SPLAT_DEFINED) \ + && (defined(VF32x2_SPLAT_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_splat(vec_f32 x) +{ + vf32x4 vec; + vec.dbl[0] = vf32x2_splat(x); + vec.dbl[1] = vf32x2_splat(x); + return vec; +} +# define VF32x4_SPLAT_DEFINED +#endif +#if !defined(VF32x4_LOAD_ALIGNED_DEFINED) \ + && (defined(VF32x2_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_load_aligned(const vec_f32 x[4]) +{ + vf32x4 vec; + vec.dbl[0] = vf32x2_load_aligned(x); + vec.dbl[1] = vf32x2_load_aligned(x + 2); + return vec; +} +# define VF32x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x4_LOAD_DEFINED) \ + && (defined(VF32x2_LOAD_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_load(const vec_f32 x[4]) +{ + vf32x4 vec; + vec.dbl[0] = vf32x2_load(x); + vec.dbl[1] = vf32x2_load(x + 2); + return vec; +} +# define VF32x4_LOAD_DEFINED +#endif +#if !defined(VF32x4_STORE_ALIGNED_DEFINED) \ + && (defined(VF32x2_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vf32x4_store_aligned(vf32x4 vec, vec_f32 x[4]) +{ + vf32x2_store_aligned(vec.dbl[0], x); + vf32x2_store_aligned(vec.dbl[1], x + 2); +} +# define VF32x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x4_STORE_DEFINED) \ + && (defined(VF32x2_STORE_DEFINED)) +VEC_FUNC_IMPL void vf32x4_store(vf32x4 vec, vec_f32 x[4]) +{ + vf32x2_store(vec.dbl[0], x); + vf32x2_store(vec.dbl[1], x + 2); +} +# define VF32x4_STORE_DEFINED +#endif +#if !defined(VF32x4_ADD_DEFINED) \ + && (defined(VF32x2_ADD_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_add(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_ADD_DEFINED +#endif +#if !defined(VF32x4_SUB_DEFINED) \ + && (defined(VF32x2_SUB_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_sub(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_SUB_DEFINED +#endif +#if !defined(VF32x4_MUL_DEFINED) \ + && (defined(VF32x2_MUL_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_mul(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_MUL_DEFINED +#endif +#if !defined(VF32x4_DIV_DEFINED) \ + && (defined(VF32x2_DIV_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_div(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_DIV_DEFINED +#endif +#if !defined(VF32x4_MOD_DEFINED) \ + && (defined(VF32x2_MOD_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_mod(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_MOD_DEFINED +#endif +#if !defined(VF32x4_AVG_DEFINED) \ + && 
(defined(VF32x2_AVG_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_avg(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_AVG_DEFINED +#endif +#if !defined(VF32x4_CMPLT_DEFINED) \ + && (defined(VF32x2_CMPLT_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_cmplt(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_CMPLT_DEFINED +#endif +#if !defined(VF32x4_CMPEQ_DEFINED) \ + && (defined(VF32x2_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpeq(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_CMPEQ_DEFINED +#endif +#if !defined(VF32x4_CMPGT_DEFINED) \ + && (defined(VF32x2_CMPGT_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpgt(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_CMPGT_DEFINED +#endif +#if !defined(VF32x4_CMPLE_DEFINED) \ + && (defined(VF32x2_CMPLE_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_cmple(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_CMPLE_DEFINED +#endif +#if !defined(VF32x4_CMPGE_DEFINED) \ + && (defined(VF32x2_CMPGE_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpge(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_CMPGE_DEFINED +#endif +#if !defined(VF32x4_MIN_DEFINED) \ + && (defined(VF32x2_MIN_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_min(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_MIN_DEFINED +#endif +#if !defined(VF32x4_MAX_DEFINED) \ + && (defined(VF32x2_MAX_DEFINED)) +VEC_FUNC_IMPL vf32x4 vf32x4_max(vf32x4 vec1, vf32x4 vec2) +{ + vec1.dbl[0] = vf32x2_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x2_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x4_MAX_DEFINED +#endif +#if !defined(VF32x8_SPLAT_DEFINED) \ + && (defined(VF32x4_SPLAT_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_splat(vec_f32 x) +{ + vf32x8 vec; + vec.dbl[0] = vf32x4_splat(x); + vec.dbl[1] = vf32x4_splat(x); + return vec; +} +# define VF32x8_SPLAT_DEFINED +#endif +#if !defined(VF32x8_LOAD_ALIGNED_DEFINED) \ + && (defined(VF32x4_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_load_aligned(const vec_f32 x[8]) +{ + vf32x8 vec; + vec.dbl[0] = vf32x4_load_aligned(x); + vec.dbl[1] = vf32x4_load_aligned(x + 4); + return vec; +} +# define VF32x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x8_LOAD_DEFINED) \ + && (defined(VF32x4_LOAD_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_load(const vec_f32 x[8]) +{ + vf32x8 vec; + vec.dbl[0] = vf32x4_load(x); + vec.dbl[1] = vf32x4_load(x + 4); + return vec; +} +# define VF32x8_LOAD_DEFINED +#endif +#if !defined(VF32x8_STORE_ALIGNED_DEFINED) \ + && (defined(VF32x4_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vf32x8_store_aligned(vf32x8 vec, vec_f32 x[8]) +{ + vf32x4_store_aligned(vec.dbl[0], x); + vf32x4_store_aligned(vec.dbl[1], x + 4); +} +# define VF32x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x8_STORE_DEFINED) \ 
+ && (defined(VF32x4_STORE_DEFINED)) +VEC_FUNC_IMPL void vf32x8_store(vf32x8 vec, vec_f32 x[8]) +{ + vf32x4_store(vec.dbl[0], x); + vf32x4_store(vec.dbl[1], x + 4); +} +# define VF32x8_STORE_DEFINED +#endif +#if !defined(VF32x8_ADD_DEFINED) \ + && (defined(VF32x4_ADD_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_add(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_ADD_DEFINED +#endif +#if !defined(VF32x8_SUB_DEFINED) \ + && (defined(VF32x4_SUB_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_sub(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_SUB_DEFINED +#endif +#if !defined(VF32x8_MUL_DEFINED) \ + && (defined(VF32x4_MUL_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_mul(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_MUL_DEFINED +#endif +#if !defined(VF32x8_DIV_DEFINED) \ + && (defined(VF32x4_DIV_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_div(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_DIV_DEFINED +#endif +#if !defined(VF32x8_MOD_DEFINED) \ + && (defined(VF32x4_MOD_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_mod(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_MOD_DEFINED +#endif +#if !defined(VF32x8_AVG_DEFINED) \ + && (defined(VF32x4_AVG_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_avg(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_AVG_DEFINED +#endif +#if !defined(VF32x8_CMPLT_DEFINED) \ + && (defined(VF32x4_CMPLT_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_cmplt(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_CMPLT_DEFINED +#endif +#if !defined(VF32x8_CMPEQ_DEFINED) \ + && (defined(VF32x4_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_cmpeq(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_CMPEQ_DEFINED +#endif +#if !defined(VF32x8_CMPGT_DEFINED) \ + && (defined(VF32x4_CMPGT_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_cmpgt(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_CMPGT_DEFINED +#endif +#if !defined(VF32x8_CMPLE_DEFINED) \ + && (defined(VF32x4_CMPLE_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_cmple(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_CMPLE_DEFINED +#endif +#if !defined(VF32x8_CMPGE_DEFINED) \ + && (defined(VF32x4_CMPGE_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_cmpge(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_CMPGE_DEFINED +#endif +#if 
!defined(VF32x8_MIN_DEFINED) \ + && (defined(VF32x4_MIN_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_min(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_MIN_DEFINED +#endif +#if !defined(VF32x8_MAX_DEFINED) \ + && (defined(VF32x4_MAX_DEFINED)) +VEC_FUNC_IMPL vf32x8 vf32x8_max(vf32x8 vec1, vf32x8 vec2) +{ + vec1.dbl[0] = vf32x4_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x4_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x8_MAX_DEFINED +#endif +#if !defined(VF32x16_SPLAT_DEFINED) \ + && (defined(VF32x8_SPLAT_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_splat(vec_f32 x) +{ + vf32x16 vec; + vec.dbl[0] = vf32x8_splat(x); + vec.dbl[1] = vf32x8_splat(x); + return vec; +} +# define VF32x16_SPLAT_DEFINED +#endif +#if !defined(VF32x16_LOAD_ALIGNED_DEFINED) \ + && (defined(VF32x8_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_load_aligned(const vec_f32 x[16]) +{ + vf32x16 vec; + vec.dbl[0] = vf32x8_load_aligned(x); + vec.dbl[1] = vf32x8_load_aligned(x + 8); + return vec; +} +# define VF32x16_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x16_LOAD_DEFINED) \ + && (defined(VF32x8_LOAD_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_load(const vec_f32 x[16]) +{ + vf32x16 vec; + vec.dbl[0] = vf32x8_load(x); + vec.dbl[1] = vf32x8_load(x + 8); + return vec; +} +# define VF32x16_LOAD_DEFINED +#endif +#if !defined(VF32x16_STORE_ALIGNED_DEFINED) \ + && (defined(VF32x8_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vf32x16_store_aligned(vf32x16 vec, vec_f32 x[16]) +{ + vf32x8_store_aligned(vec.dbl[0], x); + vf32x8_store_aligned(vec.dbl[1], x + 8); +} +# define VF32x16_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x16_STORE_DEFINED) \ + && (defined(VF32x8_STORE_DEFINED)) +VEC_FUNC_IMPL void vf32x16_store(vf32x16 vec, vec_f32 x[16]) +{ + vf32x8_store(vec.dbl[0], x); + vf32x8_store(vec.dbl[1], x + 8); +} +# define VF32x16_STORE_DEFINED +#endif +#if !defined(VF32x16_ADD_DEFINED) \ + && (defined(VF32x8_ADD_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_add(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_ADD_DEFINED +#endif +#if !defined(VF32x16_SUB_DEFINED) \ + && (defined(VF32x8_SUB_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_sub(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_SUB_DEFINED +#endif +#if !defined(VF32x16_MUL_DEFINED) \ + && (defined(VF32x8_MUL_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_mul(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_MUL_DEFINED +#endif +#if !defined(VF32x16_DIV_DEFINED) \ + && (defined(VF32x8_DIV_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_div(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_DIV_DEFINED +#endif +#if !defined(VF32x16_MOD_DEFINED) \ + && (defined(VF32x8_MOD_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_mod(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_MOD_DEFINED +#endif +#if !defined(VF32x16_AVG_DEFINED) \ + && 
(defined(VF32x8_AVG_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_avg(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_AVG_DEFINED +#endif +#if !defined(VF32x16_CMPLT_DEFINED) \ + && (defined(VF32x8_CMPLT_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_cmplt(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_CMPLT_DEFINED +#endif +#if !defined(VF32x16_CMPEQ_DEFINED) \ + && (defined(VF32x8_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_cmpeq(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_CMPEQ_DEFINED +#endif +#if !defined(VF32x16_CMPGT_DEFINED) \ + && (defined(VF32x8_CMPGT_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_cmpgt(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_CMPGT_DEFINED +#endif +#if !defined(VF32x16_CMPLE_DEFINED) \ + && (defined(VF32x8_CMPLE_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_cmple(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_CMPLE_DEFINED +#endif +#if !defined(VF32x16_CMPGE_DEFINED) \ + && (defined(VF32x8_CMPGE_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_cmpge(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_CMPGE_DEFINED +#endif +#if !defined(VF32x16_MIN_DEFINED) \ + && (defined(VF32x8_MIN_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_min(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_MIN_DEFINED +#endif +#if !defined(VF32x16_MAX_DEFINED) \ + && (defined(VF32x8_MAX_DEFINED)) +VEC_FUNC_IMPL vf32x16 vf32x16_max(vf32x16 vec1, vf32x16 vec2) +{ + vec1.dbl[0] = vf32x8_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf32x8_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF32x16_MAX_DEFINED +#endif +#if !defined(VF64x2_SPLAT_DEFINED) \ + && (defined(VF64x1_SPLAT_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_splat(vec_f64 x) +{ + vf64x2 vec; + vec.dbl[0] = vf64x1_splat(x); + vec.dbl[1] = vf64x1_splat(x); + return vec; +} +# define VF64x2_SPLAT_DEFINED +#endif +#if !defined(VF64x2_LOAD_ALIGNED_DEFINED) \ + && (defined(VF64x1_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_load_aligned(const vec_f64 x[2]) +{ + vf64x2 vec; + vec.dbl[0] = vf64x1_load_aligned(x); + vec.dbl[1] = vf64x1_load_aligned(x + 1); + return vec; +} +# define VF64x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF64x2_LOAD_DEFINED) \ + && (defined(VF64x1_LOAD_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_load(const vec_f64 x[2]) +{ + vf64x2 vec; + vec.dbl[0] = vf64x1_load(x); + vec.dbl[1] = vf64x1_load(x + 1); + return vec; +} +# define VF64x2_LOAD_DEFINED +#endif +#if !defined(VF64x2_STORE_ALIGNED_DEFINED) \ + && (defined(VF64x1_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vf64x2_store_aligned(vf64x2 vec, vec_f64 x[2]) +{ + vf64x1_store_aligned(vec.dbl[0], x); + vf64x1_store_aligned(vec.dbl[1], x + 1); +} +# define VF64x2_STORE_ALIGNED_DEFINED 
+#endif +#if !defined(VF64x2_STORE_DEFINED) \ + && (defined(VF64x1_STORE_DEFINED)) +VEC_FUNC_IMPL void vf64x2_store(vf64x2 vec, vec_f64 x[2]) +{ + vf64x1_store(vec.dbl[0], x); + vf64x1_store(vec.dbl[1], x + 1); +} +# define VF64x2_STORE_DEFINED +#endif +#if !defined(VF64x2_ADD_DEFINED) \ + && (defined(VF64x1_ADD_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_add(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x2_ADD_DEFINED +#endif +#if !defined(VF64x2_SUB_DEFINED) \ + && (defined(VF64x1_SUB_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_sub(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x2_SUB_DEFINED +#endif +#if !defined(VF64x2_MUL_DEFINED) \ + && (defined(VF64x1_MUL_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_mul(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x2_MUL_DEFINED +#endif +#if !defined(VF64x2_DIV_DEFINED) \ + && (defined(VF64x1_DIV_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_div(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x2_DIV_DEFINED +#endif +#if !defined(VF64x2_MOD_DEFINED) \ + && (defined(VF64x1_MOD_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_mod(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x2_MOD_DEFINED +#endif +#if !defined(VF64x2_AVG_DEFINED) \ + && (defined(VF64x1_AVG_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_avg(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x2_AVG_DEFINED +#endif +#if !defined(VF64x2_CMPLT_DEFINED) \ + && (defined(VF64x1_CMPLT_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_cmplt(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x2_CMPLT_DEFINED +#endif +#if !defined(VF64x2_CMPEQ_DEFINED) \ + && (defined(VF64x1_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_cmpeq(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x2_CMPEQ_DEFINED +#endif +#if !defined(VF64x2_CMPGT_DEFINED) \ + && (defined(VF64x1_CMPGT_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_cmpgt(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x2_CMPGT_DEFINED +#endif +#if !defined(VF64x2_CMPLE_DEFINED) \ + && (defined(VF64x1_CMPLE_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_cmple(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x2_CMPLE_DEFINED +#endif +#if !defined(VF64x2_CMPGE_DEFINED) \ + && (defined(VF64x1_CMPGE_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_cmpge(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define 
VF64x2_CMPGE_DEFINED +#endif +#if !defined(VF64x2_MIN_DEFINED) \ + && (defined(VF64x1_MIN_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_min(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x2_MIN_DEFINED +#endif +#if !defined(VF64x2_MAX_DEFINED) \ + && (defined(VF64x1_MAX_DEFINED)) +VEC_FUNC_IMPL vf64x2 vf64x2_max(vf64x2 vec1, vf64x2 vec2) +{ + vec1.dbl[0] = vf64x1_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x1_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x2_MAX_DEFINED +#endif +#if !defined(VF64x4_SPLAT_DEFINED) \ + && (defined(VF64x2_SPLAT_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_splat(vec_f64 x) +{ + vf64x4 vec; + vec.dbl[0] = vf64x2_splat(x); + vec.dbl[1] = vf64x2_splat(x); + return vec; +} +# define VF64x4_SPLAT_DEFINED +#endif +#if !defined(VF64x4_LOAD_ALIGNED_DEFINED) \ + && (defined(VF64x2_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_load_aligned(const vec_f64 x[4]) +{ + vf64x4 vec; + vec.dbl[0] = vf64x2_load_aligned(x); + vec.dbl[1] = vf64x2_load_aligned(x + 2); + return vec; +} +# define VF64x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF64x4_LOAD_DEFINED) \ + && (defined(VF64x2_LOAD_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_load(const vec_f64 x[4]) +{ + vf64x4 vec; + vec.dbl[0] = vf64x2_load(x); + vec.dbl[1] = vf64x2_load(x + 2); + return vec; +} +# define VF64x4_LOAD_DEFINED +#endif +#if !defined(VF64x4_STORE_ALIGNED_DEFINED) \ + && (defined(VF64x2_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vf64x4_store_aligned(vf64x4 vec, vec_f64 x[4]) +{ + vf64x2_store_aligned(vec.dbl[0], x); + vf64x2_store_aligned(vec.dbl[1], x + 2); +} +# define VF64x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF64x4_STORE_DEFINED) \ + && (defined(VF64x2_STORE_DEFINED)) +VEC_FUNC_IMPL void vf64x4_store(vf64x4 vec, vec_f64 x[4]) +{ + vf64x2_store(vec.dbl[0], x); + vf64x2_store(vec.dbl[1], x + 2); +} +# define VF64x4_STORE_DEFINED +#endif +#if !defined(VF64x4_ADD_DEFINED) \ + && (defined(VF64x2_ADD_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_add(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_ADD_DEFINED +#endif +#if !defined(VF64x4_SUB_DEFINED) \ + && (defined(VF64x2_SUB_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_sub(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_SUB_DEFINED +#endif +#if !defined(VF64x4_MUL_DEFINED) \ + && (defined(VF64x2_MUL_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_mul(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_MUL_DEFINED +#endif +#if !defined(VF64x4_DIV_DEFINED) \ + && (defined(VF64x2_DIV_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_div(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_DIV_DEFINED +#endif +#if !defined(VF64x4_MOD_DEFINED) \ + && (defined(VF64x2_MOD_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_mod(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_MOD_DEFINED +#endif +#if !defined(VF64x4_AVG_DEFINED) \ + && (defined(VF64x2_AVG_DEFINED)) 
+VEC_FUNC_IMPL vf64x4 vf64x4_avg(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_AVG_DEFINED +#endif +#if !defined(VF64x4_CMPLT_DEFINED) \ + && (defined(VF64x2_CMPLT_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_cmplt(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_CMPLT_DEFINED +#endif +#if !defined(VF64x4_CMPEQ_DEFINED) \ + && (defined(VF64x2_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_cmpeq(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_CMPEQ_DEFINED +#endif +#if !defined(VF64x4_CMPGT_DEFINED) \ + && (defined(VF64x2_CMPGT_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_cmpgt(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_CMPGT_DEFINED +#endif +#if !defined(VF64x4_CMPLE_DEFINED) \ + && (defined(VF64x2_CMPLE_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_cmple(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_CMPLE_DEFINED +#endif +#if !defined(VF64x4_CMPGE_DEFINED) \ + && (defined(VF64x2_CMPGE_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_cmpge(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_CMPGE_DEFINED +#endif +#if !defined(VF64x4_MIN_DEFINED) \ + && (defined(VF64x2_MIN_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_min(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_MIN_DEFINED +#endif +#if !defined(VF64x4_MAX_DEFINED) \ + && (defined(VF64x2_MAX_DEFINED)) +VEC_FUNC_IMPL vf64x4 vf64x4_max(vf64x4 vec1, vf64x4 vec2) +{ + vec1.dbl[0] = vf64x2_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x2_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x4_MAX_DEFINED +#endif +#if !defined(VF64x8_SPLAT_DEFINED) \ + && (defined(VF64x4_SPLAT_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_splat(vec_f64 x) +{ + vf64x8 vec; + vec.dbl[0] = vf64x4_splat(x); + vec.dbl[1] = vf64x4_splat(x); + return vec; +} +# define VF64x8_SPLAT_DEFINED +#endif +#if !defined(VF64x8_LOAD_ALIGNED_DEFINED) \ + && (defined(VF64x4_LOAD_ALIGNED_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_load_aligned(const vec_f64 x[8]) +{ + vf64x8 vec; + vec.dbl[0] = vf64x4_load_aligned(x); + vec.dbl[1] = vf64x4_load_aligned(x + 4); + return vec; +} +# define VF64x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF64x8_LOAD_DEFINED) \ + && (defined(VF64x4_LOAD_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_load(const vec_f64 x[8]) +{ + vf64x8 vec; + vec.dbl[0] = vf64x4_load(x); + vec.dbl[1] = vf64x4_load(x + 4); + return vec; +} +# define VF64x8_LOAD_DEFINED +#endif +#if !defined(VF64x8_STORE_ALIGNED_DEFINED) \ + && (defined(VF64x4_STORE_ALIGNED_DEFINED)) +VEC_FUNC_IMPL void vf64x8_store_aligned(vf64x8 vec, vec_f64 x[8]) +{ + vf64x4_store_aligned(vec.dbl[0], x); + vf64x4_store_aligned(vec.dbl[1], x + 4); +} +# define VF64x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF64x8_STORE_DEFINED) \ + && 
(defined(VF64x4_STORE_DEFINED)) +VEC_FUNC_IMPL void vf64x8_store(vf64x8 vec, vec_f64 x[8]) +{ + vf64x4_store(vec.dbl[0], x); + vf64x4_store(vec.dbl[1], x + 4); +} +# define VF64x8_STORE_DEFINED +#endif +#if !defined(VF64x8_ADD_DEFINED) \ + && (defined(VF64x4_ADD_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_add(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_add(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_add(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_ADD_DEFINED +#endif +#if !defined(VF64x8_SUB_DEFINED) \ + && (defined(VF64x4_SUB_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_sub(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_sub(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_sub(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_SUB_DEFINED +#endif +#if !defined(VF64x8_MUL_DEFINED) \ + && (defined(VF64x4_MUL_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_mul(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_mul(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_mul(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_MUL_DEFINED +#endif +#if !defined(VF64x8_DIV_DEFINED) \ + && (defined(VF64x4_DIV_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_div(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_div(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_div(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_DIV_DEFINED +#endif +#if !defined(VF64x8_MOD_DEFINED) \ + && (defined(VF64x4_MOD_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_mod(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_mod(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_mod(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_MOD_DEFINED +#endif +#if !defined(VF64x8_AVG_DEFINED) \ + && (defined(VF64x4_AVG_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_avg(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_avg(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_avg(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_AVG_DEFINED +#endif +#if !defined(VF64x8_CMPLT_DEFINED) \ + && (defined(VF64x4_CMPLT_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_cmplt(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_cmplt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_cmplt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_CMPLT_DEFINED +#endif +#if !defined(VF64x8_CMPEQ_DEFINED) \ + && (defined(VF64x4_CMPEQ_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_cmpeq(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_cmpeq(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_cmpeq(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_CMPEQ_DEFINED +#endif +#if !defined(VF64x8_CMPGT_DEFINED) \ + && (defined(VF64x4_CMPGT_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_cmpgt(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_cmpgt(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_cmpgt(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_CMPGT_DEFINED +#endif +#if !defined(VF64x8_CMPLE_DEFINED) \ + && (defined(VF64x4_CMPLE_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_cmple(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_cmple(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_cmple(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_CMPLE_DEFINED +#endif +#if !defined(VF64x8_CMPGE_DEFINED) \ + && (defined(VF64x4_CMPGE_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_cmpge(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_cmpge(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_cmpge(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_CMPGE_DEFINED +#endif +#if !defined(VF64x8_MIN_DEFINED) \ 
+ && (defined(VF64x4_MIN_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_min(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_min(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_min(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_MIN_DEFINED +#endif +#if !defined(VF64x8_MAX_DEFINED) \ + && (defined(VF64x4_MAX_DEFINED)) +VEC_FUNC_IMPL vf64x8 vf64x8_max(vf64x8 vec1, vf64x8 vec2) +{ + vec1.dbl[0] = vf64x4_max(vec1.dbl[0], vec2.dbl[0]); + vec1.dbl[1] = vf64x4_max(vec1.dbl[1], vec2.dbl[1]); + return vec1; +} +# define VF64x8_MAX_DEFINED +#endif
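The floating-point functions added to double.h above all follow the library's "double" composition pattern: a vector of N lanes carries two (N/2)-lane halves in its .dbl[2] member, and each wider operation is defined only when the half-width operation exists, by applying that operation to each half (see the VF32x8_ADD_DEFINED / VF32x4_ADD_DEFINED guards in the diff). The sketch below is illustrative only and compiles on its own; my_f32x4 and my_f32x8 are hypothetical stand-ins for the library's real vf32x4/vf32x8 types, which additionally have SIMD backends.

    /* Minimal standalone sketch of the "double" composition pattern.
     * my_f32x4/my_f32x8 are illustrative stand-ins, not the real vec types. */
    #include <stdio.h>

    typedef struct { float f[4]; } my_f32x4;        /* half-width type */
    typedef struct { my_f32x4 dbl[2]; } my_f32x8;   /* two halves, like .dbl[2] */

    /* half-width op: the building block assumed to already be defined */
    static my_f32x4 my_f32x4_add(my_f32x4 a, my_f32x4 b)
    {
        for (int i = 0; i < 4; i++)
            a.f[i] += b.f[i];
        return a;
    }

    /* full-width op, composed the same way as the generated vf32x8_add:
     * apply the half-width op to each half and return the result */
    static my_f32x8 my_f32x8_add(my_f32x8 a, my_f32x8 b)
    {
        a.dbl[0] = my_f32x4_add(a.dbl[0], b.dbl[0]);
        a.dbl[1] = my_f32x4_add(a.dbl[1], b.dbl[1]);
        return a;
    }

    int main(void)
    {
        my_f32x8 a = {{{{1, 2, 3, 4}}, {{5, 6, 7, 8}}}};
        my_f32x8 b = {{{{8, 7, 6, 5}}, {{4, 3, 2, 1}}}};
        my_f32x8 c = my_f32x8_add(a, b);

        for (int h = 0; h < 2; h++)
            for (int i = 0; i < 4; i++)
                printf("%g ", c.dbl[h].f[i]);   /* prints: 9 9 9 9 9 9 9 9 */
        putchar('\n');
        return 0;
    }

Because each width only depends on the next-smaller width being defined, the preprocessor guards chain naturally: once a backend provides, say, vf32x4, the doubled fallbacks supply vf32x8, vf32x16, and so on without any backend-specific code.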
--- a/include/vec/impl/gcc.h Tue Apr 29 16:54:13 2025 -0400 +++ b/include/vec/impl/gcc.h Wed Apr 30 18:36:38 2025 -0400 @@ -25,24 +25,20 @@ /* This file is automatically generated! Do not edit it directly! * Edit the code that generates it in utils/gengcc.c --paper */ -#ifndef VEC_IMPL_GCC_H_ -#define VEC_IMPL_GCC_H_ - - +/* ------------------------------------------------------------------------ */ +/* PREPROCESSOR HELL INCOMING */ - -/* vuint8x2 */ - -#ifndef VINT8x2_SPLAT_DEFINED +#if !defined(VINT8x2_SPLAT_DEFINED) VEC_FUNC_IMPL vint8x2 vint8x2_splat(vec_int8 x) { vint8x2 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; return vec; } # define VINT8x2_SPLAT_DEFINED #endif -#ifndef VINT8x2_LOAD_ALIGNED_DEFINED +#if !defined(VINT8x2_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint8x2 vint8x2_load_aligned(const vec_int8 x[2]) { vint8x2 vec; @@ -51,7 +47,7 @@ } # define VINT8x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT8x2_LOAD_DEFINED +#if !defined(VINT8x2_LOAD_DEFINED) VEC_FUNC_IMPL vint8x2 vint8x2_load(const vec_int8 x[2]) { vint8x2 vec; @@ -60,21 +56,21 @@ } # define VINT8x2_LOAD_DEFINED #endif -#ifndef VINT8x2_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint8x2_store_aligned(vint8x2 vec, vec_int8 arr[2]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT8x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x2_store_aligned(vint8x2 vec, vec_int8 x[2]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT8x2_STORE_ALIGNED_DEFINED #endif -#ifndef VINT8x2_STORE_DEFINED -VEC_FUNC_IMPL void vint8x2_store(vint8x2 vec, vec_int8 arr[2]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT8x2_STORE_DEFINED) +VEC_FUNC_IMPL void vint8x2_store(vint8x2 vec, vec_int8 x[2]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT8x2_STORE_DEFINED #endif -#ifndef VINT8x2_ADD_DEFINED +#if !defined(VINT8x2_ADD_DEFINED) VEC_FUNC_IMPL vint8x2 vint8x2_add(vint8x2 vec1, vint8x2 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -82,7 +78,7 @@ } # define VINT8x2_ADD_DEFINED #endif -#ifndef VINT8x2_SUB_DEFINED +#if !defined(VINT8x2_SUB_DEFINED) VEC_FUNC_IMPL vint8x2 vint8x2_sub(vint8x2 vec1, vint8x2 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -90,7 +86,7 @@ } # define VINT8x2_SUB_DEFINED #endif -#ifndef VINT8x2_MUL_DEFINED +#if !defined(VINT8x2_MUL_DEFINED) VEC_FUNC_IMPL vint8x2 vint8x2_mul(vint8x2 vec1, vint8x2 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -98,106 +94,8 @@ } # define VINT8x2_MUL_DEFINED #endif -#ifndef VINT8x2_AND_DEFINED -VEC_FUNC_IMPL vint8x2 vint8x2_and(vint8x2 vec1, vint8x2 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT8x2_AND_DEFINED -#endif -#ifndef VINT8x2_OR_DEFINED -VEC_FUNC_IMPL vint8x2 vint8x2_or(vint8x2 vec1, vint8x2 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT8x2_OR_DEFINED -#endif -#ifndef VINT8x2_XOR_DEFINED -VEC_FUNC_IMPL vint8x2 vint8x2_xor(vint8x2 vec1, vint8x2 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT8x2_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x2_CMPLT_DEFINED -VEC_FUNC_IMPL vint8x2 vint8x2_cmplt(vint8x2 vec1, vint8x2 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT8x2_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x2_CMPEQ_DEFINED -VEC_FUNC_IMPL vint8x2 vint8x2_cmpeq(vint8x2 vec1, vint8x2 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT8x2_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x2_CMPGT_DEFINED 
-VEC_FUNC_IMPL vint8x2 vint8x2_cmpgt(vint8x2 vec1, vint8x2 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT8x2_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x2_CMPLE_DEFINED -VEC_FUNC_IMPL vint8x2 vint8x2_cmple(vint8x2 vec1, vint8x2 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT8x2_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x2_CMPGE_DEFINED -VEC_FUNC_IMPL vint8x2 vint8x2_cmpge(vint8x2 vec1, vint8x2 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT8x2_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x2_MIN_DEFINED -VEC_FUNC_IMPL vint8x2 vint8x2_min(vint8x2 vec1, vint8x2 vec2) -{ - vint8x2 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT8x2_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x2_MAX_DEFINED -VEC_FUNC_IMPL vint8x2 vint8x2_max(vint8x2 vec1, vint8x2 vec2) -{ - vint8x2 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT8x2_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x2_AVG_DEFINED +#if !defined(VINT8x2_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x2 vint8x2_avg(vint8x2 vec1, vint8x2 vec2) { vint8x2 ones = vint8x2_splat(1); @@ -211,19 +109,107 @@ } # define VINT8x2_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x2_LSHIFT_DEFINED -VEC_FUNC_IMPL vint8x2 vint8x2_lshift(vint8x2 vec1, vuint8x2 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT8x2_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x2_RSHIFT_DEFINED +#if !defined(VINT8x2_AND_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_and(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT8x2_AND_DEFINED +#endif +#if !defined(VINT8x2_OR_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_or(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT8x2_OR_DEFINED +#endif +#if !defined(VINT8x2_XOR_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_xor(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT8x2_XOR_DEFINED +#endif +#if !defined(VINT8x2_NOT_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_not(vint8x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT8x2_NOT_DEFINED +#endif +#if !defined(VINT8x2_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x2 vint8x2_cmplt(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT8x2_CMPLT_DEFINED +#endif +#if !defined(VINT8x2_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x2 vint8x2_cmpeq(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT8x2_CMPEQ_DEFINED +#endif +#if !defined(VINT8x2_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x2 vint8x2_cmpgt(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT8x2_CMPGT_DEFINED +#endif +#if !defined(VINT8x2_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x2 vint8x2_cmple(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT8x2_CMPLE_DEFINED +#endif +#if !defined(VINT8x2_CMPGE_DEFINED) \ 
+ && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x2 vint8x2_cmpge(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT8x2_CMPGE_DEFINED +#endif +#if !defined(VINT8x2_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x2 vint8x2_min(vint8x2 vec1, vint8x2 vec2) +{ + vint8x2 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x2_MIN_DEFINED +#endif +#if !defined(VINT8x2_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x2 vint8x2_max(vint8x2 vec1, vint8x2 vec2) +{ + vint8x2 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x2_MAX_DEFINED +#endif +#if !defined(VINT8x2_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x2 vint8x2_rshift(vint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -231,9 +217,8 @@ } # define VINT8x2_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x2_LRSHIFT_DEFINED +#if !defined(VINT8x2_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x2 vint8x2_lrshift(vint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(2))))vec1.gcc >> vec2.gcc); @@ -241,29 +226,26 @@ } # define VINT8x2_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT8x2_NOT_DEFINED -VEC_FUNC_IMPL vint8x2 vint8x2_not(vint8x2 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT8x2_NOT_DEFINED -#endif - - -/* vint8x2 */ - -#ifndef VUINT8x2_SPLAT_DEFINED +#if !defined(VINT8x2_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x2 vint8x2_lshift(vint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT8x2_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x2_SPLAT_DEFINED) VEC_FUNC_IMPL vuint8x2 vuint8x2_splat(vec_uint8 x) { vuint8x2 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; return vec; } # define VUINT8x2_SPLAT_DEFINED #endif -#ifndef VUINT8x2_LOAD_ALIGNED_DEFINED +#if !defined(VUINT8x2_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint8x2 vuint8x2_load_aligned(const vec_uint8 x[2]) { vuint8x2 vec; @@ -272,7 +254,7 @@ } # define VUINT8x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT8x2_LOAD_DEFINED +#if !defined(VUINT8x2_LOAD_DEFINED) VEC_FUNC_IMPL vuint8x2 vuint8x2_load(const vec_uint8 x[2]) { vuint8x2 vec; @@ -281,21 +263,21 @@ } # define VUINT8x2_LOAD_DEFINED #endif -#ifndef VUINT8x2_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint8x2_store_aligned(vuint8x2 vec, vec_uint8 arr[2]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT8x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x2_store_aligned(vuint8x2 vec, vec_uint8 x[2]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT8x2_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT8x2_STORE_DEFINED -VEC_FUNC_IMPL void vuint8x2_store(vuint8x2 vec, vec_uint8 arr[2]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT8x2_STORE_DEFINED) +VEC_FUNC_IMPL void vuint8x2_store(vuint8x2 vec, vec_uint8 x[2]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT8x2_STORE_DEFINED #endif -#ifndef VUINT8x2_ADD_DEFINED +#if !defined(VUINT8x2_ADD_DEFINED) VEC_FUNC_IMPL vuint8x2 vuint8x2_add(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -303,7 +285,7 @@ } # define VUINT8x2_ADD_DEFINED #endif -#ifndef VUINT8x2_SUB_DEFINED +#if !defined(VUINT8x2_SUB_DEFINED) VEC_FUNC_IMPL 
vuint8x2 vuint8x2_sub(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -311,7 +293,7 @@ } # define VUINT8x2_SUB_DEFINED #endif -#ifndef VUINT8x2_MUL_DEFINED +#if !defined(VUINT8x2_MUL_DEFINED) VEC_FUNC_IMPL vuint8x2 vuint8x2_mul(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -319,7 +301,16 @@ } # define VUINT8x2_MUL_DEFINED #endif -#ifndef VUINT8x2_AND_DEFINED +#if !defined(VUINT8x2_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint8x2 vuint8x2_avg(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT8x2_AVG_DEFINED +#endif +#if !defined(VUINT8x2_AND_DEFINED) VEC_FUNC_IMPL vuint8x2 vuint8x2_and(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -327,7 +318,7 @@ } # define VUINT8x2_AND_DEFINED #endif -#ifndef VUINT8x2_OR_DEFINED +#if !defined(VUINT8x2_OR_DEFINED) VEC_FUNC_IMPL vuint8x2 vuint8x2_or(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -335,7 +326,7 @@ } # define VUINT8x2_OR_DEFINED #endif -#ifndef VUINT8x2_XOR_DEFINED +#if !defined(VUINT8x2_XOR_DEFINED) VEC_FUNC_IMPL vuint8x2 vuint8x2_xor(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -343,8 +334,16 @@ } # define VUINT8x2_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x2_CMPLT_DEFINED +#if !defined(VUINT8x2_NOT_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_not(vuint8x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT8x2_NOT_DEFINED +#endif +#if !defined(VUINT8x2_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x2 vuint8x2_cmplt(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -352,9 +351,8 @@ } # define VUINT8x2_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x2_CMPEQ_DEFINED +#if !defined(VUINT8x2_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpeq(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -362,9 +360,8 @@ } # define VUINT8x2_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x2_CMPGT_DEFINED +#if !defined(VUINT8x2_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpgt(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -372,9 +369,8 @@ } # define VUINT8x2_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x2_CMPLE_DEFINED +#if !defined(VUINT8x2_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x2 vuint8x2_cmple(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -382,9 +378,8 @@ } # define VUINT8x2_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x2_CMPGE_DEFINED +#if !defined(VUINT8x2_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpge(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -392,9 +387,8 @@ } # define VUINT8x2_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x2_MIN_DEFINED +#if !defined(VUINT8x2_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x2 vuint8x2_min(vuint8x2 vec1, vuint8x2 vec2) { vuint8x2 mask; @@ -404,9 +398,8 @@ } # define VUINT8x2_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x2_MAX_DEFINED +#if !defined(VUINT8x2_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x2 vuint8x2_max(vuint8x2 vec1, vuint8x2 vec2) { vuint8x2 mask; @@ -416,30 
+409,8 @@ } # define VUINT8x2_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x2_AVG_DEFINED -VEC_FUNC_IMPL vuint8x2 vuint8x2_avg(vuint8x2 vec1, vuint8x2 vec2) -{ - vint8x2 ones = vint8x2_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT8x2_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x2_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint8x2 vuint8x2_lshift(vuint8x2 vec1, vuint8x2 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT8x2_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x2_RSHIFT_DEFINED +#if !defined(VUINT8x2_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x2 vuint8x2_rshift(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -447,9 +418,8 @@ } # define VUINT8x2_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x2_LRSHIFT_DEFINED +#if !defined(VUINT8x2_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x2 vuint8x2_lrshift(vuint8x2 vec1, vuint8x2 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(2))))vec1.gcc >> vec2.gcc); @@ -457,29 +427,28 @@ } # define VUINT8x2_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT8x2_NOT_DEFINED -VEC_FUNC_IMPL vuint8x2 vuint8x2_not(vuint8x2 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT8x2_NOT_DEFINED -#endif - - -/* vuint8x4 */ - -#ifndef VINT8x4_SPLAT_DEFINED +#if !defined(VUINT8x2_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint8x2 vuint8x2_lshift(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT8x2_LSHIFT_DEFINED +#endif +#if !defined(VINT8x4_SPLAT_DEFINED) VEC_FUNC_IMPL vint8x4 vint8x4_splat(vec_int8 x) { vint8x4 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; return vec; } # define VINT8x4_SPLAT_DEFINED #endif -#ifndef VINT8x4_LOAD_ALIGNED_DEFINED +#if !defined(VINT8x4_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint8x4 vint8x4_load_aligned(const vec_int8 x[4]) { vint8x4 vec; @@ -488,7 +457,7 @@ } # define VINT8x4_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT8x4_LOAD_DEFINED +#if !defined(VINT8x4_LOAD_DEFINED) VEC_FUNC_IMPL vint8x4 vint8x4_load(const vec_int8 x[4]) { vint8x4 vec; @@ -497,21 +466,21 @@ } # define VINT8x4_LOAD_DEFINED #endif -#ifndef VINT8x4_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint8x4_store_aligned(vint8x4 vec, vec_int8 arr[4]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT8x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x4_store_aligned(vint8x4 vec, vec_int8 x[4]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT8x4_STORE_ALIGNED_DEFINED #endif -#ifndef VINT8x4_STORE_DEFINED -VEC_FUNC_IMPL void vint8x4_store(vint8x4 vec, vec_int8 arr[4]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT8x4_STORE_DEFINED) +VEC_FUNC_IMPL void vint8x4_store(vint8x4 vec, vec_int8 x[4]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT8x4_STORE_DEFINED #endif -#ifndef VINT8x4_ADD_DEFINED +#if !defined(VINT8x4_ADD_DEFINED) VEC_FUNC_IMPL vint8x4 vint8x4_add(vint8x4 vec1, vint8x4 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -519,7 +488,7 @@ } # define VINT8x4_ADD_DEFINED #endif -#ifndef VINT8x4_SUB_DEFINED +#if !defined(VINT8x4_SUB_DEFINED) VEC_FUNC_IMPL vint8x4 vint8x4_sub(vint8x4 vec1, vint8x4 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -527,7 +496,7 @@ } # define 
VINT8x4_SUB_DEFINED #endif -#ifndef VINT8x4_MUL_DEFINED +#if !defined(VINT8x4_MUL_DEFINED) VEC_FUNC_IMPL vint8x4 vint8x4_mul(vint8x4 vec1, vint8x4 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -535,106 +504,8 @@ } # define VINT8x4_MUL_DEFINED #endif -#ifndef VINT8x4_AND_DEFINED -VEC_FUNC_IMPL vint8x4 vint8x4_and(vint8x4 vec1, vint8x4 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT8x4_AND_DEFINED -#endif -#ifndef VINT8x4_OR_DEFINED -VEC_FUNC_IMPL vint8x4 vint8x4_or(vint8x4 vec1, vint8x4 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT8x4_OR_DEFINED -#endif -#ifndef VINT8x4_XOR_DEFINED -VEC_FUNC_IMPL vint8x4 vint8x4_xor(vint8x4 vec1, vint8x4 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT8x4_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x4_CMPLT_DEFINED -VEC_FUNC_IMPL vint8x4 vint8x4_cmplt(vint8x4 vec1, vint8x4 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT8x4_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x4_CMPEQ_DEFINED -VEC_FUNC_IMPL vint8x4 vint8x4_cmpeq(vint8x4 vec1, vint8x4 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT8x4_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x4_CMPGT_DEFINED -VEC_FUNC_IMPL vint8x4 vint8x4_cmpgt(vint8x4 vec1, vint8x4 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT8x4_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x4_CMPLE_DEFINED -VEC_FUNC_IMPL vint8x4 vint8x4_cmple(vint8x4 vec1, vint8x4 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT8x4_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x4_CMPGE_DEFINED -VEC_FUNC_IMPL vint8x4 vint8x4_cmpge(vint8x4 vec1, vint8x4 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT8x4_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x4_MIN_DEFINED -VEC_FUNC_IMPL vint8x4 vint8x4_min(vint8x4 vec1, vint8x4 vec2) -{ - vint8x4 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT8x4_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x4_MAX_DEFINED -VEC_FUNC_IMPL vint8x4 vint8x4_max(vint8x4 vec1, vint8x4 vec2) -{ - vint8x4 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT8x4_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x4_AVG_DEFINED +#if !defined(VINT8x4_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x4 vint8x4_avg(vint8x4 vec1, vint8x4 vec2) { vint8x4 ones = vint8x4_splat(1); @@ -648,19 +519,107 @@ } # define VINT8x4_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x4_LSHIFT_DEFINED -VEC_FUNC_IMPL vint8x4 vint8x4_lshift(vint8x4 vec1, vuint8x4 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT8x4_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x4_RSHIFT_DEFINED +#if !defined(VINT8x4_AND_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_and(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT8x4_AND_DEFINED +#endif +#if !defined(VINT8x4_OR_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_or(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT8x4_OR_DEFINED +#endif 
+#if !defined(VINT8x4_XOR_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_xor(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT8x4_XOR_DEFINED +#endif +#if !defined(VINT8x4_NOT_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_not(vint8x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT8x4_NOT_DEFINED +#endif +#if !defined(VINT8x4_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x4 vint8x4_cmplt(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT8x4_CMPLT_DEFINED +#endif +#if !defined(VINT8x4_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x4 vint8x4_cmpeq(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT8x4_CMPEQ_DEFINED +#endif +#if !defined(VINT8x4_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x4 vint8x4_cmpgt(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT8x4_CMPGT_DEFINED +#endif +#if !defined(VINT8x4_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x4 vint8x4_cmple(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT8x4_CMPLE_DEFINED +#endif +#if !defined(VINT8x4_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x4 vint8x4_cmpge(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT8x4_CMPGE_DEFINED +#endif +#if !defined(VINT8x4_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x4 vint8x4_min(vint8x4 vec1, vint8x4 vec2) +{ + vint8x4 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x4_MIN_DEFINED +#endif +#if !defined(VINT8x4_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x4 vint8x4_max(vint8x4 vec1, vint8x4 vec2) +{ + vint8x4 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x4_MAX_DEFINED +#endif +#if !defined(VINT8x4_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x4 vint8x4_rshift(vint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -668,9 +627,8 @@ } # define VINT8x4_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x4_LRSHIFT_DEFINED +#if !defined(VINT8x4_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x4 vint8x4_lrshift(vint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc); @@ -678,29 +636,28 @@ } # define VINT8x4_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT8x4_NOT_DEFINED -VEC_FUNC_IMPL vint8x4 vint8x4_not(vint8x4 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT8x4_NOT_DEFINED -#endif - - -/* vint8x4 */ - -#ifndef VUINT8x4_SPLAT_DEFINED +#if !defined(VINT8x4_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x4 vint8x4_lshift(vint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT8x4_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x4_SPLAT_DEFINED) VEC_FUNC_IMPL vuint8x4 vuint8x4_splat(vec_uint8 x) { vuint8x4 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; return vec; } # define VUINT8x4_SPLAT_DEFINED #endif -#ifndef VUINT8x4_LOAD_ALIGNED_DEFINED +#if 
!defined(VUINT8x4_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint8x4 vuint8x4_load_aligned(const vec_uint8 x[4]) { vuint8x4 vec; @@ -709,7 +666,7 @@ } # define VUINT8x4_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT8x4_LOAD_DEFINED +#if !defined(VUINT8x4_LOAD_DEFINED) VEC_FUNC_IMPL vuint8x4 vuint8x4_load(const vec_uint8 x[4]) { vuint8x4 vec; @@ -718,21 +675,21 @@ } # define VUINT8x4_LOAD_DEFINED #endif -#ifndef VUINT8x4_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint8x4_store_aligned(vuint8x4 vec, vec_uint8 arr[4]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT8x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x4_store_aligned(vuint8x4 vec, vec_uint8 x[4]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT8x4_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT8x4_STORE_DEFINED -VEC_FUNC_IMPL void vuint8x4_store(vuint8x4 vec, vec_uint8 arr[4]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT8x4_STORE_DEFINED) +VEC_FUNC_IMPL void vuint8x4_store(vuint8x4 vec, vec_uint8 x[4]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT8x4_STORE_DEFINED #endif -#ifndef VUINT8x4_ADD_DEFINED +#if !defined(VUINT8x4_ADD_DEFINED) VEC_FUNC_IMPL vuint8x4 vuint8x4_add(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -740,7 +697,7 @@ } # define VUINT8x4_ADD_DEFINED #endif -#ifndef VUINT8x4_SUB_DEFINED +#if !defined(VUINT8x4_SUB_DEFINED) VEC_FUNC_IMPL vuint8x4 vuint8x4_sub(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -748,7 +705,7 @@ } # define VUINT8x4_SUB_DEFINED #endif -#ifndef VUINT8x4_MUL_DEFINED +#if !defined(VUINT8x4_MUL_DEFINED) VEC_FUNC_IMPL vuint8x4 vuint8x4_mul(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -756,7 +713,16 @@ } # define VUINT8x4_MUL_DEFINED #endif -#ifndef VUINT8x4_AND_DEFINED +#if !defined(VUINT8x4_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_avg(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT8x4_AVG_DEFINED +#endif +#if !defined(VUINT8x4_AND_DEFINED) VEC_FUNC_IMPL vuint8x4 vuint8x4_and(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -764,7 +730,7 @@ } # define VUINT8x4_AND_DEFINED #endif -#ifndef VUINT8x4_OR_DEFINED +#if !defined(VUINT8x4_OR_DEFINED) VEC_FUNC_IMPL vuint8x4 vuint8x4_or(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -772,7 +738,7 @@ } # define VUINT8x4_OR_DEFINED #endif -#ifndef VUINT8x4_XOR_DEFINED +#if !defined(VUINT8x4_XOR_DEFINED) VEC_FUNC_IMPL vuint8x4 vuint8x4_xor(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -780,8 +746,16 @@ } # define VUINT8x4_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x4_CMPLT_DEFINED +#if !defined(VUINT8x4_NOT_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_not(vuint8x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT8x4_NOT_DEFINED +#endif +#if !defined(VUINT8x4_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x4 vuint8x4_cmplt(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -789,9 +763,8 @@ } # define VUINT8x4_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x4_CMPEQ_DEFINED +#if !defined(VUINT8x4_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpeq(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -799,9 +772,8 @@ } # define VUINT8x4_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef 
VUINT8x4_CMPGT_DEFINED +#if !defined(VUINT8x4_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpgt(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -809,9 +781,8 @@ } # define VUINT8x4_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x4_CMPLE_DEFINED +#if !defined(VUINT8x4_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x4 vuint8x4_cmple(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -819,9 +790,8 @@ } # define VUINT8x4_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x4_CMPGE_DEFINED +#if !defined(VUINT8x4_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpge(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -829,9 +799,8 @@ } # define VUINT8x4_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x4_MIN_DEFINED +#if !defined(VUINT8x4_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x4 vuint8x4_min(vuint8x4 vec1, vuint8x4 vec2) { vuint8x4 mask; @@ -841,9 +810,8 @@ } # define VUINT8x4_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x4_MAX_DEFINED +#if !defined(VUINT8x4_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x4 vuint8x4_max(vuint8x4 vec1, vuint8x4 vec2) { vuint8x4 mask; @@ -853,30 +821,8 @@ } # define VUINT8x4_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x4_AVG_DEFINED -VEC_FUNC_IMPL vuint8x4 vuint8x4_avg(vuint8x4 vec1, vuint8x4 vec2) -{ - vint8x4 ones = vint8x4_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT8x4_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x4_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint8x4 vuint8x4_lshift(vuint8x4 vec1, vuint8x4 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT8x4_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x4_RSHIFT_DEFINED +#if !defined(VUINT8x4_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x4 vuint8x4_rshift(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -884,9 +830,8 @@ } # define VUINT8x4_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x4_LRSHIFT_DEFINED +#if !defined(VUINT8x4_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x4 vuint8x4_lrshift(vuint8x4 vec1, vuint8x4 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc); @@ -894,29 +839,32 @@ } # define VUINT8x4_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT8x4_NOT_DEFINED -VEC_FUNC_IMPL vuint8x4 vuint8x4_not(vuint8x4 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT8x4_NOT_DEFINED -#endif - - -/* vuint8x8 */ - -#ifndef VINT8x8_SPLAT_DEFINED +#if !defined(VUINT8x4_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint8x4 vuint8x4_lshift(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT8x4_LSHIFT_DEFINED +#endif +#if !defined(VINT8x8_SPLAT_DEFINED) VEC_FUNC_IMPL vint8x8 vint8x8_splat(vec_int8 x) { vint8x8 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; return vec; } # define VINT8x8_SPLAT_DEFINED #endif -#ifndef 
VINT8x8_LOAD_ALIGNED_DEFINED +#if !defined(VINT8x8_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint8x8 vint8x8_load_aligned(const vec_int8 x[8]) { vint8x8 vec; @@ -925,7 +873,7 @@ } # define VINT8x8_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT8x8_LOAD_DEFINED +#if !defined(VINT8x8_LOAD_DEFINED) VEC_FUNC_IMPL vint8x8 vint8x8_load(const vec_int8 x[8]) { vint8x8 vec; @@ -934,21 +882,21 @@ } # define VINT8x8_LOAD_DEFINED #endif -#ifndef VINT8x8_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint8x8_store_aligned(vint8x8 vec, vec_int8 arr[8]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT8x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x8_store_aligned(vint8x8 vec, vec_int8 x[8]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT8x8_STORE_ALIGNED_DEFINED #endif -#ifndef VINT8x8_STORE_DEFINED -VEC_FUNC_IMPL void vint8x8_store(vint8x8 vec, vec_int8 arr[8]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT8x8_STORE_DEFINED) +VEC_FUNC_IMPL void vint8x8_store(vint8x8 vec, vec_int8 x[8]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT8x8_STORE_DEFINED #endif -#ifndef VINT8x8_ADD_DEFINED +#if !defined(VINT8x8_ADD_DEFINED) VEC_FUNC_IMPL vint8x8 vint8x8_add(vint8x8 vec1, vint8x8 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -956,7 +904,7 @@ } # define VINT8x8_ADD_DEFINED #endif -#ifndef VINT8x8_SUB_DEFINED +#if !defined(VINT8x8_SUB_DEFINED) VEC_FUNC_IMPL vint8x8 vint8x8_sub(vint8x8 vec1, vint8x8 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -964,7 +912,7 @@ } # define VINT8x8_SUB_DEFINED #endif -#ifndef VINT8x8_MUL_DEFINED +#if !defined(VINT8x8_MUL_DEFINED) VEC_FUNC_IMPL vint8x8 vint8x8_mul(vint8x8 vec1, vint8x8 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -972,106 +920,8 @@ } # define VINT8x8_MUL_DEFINED #endif -#ifndef VINT8x8_AND_DEFINED -VEC_FUNC_IMPL vint8x8 vint8x8_and(vint8x8 vec1, vint8x8 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT8x8_AND_DEFINED -#endif -#ifndef VINT8x8_OR_DEFINED -VEC_FUNC_IMPL vint8x8 vint8x8_or(vint8x8 vec1, vint8x8 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT8x8_OR_DEFINED -#endif -#ifndef VINT8x8_XOR_DEFINED -VEC_FUNC_IMPL vint8x8 vint8x8_xor(vint8x8 vec1, vint8x8 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT8x8_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x8_CMPLT_DEFINED -VEC_FUNC_IMPL vint8x8 vint8x8_cmplt(vint8x8 vec1, vint8x8 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT8x8_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x8_CMPEQ_DEFINED -VEC_FUNC_IMPL vint8x8 vint8x8_cmpeq(vint8x8 vec1, vint8x8 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT8x8_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x8_CMPGT_DEFINED -VEC_FUNC_IMPL vint8x8 vint8x8_cmpgt(vint8x8 vec1, vint8x8 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT8x8_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x8_CMPLE_DEFINED -VEC_FUNC_IMPL vint8x8 vint8x8_cmple(vint8x8 vec1, vint8x8 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT8x8_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x8_CMPGE_DEFINED -VEC_FUNC_IMPL vint8x8 vint8x8_cmpge(vint8x8 vec1, vint8x8 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT8x8_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x8_MIN_DEFINED -VEC_FUNC_IMPL vint8x8 
vint8x8_min(vint8x8 vec1, vint8x8 vec2) -{ - vint8x8 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT8x8_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x8_MAX_DEFINED -VEC_FUNC_IMPL vint8x8 vint8x8_max(vint8x8 vec1, vint8x8 vec2) -{ - vint8x8 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT8x8_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x8_AVG_DEFINED +#if !defined(VINT8x8_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x8 vint8x8_avg(vint8x8 vec1, vint8x8 vec2) { vint8x8 ones = vint8x8_splat(1); @@ -1085,19 +935,107 @@ } # define VINT8x8_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x8_LSHIFT_DEFINED -VEC_FUNC_IMPL vint8x8 vint8x8_lshift(vint8x8 vec1, vuint8x8 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT8x8_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x8_RSHIFT_DEFINED +#if !defined(VINT8x8_AND_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_and(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT8x8_AND_DEFINED +#endif +#if !defined(VINT8x8_OR_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_or(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT8x8_OR_DEFINED +#endif +#if !defined(VINT8x8_XOR_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_xor(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT8x8_XOR_DEFINED +#endif +#if !defined(VINT8x8_NOT_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_not(vint8x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT8x8_NOT_DEFINED +#endif +#if !defined(VINT8x8_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x8 vint8x8_cmplt(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT8x8_CMPLT_DEFINED +#endif +#if !defined(VINT8x8_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x8 vint8x8_cmpeq(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT8x8_CMPEQ_DEFINED +#endif +#if !defined(VINT8x8_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x8 vint8x8_cmpgt(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT8x8_CMPGT_DEFINED +#endif +#if !defined(VINT8x8_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x8 vint8x8_cmple(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT8x8_CMPLE_DEFINED +#endif +#if !defined(VINT8x8_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x8 vint8x8_cmpge(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT8x8_CMPGE_DEFINED +#endif +#if !defined(VINT8x8_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x8 vint8x8_min(vint8x8 vec1, vint8x8 vec2) +{ + vint8x8 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x8_MIN_DEFINED +#endif +#if !defined(VINT8x8_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x8 vint8x8_max(vint8x8 vec1, vint8x8 vec2) +{ + vint8x8 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & 
mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x8_MAX_DEFINED +#endif +#if !defined(VINT8x8_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x8 vint8x8_rshift(vint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -1105,9 +1043,8 @@ } # define VINT8x8_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x8_LRSHIFT_DEFINED +#if !defined(VINT8x8_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x8 vint8x8_lrshift(vint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc); @@ -1115,29 +1052,32 @@ } # define VINT8x8_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT8x8_NOT_DEFINED -VEC_FUNC_IMPL vint8x8 vint8x8_not(vint8x8 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT8x8_NOT_DEFINED -#endif - - -/* vint8x8 */ - -#ifndef VUINT8x8_SPLAT_DEFINED +#if !defined(VINT8x8_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x8 vint8x8_lshift(vint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT8x8_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x8_SPLAT_DEFINED) VEC_FUNC_IMPL vuint8x8 vuint8x8_splat(vec_uint8 x) { vuint8x8 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; return vec; } # define VUINT8x8_SPLAT_DEFINED #endif -#ifndef VUINT8x8_LOAD_ALIGNED_DEFINED +#if !defined(VUINT8x8_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint8x8 vuint8x8_load_aligned(const vec_uint8 x[8]) { vuint8x8 vec; @@ -1146,7 +1086,7 @@ } # define VUINT8x8_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT8x8_LOAD_DEFINED +#if !defined(VUINT8x8_LOAD_DEFINED) VEC_FUNC_IMPL vuint8x8 vuint8x8_load(const vec_uint8 x[8]) { vuint8x8 vec; @@ -1155,21 +1095,21 @@ } # define VUINT8x8_LOAD_DEFINED #endif -#ifndef VUINT8x8_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint8x8_store_aligned(vuint8x8 vec, vec_uint8 arr[8]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT8x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x8_store_aligned(vuint8x8 vec, vec_uint8 x[8]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT8x8_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT8x8_STORE_DEFINED -VEC_FUNC_IMPL void vuint8x8_store(vuint8x8 vec, vec_uint8 arr[8]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT8x8_STORE_DEFINED) +VEC_FUNC_IMPL void vuint8x8_store(vuint8x8 vec, vec_uint8 x[8]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT8x8_STORE_DEFINED #endif -#ifndef VUINT8x8_ADD_DEFINED +#if !defined(VUINT8x8_ADD_DEFINED) VEC_FUNC_IMPL vuint8x8 vuint8x8_add(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -1177,7 +1117,7 @@ } # define VUINT8x8_ADD_DEFINED #endif -#ifndef VUINT8x8_SUB_DEFINED +#if !defined(VUINT8x8_SUB_DEFINED) VEC_FUNC_IMPL vuint8x8 vuint8x8_sub(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -1185,7 +1125,7 @@ } # define VUINT8x8_SUB_DEFINED #endif -#ifndef VUINT8x8_MUL_DEFINED +#if !defined(VUINT8x8_MUL_DEFINED) VEC_FUNC_IMPL vuint8x8 vuint8x8_mul(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -1193,7 +1133,16 @@ } # define VUINT8x8_MUL_DEFINED #endif -#ifndef VUINT8x8_AND_DEFINED +#if !defined(VUINT8x8_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_avg(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + 
(vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT8x8_AVG_DEFINED +#endif +#if !defined(VUINT8x8_AND_DEFINED) VEC_FUNC_IMPL vuint8x8 vuint8x8_and(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -1201,7 +1150,7 @@ } # define VUINT8x8_AND_DEFINED #endif -#ifndef VUINT8x8_OR_DEFINED +#if !defined(VUINT8x8_OR_DEFINED) VEC_FUNC_IMPL vuint8x8 vuint8x8_or(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -1209,7 +1158,7 @@ } # define VUINT8x8_OR_DEFINED #endif -#ifndef VUINT8x8_XOR_DEFINED +#if !defined(VUINT8x8_XOR_DEFINED) VEC_FUNC_IMPL vuint8x8 vuint8x8_xor(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -1217,8 +1166,16 @@ } # define VUINT8x8_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x8_CMPLT_DEFINED +#if !defined(VUINT8x8_NOT_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_not(vuint8x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT8x8_NOT_DEFINED +#endif +#if !defined(VUINT8x8_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x8 vuint8x8_cmplt(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -1226,9 +1183,8 @@ } # define VUINT8x8_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x8_CMPEQ_DEFINED +#if !defined(VUINT8x8_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpeq(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -1236,9 +1192,8 @@ } # define VUINT8x8_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x8_CMPGT_DEFINED +#if !defined(VUINT8x8_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpgt(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -1246,9 +1201,8 @@ } # define VUINT8x8_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x8_CMPLE_DEFINED +#if !defined(VUINT8x8_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x8 vuint8x8_cmple(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -1256,9 +1210,8 @@ } # define VUINT8x8_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x8_CMPGE_DEFINED +#if !defined(VUINT8x8_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpge(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -1266,9 +1219,8 @@ } # define VUINT8x8_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x8_MIN_DEFINED +#if !defined(VUINT8x8_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x8 vuint8x8_min(vuint8x8 vec1, vuint8x8 vec2) { vuint8x8 mask; @@ -1278,9 +1230,8 @@ } # define VUINT8x8_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x8_MAX_DEFINED +#if !defined(VUINT8x8_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x8 vuint8x8_max(vuint8x8 vec1, vuint8x8 vec2) { vuint8x8 mask; @@ -1290,30 +1241,8 @@ } # define VUINT8x8_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x8_AVG_DEFINED -VEC_FUNC_IMPL vuint8x8 vuint8x8_avg(vuint8x8 vec1, vuint8x8 vec2) -{ - vint8x8 ones = vint8x8_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT8x8_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x8_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint8x8 vuint8x8_lshift(vuint8x8 vec1, vuint8x8 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return 
vec1; -} -# define VUINT8x8_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x8_RSHIFT_DEFINED +#if !defined(VUINT8x8_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x8 vuint8x8_rshift(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -1321,9 +1250,8 @@ } # define VUINT8x8_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x8_LRSHIFT_DEFINED +#if !defined(VUINT8x8_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x8 vuint8x8_lrshift(vuint8x8 vec1, vuint8x8 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc); @@ -1331,29 +1259,40 @@ } # define VUINT8x8_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT8x8_NOT_DEFINED -VEC_FUNC_IMPL vuint8x8 vuint8x8_not(vuint8x8 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT8x8_NOT_DEFINED -#endif - - -/* vuint8x16 */ - -#ifndef VINT8x16_SPLAT_DEFINED +#if !defined(VUINT8x8_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint8x8 vuint8x8_lshift(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT8x8_LSHIFT_DEFINED +#endif +#if !defined(VINT8x16_SPLAT_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_splat(vec_int8 x) { vint8x16 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + vec.gcc[11] = x; + vec.gcc[12] = x; + vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; return vec; } # define VINT8x16_SPLAT_DEFINED #endif -#ifndef VINT8x16_LOAD_ALIGNED_DEFINED +#if !defined(VINT8x16_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_load_aligned(const vec_int8 x[16]) { vint8x16 vec; @@ -1362,7 +1301,7 @@ } # define VINT8x16_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT8x16_LOAD_DEFINED +#if !defined(VINT8x16_LOAD_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_load(const vec_int8 x[16]) { vint8x16 vec; @@ -1371,21 +1310,21 @@ } # define VINT8x16_LOAD_DEFINED #endif -#ifndef VINT8x16_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 arr[16]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT8x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 x[16]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT8x16_STORE_ALIGNED_DEFINED #endif -#ifndef VINT8x16_STORE_DEFINED -VEC_FUNC_IMPL void vint8x16_store(vint8x16 vec, vec_int8 arr[16]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT8x16_STORE_DEFINED) +VEC_FUNC_IMPL void vint8x16_store(vint8x16 vec, vec_int8 x[16]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT8x16_STORE_DEFINED #endif -#ifndef VINT8x16_ADD_DEFINED +#if !defined(VINT8x16_ADD_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_add(vint8x16 vec1, vint8x16 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -1393,7 +1332,7 @@ } # define VINT8x16_ADD_DEFINED #endif -#ifndef VINT8x16_SUB_DEFINED +#if !defined(VINT8x16_SUB_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_sub(vint8x16 vec1, vint8x16 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -1401,7 +1340,7 @@ } # define VINT8x16_SUB_DEFINED #endif -#ifndef VINT8x16_MUL_DEFINED +#if !defined(VINT8x16_MUL_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_mul(vint8x16 vec1, vint8x16 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -1409,106 +1348,8 @@ } # define 
VINT8x16_MUL_DEFINED #endif -#ifndef VINT8x16_AND_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT8x16_AND_DEFINED -#endif -#ifndef VINT8x16_OR_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT8x16_OR_DEFINED -#endif -#ifndef VINT8x16_XOR_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT8x16_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x16_CMPLT_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT8x16_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x16_CMPEQ_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT8x16_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x16_CMPGT_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT8x16_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x16_CMPLE_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_cmple(vint8x16 vec1, vint8x16 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT8x16_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x16_CMPGE_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_cmpge(vint8x16 vec1, vint8x16 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT8x16_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x16_MIN_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2) -{ - vint8x16 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT8x16_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x16_MAX_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2) -{ - vint8x16 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT8x16_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x16_AVG_DEFINED +#if !defined(VINT8x16_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2) { vint8x16 ones = vint8x16_splat(1); @@ -1522,19 +1363,107 @@ } # define VINT8x16_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x16_LSHIFT_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT8x16_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x16_RSHIFT_DEFINED +#if !defined(VINT8x16_AND_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT8x16_AND_DEFINED +#endif +#if !defined(VINT8x16_OR_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT8x16_OR_DEFINED +#endif +#if !defined(VINT8x16_XOR_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc 
^ vec2.gcc); + return vec1; +} +# define VINT8x16_XOR_DEFINED +#endif +#if !defined(VINT8x16_NOT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_not(vint8x16 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT8x16_NOT_DEFINED +#endif +#if !defined(VINT8x16_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT8x16_CMPLT_DEFINED +#endif +#if !defined(VINT8x16_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT8x16_CMPEQ_DEFINED +#endif +#if !defined(VINT8x16_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT8x16_CMPGT_DEFINED +#endif +#if !defined(VINT8x16_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x16 vint8x16_cmple(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT8x16_CMPLE_DEFINED +#endif +#if !defined(VINT8x16_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x16 vint8x16_cmpge(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT8x16_CMPGE_DEFINED +#endif +#if !defined(VINT8x16_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x16_MIN_DEFINED +#endif +#if !defined(VINT8x16_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x16_MAX_DEFINED +#endif +#if !defined(VINT8x16_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x16 vint8x16_rshift(vint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -1542,9 +1471,8 @@ } # define VINT8x16_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x16_LRSHIFT_DEFINED +#if !defined(VINT8x16_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); @@ -1552,29 +1480,40 @@ } # define VINT8x16_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT8x16_NOT_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_not(vint8x16 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT8x16_NOT_DEFINED -#endif - - -/* vint8x16 */ - -#ifndef VUINT8x16_SPLAT_DEFINED +#if !defined(VINT8x16_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT8x16_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x16_SPLAT_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_splat(vec_uint8 x) { vuint8x16 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + 
vec.gcc[11] = x; + vec.gcc[12] = x; + vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; return vec; } # define VUINT8x16_SPLAT_DEFINED #endif -#ifndef VUINT8x16_LOAD_ALIGNED_DEFINED +#if !defined(VUINT8x16_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_load_aligned(const vec_uint8 x[16]) { vuint8x16 vec; @@ -1583,7 +1522,7 @@ } # define VUINT8x16_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT8x16_LOAD_DEFINED +#if !defined(VUINT8x16_LOAD_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 x[16]) { vuint8x16 vec; @@ -1592,21 +1531,21 @@ } # define VUINT8x16_LOAD_DEFINED #endif -#ifndef VUINT8x16_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 arr[16]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT8x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 x[16]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT8x16_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT8x16_STORE_DEFINED -VEC_FUNC_IMPL void vuint8x16_store(vuint8x16 vec, vec_uint8 arr[16]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT8x16_STORE_DEFINED) +VEC_FUNC_IMPL void vuint8x16_store(vuint8x16 vec, vec_uint8 x[16]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT8x16_STORE_DEFINED #endif -#ifndef VUINT8x16_ADD_DEFINED +#if !defined(VUINT8x16_ADD_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_add(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -1614,7 +1553,7 @@ } # define VUINT8x16_ADD_DEFINED #endif -#ifndef VUINT8x16_SUB_DEFINED +#if !defined(VUINT8x16_SUB_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_sub(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -1622,7 +1561,7 @@ } # define VUINT8x16_SUB_DEFINED #endif -#ifndef VUINT8x16_MUL_DEFINED +#if !defined(VUINT8x16_MUL_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_mul(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -1630,7 +1569,16 @@ } # define VUINT8x16_MUL_DEFINED #endif -#ifndef VUINT8x16_AND_DEFINED +#if !defined(VUINT8x16_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT8x16_AVG_DEFINED +#endif +#if !defined(VUINT8x16_AND_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_and(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -1638,7 +1586,7 @@ } # define VUINT8x16_AND_DEFINED #endif -#ifndef VUINT8x16_OR_DEFINED +#if !defined(VUINT8x16_OR_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_or(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -1646,7 +1594,7 @@ } # define VUINT8x16_OR_DEFINED #endif -#ifndef VUINT8x16_XOR_DEFINED +#if !defined(VUINT8x16_XOR_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_xor(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -1654,8 +1602,16 @@ } # define VUINT8x16_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x16_CMPLT_DEFINED +#if !defined(VUINT8x16_NOT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_not(vuint8x16 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT8x16_NOT_DEFINED +#endif +#if !defined(VUINT8x16_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x16 vuint8x16_cmplt(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -1663,9 +1619,8 @@ } # define VUINT8x16_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef 
VUINT8x16_CMPEQ_DEFINED +#if !defined(VUINT8x16_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpeq(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -1673,9 +1628,8 @@ } # define VUINT8x16_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x16_CMPGT_DEFINED +#if !defined(VUINT8x16_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpgt(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -1683,9 +1637,8 @@ } # define VUINT8x16_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x16_CMPLE_DEFINED +#if !defined(VUINT8x16_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x16 vuint8x16_cmple(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -1693,9 +1646,8 @@ } # define VUINT8x16_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x16_CMPGE_DEFINED +#if !defined(VUINT8x16_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpge(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -1703,9 +1655,8 @@ } # define VUINT8x16_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x16_MIN_DEFINED +#if !defined(VUINT8x16_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x16 vuint8x16_min(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 mask; @@ -1715,9 +1666,8 @@ } # define VUINT8x16_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x16_MAX_DEFINED +#if !defined(VUINT8x16_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x16 vuint8x16_max(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 mask; @@ -1727,30 +1677,8 @@ } # define VUINT8x16_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x16_AVG_DEFINED -VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2) -{ - vint8x16 ones = vint8x16_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT8x16_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x16_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT8x16_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x16_RSHIFT_DEFINED +#if !defined(VUINT8x16_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -1758,9 +1686,8 @@ } # define VUINT8x16_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x16_LRSHIFT_DEFINED +#if !defined(VUINT8x16_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); @@ -1768,29 +1695,56 @@ } # define VUINT8x16_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT8x16_NOT_DEFINED -VEC_FUNC_IMPL vuint8x16 vuint8x16_not(vuint8x16 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT8x16_NOT_DEFINED -#endif - - -/* vuint8x32 */ - -#ifndef VINT8x32_SPLAT_DEFINED +#if !defined(VUINT8x16_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define 
VUINT8x16_LSHIFT_DEFINED +#endif +#if !defined(VINT8x32_SPLAT_DEFINED) VEC_FUNC_IMPL vint8x32 vint8x32_splat(vec_int8 x) { vint8x32 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + vec.gcc[11] = x; + vec.gcc[12] = x; + vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; + vec.gcc[16] = x; + vec.gcc[17] = x; + vec.gcc[18] = x; + vec.gcc[19] = x; + vec.gcc[20] = x; + vec.gcc[21] = x; + vec.gcc[22] = x; + vec.gcc[23] = x; + vec.gcc[24] = x; + vec.gcc[25] = x; + vec.gcc[26] = x; + vec.gcc[27] = x; + vec.gcc[28] = x; + vec.gcc[29] = x; + vec.gcc[30] = x; + vec.gcc[31] = x; return vec; } # define VINT8x32_SPLAT_DEFINED #endif -#ifndef VINT8x32_LOAD_ALIGNED_DEFINED +#if !defined(VINT8x32_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint8x32 vint8x32_load_aligned(const vec_int8 x[32]) { vint8x32 vec; @@ -1799,7 +1753,7 @@ } # define VINT8x32_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT8x32_LOAD_DEFINED +#if !defined(VINT8x32_LOAD_DEFINED) VEC_FUNC_IMPL vint8x32 vint8x32_load(const vec_int8 x[32]) { vint8x32 vec; @@ -1808,21 +1762,21 @@ } # define VINT8x32_LOAD_DEFINED #endif -#ifndef VINT8x32_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint8x32_store_aligned(vint8x32 vec, vec_int8 arr[32]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT8x32_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x32_store_aligned(vint8x32 vec, vec_int8 x[32]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT8x32_STORE_ALIGNED_DEFINED #endif -#ifndef VINT8x32_STORE_DEFINED -VEC_FUNC_IMPL void vint8x32_store(vint8x32 vec, vec_int8 arr[32]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT8x32_STORE_DEFINED) +VEC_FUNC_IMPL void vint8x32_store(vint8x32 vec, vec_int8 x[32]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT8x32_STORE_DEFINED #endif -#ifndef VINT8x32_ADD_DEFINED +#if !defined(VINT8x32_ADD_DEFINED) VEC_FUNC_IMPL vint8x32 vint8x32_add(vint8x32 vec1, vint8x32 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -1830,7 +1784,7 @@ } # define VINT8x32_ADD_DEFINED #endif -#ifndef VINT8x32_SUB_DEFINED +#if !defined(VINT8x32_SUB_DEFINED) VEC_FUNC_IMPL vint8x32 vint8x32_sub(vint8x32 vec1, vint8x32 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -1838,7 +1792,7 @@ } # define VINT8x32_SUB_DEFINED #endif -#ifndef VINT8x32_MUL_DEFINED +#if !defined(VINT8x32_MUL_DEFINED) VEC_FUNC_IMPL vint8x32 vint8x32_mul(vint8x32 vec1, vint8x32 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -1846,106 +1800,8 @@ } # define VINT8x32_MUL_DEFINED #endif -#ifndef VINT8x32_AND_DEFINED -VEC_FUNC_IMPL vint8x32 vint8x32_and(vint8x32 vec1, vint8x32 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT8x32_AND_DEFINED -#endif -#ifndef VINT8x32_OR_DEFINED -VEC_FUNC_IMPL vint8x32 vint8x32_or(vint8x32 vec1, vint8x32 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT8x32_OR_DEFINED -#endif -#ifndef VINT8x32_XOR_DEFINED -VEC_FUNC_IMPL vint8x32 vint8x32_xor(vint8x32 vec1, vint8x32 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT8x32_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x32_CMPLT_DEFINED -VEC_FUNC_IMPL vint8x32 vint8x32_cmplt(vint8x32 vec1, vint8x32 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT8x32_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) 
-#ifndef VINT8x32_CMPEQ_DEFINED -VEC_FUNC_IMPL vint8x32 vint8x32_cmpeq(vint8x32 vec1, vint8x32 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT8x32_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x32_CMPGT_DEFINED -VEC_FUNC_IMPL vint8x32 vint8x32_cmpgt(vint8x32 vec1, vint8x32 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT8x32_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x32_CMPLE_DEFINED -VEC_FUNC_IMPL vint8x32 vint8x32_cmple(vint8x32 vec1, vint8x32 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT8x32_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x32_CMPGE_DEFINED -VEC_FUNC_IMPL vint8x32 vint8x32_cmpge(vint8x32 vec1, vint8x32 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT8x32_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x32_MIN_DEFINED -VEC_FUNC_IMPL vint8x32 vint8x32_min(vint8x32 vec1, vint8x32 vec2) -{ - vint8x32 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT8x32_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x32_MAX_DEFINED -VEC_FUNC_IMPL vint8x32 vint8x32_max(vint8x32 vec1, vint8x32 vec2) -{ - vint8x32 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT8x32_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x32_AVG_DEFINED +#if !defined(VINT8x32_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x32 vint8x32_avg(vint8x32 vec1, vint8x32 vec2) { vint8x32 ones = vint8x32_splat(1); @@ -1959,19 +1815,107 @@ } # define VINT8x32_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x32_LSHIFT_DEFINED -VEC_FUNC_IMPL vint8x32 vint8x32_lshift(vint8x32 vec1, vuint8x32 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT8x32_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x32_RSHIFT_DEFINED +#if !defined(VINT8x32_AND_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_and(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT8x32_AND_DEFINED +#endif +#if !defined(VINT8x32_OR_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_or(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT8x32_OR_DEFINED +#endif +#if !defined(VINT8x32_XOR_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_xor(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT8x32_XOR_DEFINED +#endif +#if !defined(VINT8x32_NOT_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_not(vint8x32 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT8x32_NOT_DEFINED +#endif +#if !defined(VINT8x32_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x32 vint8x32_cmplt(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT8x32_CMPLT_DEFINED +#endif +#if !defined(VINT8x32_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x32 vint8x32_cmpeq(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT8x32_CMPEQ_DEFINED +#endif +#if !defined(VINT8x32_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x32 vint8x32_cmpgt(vint8x32 vec1, vint8x32 vec2) +{ + 
vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT8x32_CMPGT_DEFINED +#endif +#if !defined(VINT8x32_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x32 vint8x32_cmple(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT8x32_CMPLE_DEFINED +#endif +#if !defined(VINT8x32_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x32 vint8x32_cmpge(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT8x32_CMPGE_DEFINED +#endif +#if !defined(VINT8x32_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x32 vint8x32_min(vint8x32 vec1, vint8x32 vec2) +{ + vint8x32 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x32_MIN_DEFINED +#endif +#if !defined(VINT8x32_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x32 vint8x32_max(vint8x32 vec1, vint8x32 vec2) +{ + vint8x32 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x32_MAX_DEFINED +#endif +#if !defined(VINT8x32_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x32 vint8x32_rshift(vint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -1979,9 +1923,8 @@ } # define VINT8x32_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x32_LRSHIFT_DEFINED +#if !defined(VINT8x32_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x32 vint8x32_lrshift(vint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); @@ -1989,29 +1932,56 @@ } # define VINT8x32_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT8x32_NOT_DEFINED -VEC_FUNC_IMPL vint8x32 vint8x32_not(vint8x32 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT8x32_NOT_DEFINED -#endif - - -/* vint8x32 */ - -#ifndef VUINT8x32_SPLAT_DEFINED +#if !defined(VINT8x32_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x32 vint8x32_lshift(vint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT8x32_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x32_SPLAT_DEFINED) VEC_FUNC_IMPL vuint8x32 vuint8x32_splat(vec_uint8 x) { vuint8x32 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + vec.gcc[11] = x; + vec.gcc[12] = x; + vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; + vec.gcc[16] = x; + vec.gcc[17] = x; + vec.gcc[18] = x; + vec.gcc[19] = x; + vec.gcc[20] = x; + vec.gcc[21] = x; + vec.gcc[22] = x; + vec.gcc[23] = x; + vec.gcc[24] = x; + vec.gcc[25] = x; + vec.gcc[26] = x; + vec.gcc[27] = x; + vec.gcc[28] = x; + vec.gcc[29] = x; + vec.gcc[30] = x; + vec.gcc[31] = x; return vec; } # define VUINT8x32_SPLAT_DEFINED #endif -#ifndef VUINT8x32_LOAD_ALIGNED_DEFINED +#if !defined(VUINT8x32_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint8x32 vuint8x32_load_aligned(const vec_uint8 x[32]) { vuint8x32 vec; @@ -2020,7 +1990,7 @@ } # define VUINT8x32_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT8x32_LOAD_DEFINED +#if !defined(VUINT8x32_LOAD_DEFINED) VEC_FUNC_IMPL vuint8x32 vuint8x32_load(const vec_uint8 x[32]) { 
vuint8x32 vec; @@ -2029,21 +1999,21 @@ } # define VUINT8x32_LOAD_DEFINED #endif -#ifndef VUINT8x32_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint8x32_store_aligned(vuint8x32 vec, vec_uint8 arr[32]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT8x32_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x32_store_aligned(vuint8x32 vec, vec_uint8 x[32]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT8x32_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT8x32_STORE_DEFINED -VEC_FUNC_IMPL void vuint8x32_store(vuint8x32 vec, vec_uint8 arr[32]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT8x32_STORE_DEFINED) +VEC_FUNC_IMPL void vuint8x32_store(vuint8x32 vec, vec_uint8 x[32]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT8x32_STORE_DEFINED #endif -#ifndef VUINT8x32_ADD_DEFINED +#if !defined(VUINT8x32_ADD_DEFINED) VEC_FUNC_IMPL vuint8x32 vuint8x32_add(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -2051,7 +2021,7 @@ } # define VUINT8x32_ADD_DEFINED #endif -#ifndef VUINT8x32_SUB_DEFINED +#if !defined(VUINT8x32_SUB_DEFINED) VEC_FUNC_IMPL vuint8x32 vuint8x32_sub(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -2059,7 +2029,7 @@ } # define VUINT8x32_SUB_DEFINED #endif -#ifndef VUINT8x32_MUL_DEFINED +#if !defined(VUINT8x32_MUL_DEFINED) VEC_FUNC_IMPL vuint8x32 vuint8x32_mul(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -2067,7 +2037,16 @@ } # define VUINT8x32_MUL_DEFINED #endif -#ifndef VUINT8x32_AND_DEFINED +#if !defined(VUINT8x32_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_avg(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT8x32_AVG_DEFINED +#endif +#if !defined(VUINT8x32_AND_DEFINED) VEC_FUNC_IMPL vuint8x32 vuint8x32_and(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -2075,7 +2054,7 @@ } # define VUINT8x32_AND_DEFINED #endif -#ifndef VUINT8x32_OR_DEFINED +#if !defined(VUINT8x32_OR_DEFINED) VEC_FUNC_IMPL vuint8x32 vuint8x32_or(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -2083,7 +2062,7 @@ } # define VUINT8x32_OR_DEFINED #endif -#ifndef VUINT8x32_XOR_DEFINED +#if !defined(VUINT8x32_XOR_DEFINED) VEC_FUNC_IMPL vuint8x32 vuint8x32_xor(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -2091,8 +2070,16 @@ } # define VUINT8x32_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x32_CMPLT_DEFINED +#if !defined(VUINT8x32_NOT_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_not(vuint8x32 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT8x32_NOT_DEFINED +#endif +#if !defined(VUINT8x32_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x32 vuint8x32_cmplt(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -2100,9 +2087,8 @@ } # define VUINT8x32_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x32_CMPEQ_DEFINED +#if !defined(VUINT8x32_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpeq(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -2110,9 +2096,8 @@ } # define VUINT8x32_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x32_CMPGT_DEFINED +#if !defined(VUINT8x32_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpgt(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -2120,9 +2105,8 
@@ } # define VUINT8x32_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x32_CMPLE_DEFINED +#if !defined(VUINT8x32_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x32 vuint8x32_cmple(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -2130,9 +2114,8 @@ } # define VUINT8x32_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x32_CMPGE_DEFINED +#if !defined(VUINT8x32_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpge(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -2140,9 +2123,8 @@ } # define VUINT8x32_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x32_MIN_DEFINED +#if !defined(VUINT8x32_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x32 vuint8x32_min(vuint8x32 vec1, vuint8x32 vec2) { vuint8x32 mask; @@ -2152,9 +2134,8 @@ } # define VUINT8x32_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x32_MAX_DEFINED +#if !defined(VUINT8x32_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x32 vuint8x32_max(vuint8x32 vec1, vuint8x32 vec2) { vuint8x32 mask; @@ -2164,30 +2145,8 @@ } # define VUINT8x32_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x32_AVG_DEFINED -VEC_FUNC_IMPL vuint8x32 vuint8x32_avg(vuint8x32 vec1, vuint8x32 vec2) -{ - vint8x32 ones = vint8x32_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT8x32_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x32_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint8x32 vuint8x32_lshift(vuint8x32 vec1, vuint8x32 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT8x32_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x32_RSHIFT_DEFINED +#if !defined(VUINT8x32_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x32 vuint8x32_rshift(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -2195,9 +2154,8 @@ } # define VUINT8x32_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x32_LRSHIFT_DEFINED +#if !defined(VUINT8x32_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x32 vuint8x32_lrshift(vuint8x32 vec1, vuint8x32 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); @@ -2205,29 +2163,88 @@ } # define VUINT8x32_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT8x32_NOT_DEFINED -VEC_FUNC_IMPL vuint8x32 vuint8x32_not(vuint8x32 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT8x32_NOT_DEFINED -#endif - - -/* vuint8x64 */ - -#ifndef VINT8x64_SPLAT_DEFINED +#if !defined(VUINT8x32_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint8x32 vuint8x32_lshift(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT8x32_LSHIFT_DEFINED +#endif +#if !defined(VINT8x64_SPLAT_DEFINED) VEC_FUNC_IMPL vint8x64 vint8x64_splat(vec_int8 x) { vint8x64 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + vec.gcc[11] = x; + vec.gcc[12] = x; + 
vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; + vec.gcc[16] = x; + vec.gcc[17] = x; + vec.gcc[18] = x; + vec.gcc[19] = x; + vec.gcc[20] = x; + vec.gcc[21] = x; + vec.gcc[22] = x; + vec.gcc[23] = x; + vec.gcc[24] = x; + vec.gcc[25] = x; + vec.gcc[26] = x; + vec.gcc[27] = x; + vec.gcc[28] = x; + vec.gcc[29] = x; + vec.gcc[30] = x; + vec.gcc[31] = x; + vec.gcc[32] = x; + vec.gcc[33] = x; + vec.gcc[34] = x; + vec.gcc[35] = x; + vec.gcc[36] = x; + vec.gcc[37] = x; + vec.gcc[38] = x; + vec.gcc[39] = x; + vec.gcc[40] = x; + vec.gcc[41] = x; + vec.gcc[42] = x; + vec.gcc[43] = x; + vec.gcc[44] = x; + vec.gcc[45] = x; + vec.gcc[46] = x; + vec.gcc[47] = x; + vec.gcc[48] = x; + vec.gcc[49] = x; + vec.gcc[50] = x; + vec.gcc[51] = x; + vec.gcc[52] = x; + vec.gcc[53] = x; + vec.gcc[54] = x; + vec.gcc[55] = x; + vec.gcc[56] = x; + vec.gcc[57] = x; + vec.gcc[58] = x; + vec.gcc[59] = x; + vec.gcc[60] = x; + vec.gcc[61] = x; + vec.gcc[62] = x; + vec.gcc[63] = x; return vec; } # define VINT8x64_SPLAT_DEFINED #endif -#ifndef VINT8x64_LOAD_ALIGNED_DEFINED +#if !defined(VINT8x64_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint8x64 vint8x64_load_aligned(const vec_int8 x[64]) { vint8x64 vec; @@ -2236,7 +2253,7 @@ } # define VINT8x64_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT8x64_LOAD_DEFINED +#if !defined(VINT8x64_LOAD_DEFINED) VEC_FUNC_IMPL vint8x64 vint8x64_load(const vec_int8 x[64]) { vint8x64 vec; @@ -2245,21 +2262,21 @@ } # define VINT8x64_LOAD_DEFINED #endif -#ifndef VINT8x64_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint8x64_store_aligned(vint8x64 vec, vec_int8 arr[64]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT8x64_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x64_store_aligned(vint8x64 vec, vec_int8 x[64]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT8x64_STORE_ALIGNED_DEFINED #endif -#ifndef VINT8x64_STORE_DEFINED -VEC_FUNC_IMPL void vint8x64_store(vint8x64 vec, vec_int8 arr[64]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT8x64_STORE_DEFINED) +VEC_FUNC_IMPL void vint8x64_store(vint8x64 vec, vec_int8 x[64]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT8x64_STORE_DEFINED #endif -#ifndef VINT8x64_ADD_DEFINED +#if !defined(VINT8x64_ADD_DEFINED) VEC_FUNC_IMPL vint8x64 vint8x64_add(vint8x64 vec1, vint8x64 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -2267,7 +2284,7 @@ } # define VINT8x64_ADD_DEFINED #endif -#ifndef VINT8x64_SUB_DEFINED +#if !defined(VINT8x64_SUB_DEFINED) VEC_FUNC_IMPL vint8x64 vint8x64_sub(vint8x64 vec1, vint8x64 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -2275,7 +2292,7 @@ } # define VINT8x64_SUB_DEFINED #endif -#ifndef VINT8x64_MUL_DEFINED +#if !defined(VINT8x64_MUL_DEFINED) VEC_FUNC_IMPL vint8x64 vint8x64_mul(vint8x64 vec1, vint8x64 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -2283,106 +2300,8 @@ } # define VINT8x64_MUL_DEFINED #endif -#ifndef VINT8x64_AND_DEFINED -VEC_FUNC_IMPL vint8x64 vint8x64_and(vint8x64 vec1, vint8x64 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT8x64_AND_DEFINED -#endif -#ifndef VINT8x64_OR_DEFINED -VEC_FUNC_IMPL vint8x64 vint8x64_or(vint8x64 vec1, vint8x64 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT8x64_OR_DEFINED -#endif -#ifndef VINT8x64_XOR_DEFINED -VEC_FUNC_IMPL vint8x64 vint8x64_xor(vint8x64 vec1, vint8x64 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT8x64_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x64_CMPLT_DEFINED -VEC_FUNC_IMPL vint8x64 vint8x64_cmplt(vint8x64 vec1, vint8x64 vec2) -{ - 
vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT8x64_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x64_CMPEQ_DEFINED -VEC_FUNC_IMPL vint8x64 vint8x64_cmpeq(vint8x64 vec1, vint8x64 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT8x64_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x64_CMPGT_DEFINED -VEC_FUNC_IMPL vint8x64 vint8x64_cmpgt(vint8x64 vec1, vint8x64 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT8x64_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x64_CMPLE_DEFINED -VEC_FUNC_IMPL vint8x64 vint8x64_cmple(vint8x64 vec1, vint8x64 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT8x64_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x64_CMPGE_DEFINED -VEC_FUNC_IMPL vint8x64 vint8x64_cmpge(vint8x64 vec1, vint8x64 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT8x64_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x64_MIN_DEFINED -VEC_FUNC_IMPL vint8x64 vint8x64_min(vint8x64 vec1, vint8x64 vec2) -{ - vint8x64 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT8x64_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x64_MAX_DEFINED -VEC_FUNC_IMPL vint8x64 vint8x64_max(vint8x64 vec1, vint8x64 vec2) -{ - vint8x64 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT8x64_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x64_AVG_DEFINED +#if !defined(VINT8x64_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x64 vint8x64_avg(vint8x64 vec1, vint8x64 vec2) { vint8x64 ones = vint8x64_splat(1); @@ -2396,19 +2315,107 @@ } # define VINT8x64_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x64_LSHIFT_DEFINED -VEC_FUNC_IMPL vint8x64 vint8x64_lshift(vint8x64 vec1, vuint8x64 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT8x64_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x64_RSHIFT_DEFINED +#if !defined(VINT8x64_AND_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_and(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT8x64_AND_DEFINED +#endif +#if !defined(VINT8x64_OR_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_or(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT8x64_OR_DEFINED +#endif +#if !defined(VINT8x64_XOR_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_xor(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT8x64_XOR_DEFINED +#endif +#if !defined(VINT8x64_NOT_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_not(vint8x64 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT8x64_NOT_DEFINED +#endif +#if !defined(VINT8x64_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x64 vint8x64_cmplt(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT8x64_CMPLT_DEFINED +#endif +#if !defined(VINT8x64_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x64 vint8x64_cmpeq(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT8x64_CMPEQ_DEFINED +#endif +#if 
!defined(VINT8x64_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x64 vint8x64_cmpgt(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT8x64_CMPGT_DEFINED +#endif +#if !defined(VINT8x64_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x64 vint8x64_cmple(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT8x64_CMPLE_DEFINED +#endif +#if !defined(VINT8x64_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x64 vint8x64_cmpge(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT8x64_CMPGE_DEFINED +#endif +#if !defined(VINT8x64_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x64 vint8x64_min(vint8x64 vec1, vint8x64 vec2) +{ + vint8x64 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x64_MIN_DEFINED +#endif +#if !defined(VINT8x64_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x64 vint8x64_max(vint8x64 vec1, vint8x64 vec2) +{ + vint8x64 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x64_MAX_DEFINED +#endif +#if !defined(VINT8x64_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x64 vint8x64_rshift(vint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -2416,9 +2423,8 @@ } # define VINT8x64_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT8x64_LRSHIFT_DEFINED +#if !defined(VINT8x64_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint8x64 vint8x64_lrshift(vint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); @@ -2426,29 +2432,88 @@ } # define VINT8x64_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT8x64_NOT_DEFINED -VEC_FUNC_IMPL vint8x64 vint8x64_not(vint8x64 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT8x64_NOT_DEFINED -#endif - - -/* vint8x64 */ - -#ifndef VUINT8x64_SPLAT_DEFINED +#if !defined(VINT8x64_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint8x64 vint8x64_lshift(vint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT8x64_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x64_SPLAT_DEFINED) VEC_FUNC_IMPL vuint8x64 vuint8x64_splat(vec_uint8 x) { vuint8x64 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + vec.gcc[11] = x; + vec.gcc[12] = x; + vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; + vec.gcc[16] = x; + vec.gcc[17] = x; + vec.gcc[18] = x; + vec.gcc[19] = x; + vec.gcc[20] = x; + vec.gcc[21] = x; + vec.gcc[22] = x; + vec.gcc[23] = x; + vec.gcc[24] = x; + vec.gcc[25] = x; + vec.gcc[26] = x; + vec.gcc[27] = x; + vec.gcc[28] = x; + vec.gcc[29] = x; + vec.gcc[30] = x; + vec.gcc[31] = x; + vec.gcc[32] = x; + vec.gcc[33] = x; + vec.gcc[34] = x; + vec.gcc[35] = x; + vec.gcc[36] = x; + vec.gcc[37] = x; + vec.gcc[38] = x; + vec.gcc[39] = x; + vec.gcc[40] = x; + vec.gcc[41] = x; + vec.gcc[42] = x; + vec.gcc[43] = x; + 
vec.gcc[44] = x; + vec.gcc[45] = x; + vec.gcc[46] = x; + vec.gcc[47] = x; + vec.gcc[48] = x; + vec.gcc[49] = x; + vec.gcc[50] = x; + vec.gcc[51] = x; + vec.gcc[52] = x; + vec.gcc[53] = x; + vec.gcc[54] = x; + vec.gcc[55] = x; + vec.gcc[56] = x; + vec.gcc[57] = x; + vec.gcc[58] = x; + vec.gcc[59] = x; + vec.gcc[60] = x; + vec.gcc[61] = x; + vec.gcc[62] = x; + vec.gcc[63] = x; return vec; } # define VUINT8x64_SPLAT_DEFINED #endif -#ifndef VUINT8x64_LOAD_ALIGNED_DEFINED +#if !defined(VUINT8x64_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint8x64 vuint8x64_load_aligned(const vec_uint8 x[64]) { vuint8x64 vec; @@ -2457,7 +2522,7 @@ } # define VUINT8x64_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT8x64_LOAD_DEFINED +#if !defined(VUINT8x64_LOAD_DEFINED) VEC_FUNC_IMPL vuint8x64 vuint8x64_load(const vec_uint8 x[64]) { vuint8x64 vec; @@ -2466,21 +2531,21 @@ } # define VUINT8x64_LOAD_DEFINED #endif -#ifndef VUINT8x64_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint8x64_store_aligned(vuint8x64 vec, vec_uint8 arr[64]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT8x64_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x64_store_aligned(vuint8x64 vec, vec_uint8 x[64]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT8x64_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT8x64_STORE_DEFINED -VEC_FUNC_IMPL void vuint8x64_store(vuint8x64 vec, vec_uint8 arr[64]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT8x64_STORE_DEFINED) +VEC_FUNC_IMPL void vuint8x64_store(vuint8x64 vec, vec_uint8 x[64]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT8x64_STORE_DEFINED #endif -#ifndef VUINT8x64_ADD_DEFINED +#if !defined(VUINT8x64_ADD_DEFINED) VEC_FUNC_IMPL vuint8x64 vuint8x64_add(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -2488,7 +2553,7 @@ } # define VUINT8x64_ADD_DEFINED #endif -#ifndef VUINT8x64_SUB_DEFINED +#if !defined(VUINT8x64_SUB_DEFINED) VEC_FUNC_IMPL vuint8x64 vuint8x64_sub(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -2496,7 +2561,7 @@ } # define VUINT8x64_SUB_DEFINED #endif -#ifndef VUINT8x64_MUL_DEFINED +#if !defined(VUINT8x64_MUL_DEFINED) VEC_FUNC_IMPL vuint8x64 vuint8x64_mul(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -2504,7 +2569,16 @@ } # define VUINT8x64_MUL_DEFINED #endif -#ifndef VUINT8x64_AND_DEFINED +#if !defined(VUINT8x64_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_avg(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT8x64_AVG_DEFINED +#endif +#if !defined(VUINT8x64_AND_DEFINED) VEC_FUNC_IMPL vuint8x64 vuint8x64_and(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -2512,7 +2586,7 @@ } # define VUINT8x64_AND_DEFINED #endif -#ifndef VUINT8x64_OR_DEFINED +#if !defined(VUINT8x64_OR_DEFINED) VEC_FUNC_IMPL vuint8x64 vuint8x64_or(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -2520,7 +2594,7 @@ } # define VUINT8x64_OR_DEFINED #endif -#ifndef VUINT8x64_XOR_DEFINED +#if !defined(VUINT8x64_XOR_DEFINED) VEC_FUNC_IMPL vuint8x64 vuint8x64_xor(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -2528,8 +2602,16 @@ } # define VUINT8x64_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x64_CMPLT_DEFINED +#if !defined(VUINT8x64_NOT_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_not(vuint8x64 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT8x64_NOT_DEFINED +#endif +#if 
!defined(VUINT8x64_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x64 vuint8x64_cmplt(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -2537,9 +2619,8 @@ } # define VUINT8x64_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x64_CMPEQ_DEFINED +#if !defined(VUINT8x64_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpeq(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -2547,9 +2628,8 @@ } # define VUINT8x64_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x64_CMPGT_DEFINED +#if !defined(VUINT8x64_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpgt(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -2557,9 +2637,8 @@ } # define VUINT8x64_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x64_CMPLE_DEFINED +#if !defined(VUINT8x64_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x64 vuint8x64_cmple(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -2567,9 +2646,8 @@ } # define VUINT8x64_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x64_CMPGE_DEFINED +#if !defined(VUINT8x64_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpge(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -2577,9 +2655,8 @@ } # define VUINT8x64_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x64_MIN_DEFINED +#if !defined(VUINT8x64_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x64 vuint8x64_min(vuint8x64 vec1, vuint8x64 vec2) { vuint8x64 mask; @@ -2589,9 +2666,8 @@ } # define VUINT8x64_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x64_MAX_DEFINED +#if !defined(VUINT8x64_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x64 vuint8x64_max(vuint8x64 vec1, vuint8x64 vec2) { vuint8x64 mask; @@ -2601,30 +2677,8 @@ } # define VUINT8x64_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x64_AVG_DEFINED -VEC_FUNC_IMPL vuint8x64 vuint8x64_avg(vuint8x64 vec1, vuint8x64 vec2) -{ - vint8x64 ones = vint8x64_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT8x64_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x64_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint8x64 vuint8x64_lshift(vuint8x64 vec1, vuint8x64 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT8x64_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x64_RSHIFT_DEFINED +#if !defined(VUINT8x64_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x64 vuint8x64_rshift(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -2632,9 +2686,8 @@ } # define VUINT8x64_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT8x64_LRSHIFT_DEFINED +#if !defined(VUINT8x64_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint8x64 vuint8x64_lrshift(vuint8x64 vec1, vuint8x64 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); @@ -2642,29 +2695,26 @@ } # define VUINT8x64_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT8x64_NOT_DEFINED -VEC_FUNC_IMPL vuint8x64 vuint8x64_not(vuint8x64 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define 
VUINT8x64_NOT_DEFINED -#endif - - -/* vuint16x2 */ - -#ifndef VINT16x2_SPLAT_DEFINED +#if !defined(VUINT8x64_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint8x64 vuint8x64_lshift(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT8x64_LSHIFT_DEFINED +#endif +#if !defined(VINT16x2_SPLAT_DEFINED) VEC_FUNC_IMPL vint16x2 vint16x2_splat(vec_int16 x) { vint16x2 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; return vec; } # define VINT16x2_SPLAT_DEFINED #endif -#ifndef VINT16x2_LOAD_ALIGNED_DEFINED +#if !defined(VINT16x2_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint16x2 vint16x2_load_aligned(const vec_int16 x[2]) { vint16x2 vec; @@ -2673,7 +2723,7 @@ } # define VINT16x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT16x2_LOAD_DEFINED +#if !defined(VINT16x2_LOAD_DEFINED) VEC_FUNC_IMPL vint16x2 vint16x2_load(const vec_int16 x[2]) { vint16x2 vec; @@ -2682,21 +2732,21 @@ } # define VINT16x2_LOAD_DEFINED #endif -#ifndef VINT16x2_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint16x2_store_aligned(vint16x2 vec, vec_int16 arr[2]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT16x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint16x2_store_aligned(vint16x2 vec, vec_int16 x[2]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT16x2_STORE_ALIGNED_DEFINED #endif -#ifndef VINT16x2_STORE_DEFINED -VEC_FUNC_IMPL void vint16x2_store(vint16x2 vec, vec_int16 arr[2]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT16x2_STORE_DEFINED) +VEC_FUNC_IMPL void vint16x2_store(vint16x2 vec, vec_int16 x[2]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT16x2_STORE_DEFINED #endif -#ifndef VINT16x2_ADD_DEFINED +#if !defined(VINT16x2_ADD_DEFINED) VEC_FUNC_IMPL vint16x2 vint16x2_add(vint16x2 vec1, vint16x2 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -2704,7 +2754,7 @@ } # define VINT16x2_ADD_DEFINED #endif -#ifndef VINT16x2_SUB_DEFINED +#if !defined(VINT16x2_SUB_DEFINED) VEC_FUNC_IMPL vint16x2 vint16x2_sub(vint16x2 vec1, vint16x2 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -2712,7 +2762,7 @@ } # define VINT16x2_SUB_DEFINED #endif -#ifndef VINT16x2_MUL_DEFINED +#if !defined(VINT16x2_MUL_DEFINED) VEC_FUNC_IMPL vint16x2 vint16x2_mul(vint16x2 vec1, vint16x2 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -2720,106 +2770,8 @@ } # define VINT16x2_MUL_DEFINED #endif -#ifndef VINT16x2_AND_DEFINED -VEC_FUNC_IMPL vint16x2 vint16x2_and(vint16x2 vec1, vint16x2 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT16x2_AND_DEFINED -#endif -#ifndef VINT16x2_OR_DEFINED -VEC_FUNC_IMPL vint16x2 vint16x2_or(vint16x2 vec1, vint16x2 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT16x2_OR_DEFINED -#endif -#ifndef VINT16x2_XOR_DEFINED -VEC_FUNC_IMPL vint16x2 vint16x2_xor(vint16x2 vec1, vint16x2 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT16x2_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x2_CMPLT_DEFINED -VEC_FUNC_IMPL vint16x2 vint16x2_cmplt(vint16x2 vec1, vint16x2 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT16x2_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x2_CMPEQ_DEFINED -VEC_FUNC_IMPL vint16x2 vint16x2_cmpeq(vint16x2 vec1, vint16x2 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT16x2_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x2_CMPGT_DEFINED -VEC_FUNC_IMPL vint16x2 vint16x2_cmpgt(vint16x2 
vec1, vint16x2 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT16x2_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x2_CMPLE_DEFINED -VEC_FUNC_IMPL vint16x2 vint16x2_cmple(vint16x2 vec1, vint16x2 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT16x2_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x2_CMPGE_DEFINED -VEC_FUNC_IMPL vint16x2 vint16x2_cmpge(vint16x2 vec1, vint16x2 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT16x2_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x2_MIN_DEFINED -VEC_FUNC_IMPL vint16x2 vint16x2_min(vint16x2 vec1, vint16x2 vec2) -{ - vint16x2 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT16x2_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x2_MAX_DEFINED -VEC_FUNC_IMPL vint16x2 vint16x2_max(vint16x2 vec1, vint16x2 vec2) -{ - vint16x2 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT16x2_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x2_AVG_DEFINED +#if !defined(VINT16x2_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x2 vint16x2_avg(vint16x2 vec1, vint16x2 vec2) { vint16x2 ones = vint16x2_splat(1); @@ -2833,19 +2785,107 @@ } # define VINT16x2_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x2_LSHIFT_DEFINED -VEC_FUNC_IMPL vint16x2 vint16x2_lshift(vint16x2 vec1, vuint16x2 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT16x2_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x2_RSHIFT_DEFINED +#if !defined(VINT16x2_AND_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_and(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT16x2_AND_DEFINED +#endif +#if !defined(VINT16x2_OR_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_or(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT16x2_OR_DEFINED +#endif +#if !defined(VINT16x2_XOR_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_xor(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT16x2_XOR_DEFINED +#endif +#if !defined(VINT16x2_NOT_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_not(vint16x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT16x2_NOT_DEFINED +#endif +#if !defined(VINT16x2_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x2 vint16x2_cmplt(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT16x2_CMPLT_DEFINED +#endif +#if !defined(VINT16x2_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x2 vint16x2_cmpeq(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT16x2_CMPEQ_DEFINED +#endif +#if !defined(VINT16x2_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x2 vint16x2_cmpgt(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT16x2_CMPGT_DEFINED +#endif +#if !defined(VINT16x2_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x2 vint16x2_cmple(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT16x2_CMPLE_DEFINED 
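/* A note on the comparison and min/max bodies in this section (annotation,
 * assuming GNU C vector-extension semantics): lane-wise comparisons such as
 * (vec1.gcc < vec2.gcc) yield ~0 (all bits set) in lanes where the predicate
 * holds and 0 elsewhere. The min/max bodies then blend through that mask:
 *     result = (vec1 & mask) | (vec2 & ~mask);
 * picking vec1's lane wherever the comparison was true and vec2's lane
 * otherwise — presumably why these operations sit behind the
 * VEC_GNUC_ATLEAST(4, 3, 0) guard while the plain bitwise ops do not. */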
+#endif +#if !defined(VINT16x2_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x2 vint16x2_cmpge(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT16x2_CMPGE_DEFINED +#endif +#if !defined(VINT16x2_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x2 vint16x2_min(vint16x2 vec1, vint16x2 vec2) +{ + vint16x2 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x2_MIN_DEFINED +#endif +#if !defined(VINT16x2_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x2 vint16x2_max(vint16x2 vec1, vint16x2 vec2) +{ + vint16x2 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x2_MAX_DEFINED +#endif +#if !defined(VINT16x2_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x2 vint16x2_rshift(vint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -2853,9 +2893,8 @@ } # define VINT16x2_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x2_LRSHIFT_DEFINED +#if !defined(VINT16x2_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x2 vint16x2_lrshift(vint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc); @@ -2863,29 +2902,26 @@ } # define VINT16x2_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT16x2_NOT_DEFINED -VEC_FUNC_IMPL vint16x2 vint16x2_not(vint16x2 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT16x2_NOT_DEFINED -#endif - - -/* vint16x2 */ - -#ifndef VUINT16x2_SPLAT_DEFINED +#if !defined(VINT16x2_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x2 vint16x2_lshift(vint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT16x2_LSHIFT_DEFINED +#endif +#if !defined(VUINT16x2_SPLAT_DEFINED) VEC_FUNC_IMPL vuint16x2 vuint16x2_splat(vec_uint16 x) { vuint16x2 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; return vec; } # define VUINT16x2_SPLAT_DEFINED #endif -#ifndef VUINT16x2_LOAD_ALIGNED_DEFINED +#if !defined(VUINT16x2_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint16x2 vuint16x2_load_aligned(const vec_uint16 x[2]) { vuint16x2 vec; @@ -2894,7 +2930,7 @@ } # define VUINT16x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT16x2_LOAD_DEFINED +#if !defined(VUINT16x2_LOAD_DEFINED) VEC_FUNC_IMPL vuint16x2 vuint16x2_load(const vec_uint16 x[2]) { vuint16x2 vec; @@ -2903,21 +2939,21 @@ } # define VUINT16x2_LOAD_DEFINED #endif -#ifndef VUINT16x2_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint16x2_store_aligned(vuint16x2 vec, vec_uint16 arr[2]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT16x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint16x2_store_aligned(vuint16x2 vec, vec_uint16 x[2]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT16x2_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT16x2_STORE_DEFINED -VEC_FUNC_IMPL void vuint16x2_store(vuint16x2 vec, vec_uint16 arr[2]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT16x2_STORE_DEFINED) +VEC_FUNC_IMPL void vuint16x2_store(vuint16x2 vec, vec_uint16 x[2]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT16x2_STORE_DEFINED #endif -#ifndef VUINT16x2_ADD_DEFINED +#if !defined(VUINT16x2_ADD_DEFINED) VEC_FUNC_IMPL vuint16x2 vuint16x2_add(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc + 
vec2.gcc); @@ -2925,7 +2961,7 @@ } # define VUINT16x2_ADD_DEFINED #endif -#ifndef VUINT16x2_SUB_DEFINED +#if !defined(VUINT16x2_SUB_DEFINED) VEC_FUNC_IMPL vuint16x2 vuint16x2_sub(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -2933,7 +2969,7 @@ } # define VUINT16x2_SUB_DEFINED #endif -#ifndef VUINT16x2_MUL_DEFINED +#if !defined(VUINT16x2_MUL_DEFINED) VEC_FUNC_IMPL vuint16x2 vuint16x2_mul(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -2941,7 +2977,16 @@ } # define VUINT16x2_MUL_DEFINED #endif -#ifndef VUINT16x2_AND_DEFINED +#if !defined(VUINT16x2_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_avg(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT16x2_AVG_DEFINED +#endif +#if !defined(VUINT16x2_AND_DEFINED) VEC_FUNC_IMPL vuint16x2 vuint16x2_and(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -2949,7 +2994,7 @@ } # define VUINT16x2_AND_DEFINED #endif -#ifndef VUINT16x2_OR_DEFINED +#if !defined(VUINT16x2_OR_DEFINED) VEC_FUNC_IMPL vuint16x2 vuint16x2_or(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -2957,7 +3002,7 @@ } # define VUINT16x2_OR_DEFINED #endif -#ifndef VUINT16x2_XOR_DEFINED +#if !defined(VUINT16x2_XOR_DEFINED) VEC_FUNC_IMPL vuint16x2 vuint16x2_xor(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -2965,8 +3010,16 @@ } # define VUINT16x2_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x2_CMPLT_DEFINED +#if !defined(VUINT16x2_NOT_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_not(vuint16x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT16x2_NOT_DEFINED +#endif +#if !defined(VUINT16x2_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x2 vuint16x2_cmplt(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -2974,9 +3027,8 @@ } # define VUINT16x2_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x2_CMPEQ_DEFINED +#if !defined(VUINT16x2_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpeq(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -2984,9 +3036,8 @@ } # define VUINT16x2_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x2_CMPGT_DEFINED +#if !defined(VUINT16x2_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpgt(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -2994,9 +3045,8 @@ } # define VUINT16x2_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x2_CMPLE_DEFINED +#if !defined(VUINT16x2_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x2 vuint16x2_cmple(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -3004,9 +3054,8 @@ } # define VUINT16x2_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x2_CMPGE_DEFINED +#if !defined(VUINT16x2_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpge(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -3014,9 +3063,8 @@ } # define VUINT16x2_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x2_MIN_DEFINED +#if !defined(VUINT16x2_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x2 vuint16x2_min(vuint16x2 vec1, vuint16x2 vec2) { vuint16x2 mask; @@ -3026,9 +3074,8 @@ } # define 
VUINT16x2_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x2_MAX_DEFINED +#if !defined(VUINT16x2_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x2 vuint16x2_max(vuint16x2 vec1, vuint16x2 vec2) { vuint16x2 mask; @@ -3038,30 +3085,8 @@ } # define VUINT16x2_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x2_AVG_DEFINED -VEC_FUNC_IMPL vuint16x2 vuint16x2_avg(vuint16x2 vec1, vuint16x2 vec2) -{ - vint16x2 ones = vint16x2_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT16x2_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x2_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint16x2 vuint16x2_lshift(vuint16x2 vec1, vuint16x2 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT16x2_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x2_RSHIFT_DEFINED +#if !defined(VUINT16x2_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x2 vuint16x2_rshift(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -3069,9 +3094,8 @@ } # define VUINT16x2_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x2_LRSHIFT_DEFINED +#if !defined(VUINT16x2_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x2 vuint16x2_lrshift(vuint16x2 vec1, vuint16x2 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc); @@ -3079,29 +3103,28 @@ } # define VUINT16x2_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT16x2_NOT_DEFINED -VEC_FUNC_IMPL vuint16x2 vuint16x2_not(vuint16x2 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT16x2_NOT_DEFINED -#endif - - -/* vuint16x4 */ - -#ifndef VINT16x4_SPLAT_DEFINED +#if !defined(VUINT16x2_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint16x2 vuint16x2_lshift(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT16x2_LSHIFT_DEFINED +#endif +#if !defined(VINT16x4_SPLAT_DEFINED) VEC_FUNC_IMPL vint16x4 vint16x4_splat(vec_int16 x) { vint16x4 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; return vec; } # define VINT16x4_SPLAT_DEFINED #endif -#ifndef VINT16x4_LOAD_ALIGNED_DEFINED +#if !defined(VINT16x4_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint16x4 vint16x4_load_aligned(const vec_int16 x[4]) { vint16x4 vec; @@ -3110,7 +3133,7 @@ } # define VINT16x4_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT16x4_LOAD_DEFINED +#if !defined(VINT16x4_LOAD_DEFINED) VEC_FUNC_IMPL vint16x4 vint16x4_load(const vec_int16 x[4]) { vint16x4 vec; @@ -3119,21 +3142,21 @@ } # define VINT16x4_LOAD_DEFINED #endif -#ifndef VINT16x4_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint16x4_store_aligned(vint16x4 vec, vec_int16 arr[4]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT16x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint16x4_store_aligned(vint16x4 vec, vec_int16 x[4]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT16x4_STORE_ALIGNED_DEFINED #endif -#ifndef VINT16x4_STORE_DEFINED -VEC_FUNC_IMPL void vint16x4_store(vint16x4 vec, vec_int16 arr[4]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT16x4_STORE_DEFINED) +VEC_FUNC_IMPL void vint16x4_store(vint16x4 vec, vec_int16 x[4]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT16x4_STORE_DEFINED #endif -#ifndef VINT16x4_ADD_DEFINED +#if 
!defined(VINT16x4_ADD_DEFINED) VEC_FUNC_IMPL vint16x4 vint16x4_add(vint16x4 vec1, vint16x4 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -3141,7 +3164,7 @@ } # define VINT16x4_ADD_DEFINED #endif -#ifndef VINT16x4_SUB_DEFINED +#if !defined(VINT16x4_SUB_DEFINED) VEC_FUNC_IMPL vint16x4 vint16x4_sub(vint16x4 vec1, vint16x4 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -3149,7 +3172,7 @@ } # define VINT16x4_SUB_DEFINED #endif -#ifndef VINT16x4_MUL_DEFINED +#if !defined(VINT16x4_MUL_DEFINED) VEC_FUNC_IMPL vint16x4 vint16x4_mul(vint16x4 vec1, vint16x4 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -3157,106 +3180,8 @@ } # define VINT16x4_MUL_DEFINED #endif -#ifndef VINT16x4_AND_DEFINED -VEC_FUNC_IMPL vint16x4 vint16x4_and(vint16x4 vec1, vint16x4 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT16x4_AND_DEFINED -#endif -#ifndef VINT16x4_OR_DEFINED -VEC_FUNC_IMPL vint16x4 vint16x4_or(vint16x4 vec1, vint16x4 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT16x4_OR_DEFINED -#endif -#ifndef VINT16x4_XOR_DEFINED -VEC_FUNC_IMPL vint16x4 vint16x4_xor(vint16x4 vec1, vint16x4 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT16x4_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x4_CMPLT_DEFINED -VEC_FUNC_IMPL vint16x4 vint16x4_cmplt(vint16x4 vec1, vint16x4 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT16x4_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x4_CMPEQ_DEFINED -VEC_FUNC_IMPL vint16x4 vint16x4_cmpeq(vint16x4 vec1, vint16x4 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT16x4_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x4_CMPGT_DEFINED -VEC_FUNC_IMPL vint16x4 vint16x4_cmpgt(vint16x4 vec1, vint16x4 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT16x4_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x4_CMPLE_DEFINED -VEC_FUNC_IMPL vint16x4 vint16x4_cmple(vint16x4 vec1, vint16x4 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT16x4_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x4_CMPGE_DEFINED -VEC_FUNC_IMPL vint16x4 vint16x4_cmpge(vint16x4 vec1, vint16x4 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT16x4_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x4_MIN_DEFINED -VEC_FUNC_IMPL vint16x4 vint16x4_min(vint16x4 vec1, vint16x4 vec2) -{ - vint16x4 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT16x4_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x4_MAX_DEFINED -VEC_FUNC_IMPL vint16x4 vint16x4_max(vint16x4 vec1, vint16x4 vec2) -{ - vint16x4 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT16x4_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x4_AVG_DEFINED +#if !defined(VINT16x4_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x4 vint16x4_avg(vint16x4 vec1, vint16x4 vec2) { vint16x4 ones = vint16x4_splat(1); @@ -3270,19 +3195,107 @@ } # define VINT16x4_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x4_LSHIFT_DEFINED -VEC_FUNC_IMPL vint16x4 vint16x4_lshift(vint16x4 vec1, vuint16x4 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define 
VINT16x4_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x4_RSHIFT_DEFINED +#if !defined(VINT16x4_AND_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_and(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT16x4_AND_DEFINED +#endif +#if !defined(VINT16x4_OR_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_or(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT16x4_OR_DEFINED +#endif +#if !defined(VINT16x4_XOR_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_xor(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT16x4_XOR_DEFINED +#endif +#if !defined(VINT16x4_NOT_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_not(vint16x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT16x4_NOT_DEFINED +#endif +#if !defined(VINT16x4_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x4 vint16x4_cmplt(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT16x4_CMPLT_DEFINED +#endif +#if !defined(VINT16x4_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x4 vint16x4_cmpeq(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT16x4_CMPEQ_DEFINED +#endif +#if !defined(VINT16x4_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x4 vint16x4_cmpgt(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT16x4_CMPGT_DEFINED +#endif +#if !defined(VINT16x4_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x4 vint16x4_cmple(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT16x4_CMPLE_DEFINED +#endif +#if !defined(VINT16x4_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x4 vint16x4_cmpge(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT16x4_CMPGE_DEFINED +#endif +#if !defined(VINT16x4_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x4 vint16x4_min(vint16x4 vec1, vint16x4 vec2) +{ + vint16x4 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x4_MIN_DEFINED +#endif +#if !defined(VINT16x4_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x4 vint16x4_max(vint16x4 vec1, vint16x4 vec2) +{ + vint16x4 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x4_MAX_DEFINED +#endif +#if !defined(VINT16x4_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x4 vint16x4_rshift(vint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -3290,9 +3303,8 @@ } # define VINT16x4_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x4_LRSHIFT_DEFINED +#if !defined(VINT16x4_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x4 vint16x4_lrshift(vint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc); @@ -3300,29 +3312,28 @@ } # define VINT16x4_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT16x4_NOT_DEFINED -VEC_FUNC_IMPL vint16x4 vint16x4_not(vint16x4 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT16x4_NOT_DEFINED -#endif - - -/* vint16x4 */ - -#ifndef 
VUINT16x4_SPLAT_DEFINED +#if !defined(VINT16x4_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x4 vint16x4_lshift(vint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT16x4_LSHIFT_DEFINED +#endif +#if !defined(VUINT16x4_SPLAT_DEFINED) VEC_FUNC_IMPL vuint16x4 vuint16x4_splat(vec_uint16 x) { vuint16x4 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; return vec; } # define VUINT16x4_SPLAT_DEFINED #endif -#ifndef VUINT16x4_LOAD_ALIGNED_DEFINED +#if !defined(VUINT16x4_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint16x4 vuint16x4_load_aligned(const vec_uint16 x[4]) { vuint16x4 vec; @@ -3331,7 +3342,7 @@ } # define VUINT16x4_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT16x4_LOAD_DEFINED +#if !defined(VUINT16x4_LOAD_DEFINED) VEC_FUNC_IMPL vuint16x4 vuint16x4_load(const vec_uint16 x[4]) { vuint16x4 vec; @@ -3340,21 +3351,21 @@ } # define VUINT16x4_LOAD_DEFINED #endif -#ifndef VUINT16x4_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint16x4_store_aligned(vuint16x4 vec, vec_uint16 arr[4]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT16x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint16x4_store_aligned(vuint16x4 vec, vec_uint16 x[4]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT16x4_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT16x4_STORE_DEFINED -VEC_FUNC_IMPL void vuint16x4_store(vuint16x4 vec, vec_uint16 arr[4]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT16x4_STORE_DEFINED) +VEC_FUNC_IMPL void vuint16x4_store(vuint16x4 vec, vec_uint16 x[4]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT16x4_STORE_DEFINED #endif -#ifndef VUINT16x4_ADD_DEFINED +#if !defined(VUINT16x4_ADD_DEFINED) VEC_FUNC_IMPL vuint16x4 vuint16x4_add(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -3362,7 +3373,7 @@ } # define VUINT16x4_ADD_DEFINED #endif -#ifndef VUINT16x4_SUB_DEFINED +#if !defined(VUINT16x4_SUB_DEFINED) VEC_FUNC_IMPL vuint16x4 vuint16x4_sub(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -3370,7 +3381,7 @@ } # define VUINT16x4_SUB_DEFINED #endif -#ifndef VUINT16x4_MUL_DEFINED +#if !defined(VUINT16x4_MUL_DEFINED) VEC_FUNC_IMPL vuint16x4 vuint16x4_mul(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -3378,7 +3389,16 @@ } # define VUINT16x4_MUL_DEFINED #endif -#ifndef VUINT16x4_AND_DEFINED +#if !defined(VUINT16x4_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_avg(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT16x4_AVG_DEFINED +#endif +#if !defined(VUINT16x4_AND_DEFINED) VEC_FUNC_IMPL vuint16x4 vuint16x4_and(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -3386,7 +3406,7 @@ } # define VUINT16x4_AND_DEFINED #endif -#ifndef VUINT16x4_OR_DEFINED +#if !defined(VUINT16x4_OR_DEFINED) VEC_FUNC_IMPL vuint16x4 vuint16x4_or(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -3394,7 +3414,7 @@ } # define VUINT16x4_OR_DEFINED #endif -#ifndef VUINT16x4_XOR_DEFINED +#if !defined(VUINT16x4_XOR_DEFINED) VEC_FUNC_IMPL vuint16x4 vuint16x4_xor(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -3402,8 +3422,16 @@ } # define VUINT16x4_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x4_CMPLT_DEFINED +#if !defined(VUINT16x4_NOT_DEFINED) +VEC_FUNC_IMPL vuint16x4 
vuint16x4_not(vuint16x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT16x4_NOT_DEFINED +#endif +#if !defined(VUINT16x4_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x4 vuint16x4_cmplt(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -3411,9 +3439,8 @@ } # define VUINT16x4_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x4_CMPEQ_DEFINED +#if !defined(VUINT16x4_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpeq(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -3421,9 +3448,8 @@ } # define VUINT16x4_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x4_CMPGT_DEFINED +#if !defined(VUINT16x4_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpgt(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -3431,9 +3457,8 @@ } # define VUINT16x4_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x4_CMPLE_DEFINED +#if !defined(VUINT16x4_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x4 vuint16x4_cmple(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -3441,9 +3466,8 @@ } # define VUINT16x4_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x4_CMPGE_DEFINED +#if !defined(VUINT16x4_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpge(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -3451,9 +3475,8 @@ } # define VUINT16x4_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x4_MIN_DEFINED +#if !defined(VUINT16x4_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x4 vuint16x4_min(vuint16x4 vec1, vuint16x4 vec2) { vuint16x4 mask; @@ -3463,9 +3486,8 @@ } # define VUINT16x4_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x4_MAX_DEFINED +#if !defined(VUINT16x4_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x4 vuint16x4_max(vuint16x4 vec1, vuint16x4 vec2) { vuint16x4 mask; @@ -3475,30 +3497,8 @@ } # define VUINT16x4_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x4_AVG_DEFINED -VEC_FUNC_IMPL vuint16x4 vuint16x4_avg(vuint16x4 vec1, vuint16x4 vec2) -{ - vint16x4 ones = vint16x4_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT16x4_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x4_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint16x4 vuint16x4_lshift(vuint16x4 vec1, vuint16x4 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT16x4_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x4_RSHIFT_DEFINED +#if !defined(VUINT16x4_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x4 vuint16x4_rshift(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -3506,9 +3506,8 @@ } # define VUINT16x4_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x4_LRSHIFT_DEFINED +#if !defined(VUINT16x4_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x4 vuint16x4_lrshift(vuint16x4 vec1, vuint16x4 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc); @@ -3516,29 +3515,32 @@ } # define VUINT16x4_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT16x4_NOT_DEFINED 
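/* A note on the unsigned avg bodies nearby (annotation, not part of the
 * generated header): they compute a rounded-up average without forming the
 * possibly-overflowing sum vec1 + vec2. For any two unsigned lanes a and b:
 *     (a >> 1) + (b >> 1) + ((a | b) & 1)  ==  ceil((a + b) / 2)
 * since the shifted halves give floor(a/2) + floor(b/2), and the (a | b) & 1
 * term adds back 1 whenever at least one of the discarded low bits was set.
 * The newer form masks with the scalar constant 1; the removed form splatted
 * 1 into every lane first — both isolate the low bit of (a | b). */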
-VEC_FUNC_IMPL vuint16x4 vuint16x4_not(vuint16x4 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT16x4_NOT_DEFINED -#endif - - -/* vuint16x8 */ - -#ifndef VINT16x8_SPLAT_DEFINED +#if !defined(VUINT16x4_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint16x4 vuint16x4_lshift(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT16x4_LSHIFT_DEFINED +#endif +#if !defined(VINT16x8_SPLAT_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_splat(vec_int16 x) { vint16x8 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; return vec; } # define VINT16x8_SPLAT_DEFINED #endif -#ifndef VINT16x8_LOAD_ALIGNED_DEFINED +#if !defined(VINT16x8_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_load_aligned(const vec_int16 x[8]) { vint16x8 vec; @@ -3547,7 +3549,7 @@ } # define VINT16x8_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT16x8_LOAD_DEFINED +#if !defined(VINT16x8_LOAD_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_load(const vec_int16 x[8]) { vint16x8 vec; @@ -3556,21 +3558,21 @@ } # define VINT16x8_LOAD_DEFINED #endif -#ifndef VINT16x8_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 arr[8]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT16x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 x[8]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT16x8_STORE_ALIGNED_DEFINED #endif -#ifndef VINT16x8_STORE_DEFINED -VEC_FUNC_IMPL void vint16x8_store(vint16x8 vec, vec_int16 arr[8]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT16x8_STORE_DEFINED) +VEC_FUNC_IMPL void vint16x8_store(vint16x8 vec, vec_int16 x[8]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT16x8_STORE_DEFINED #endif -#ifndef VINT16x8_ADD_DEFINED +#if !defined(VINT16x8_ADD_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_add(vint16x8 vec1, vint16x8 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -3578,7 +3580,7 @@ } # define VINT16x8_ADD_DEFINED #endif -#ifndef VINT16x8_SUB_DEFINED +#if !defined(VINT16x8_SUB_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_sub(vint16x8 vec1, vint16x8 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -3586,7 +3588,7 @@ } # define VINT16x8_SUB_DEFINED #endif -#ifndef VINT16x8_MUL_DEFINED +#if !defined(VINT16x8_MUL_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_mul(vint16x8 vec1, vint16x8 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -3594,106 +3596,8 @@ } # define VINT16x8_MUL_DEFINED #endif -#ifndef VINT16x8_AND_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT16x8_AND_DEFINED -#endif -#ifndef VINT16x8_OR_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT16x8_OR_DEFINED -#endif -#ifndef VINT16x8_XOR_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT16x8_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x8_CMPLT_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT16x8_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x8_CMPEQ_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 
vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT16x8_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x8_CMPGT_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT16x8_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x8_CMPLE_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_cmple(vint16x8 vec1, vint16x8 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT16x8_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x8_CMPGE_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_cmpge(vint16x8 vec1, vint16x8 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT16x8_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x8_MIN_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2) -{ - vint16x8 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT16x8_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x8_MAX_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2) -{ - vint16x8 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT16x8_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x8_AVG_DEFINED +#if !defined(VINT16x8_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2) { vint16x8 ones = vint16x8_splat(1); @@ -3707,19 +3611,107 @@ } # define VINT16x8_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x8_LSHIFT_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT16x8_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x8_RSHIFT_DEFINED +#if !defined(VINT16x8_AND_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT16x8_AND_DEFINED +#endif +#if !defined(VINT16x8_OR_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT16x8_OR_DEFINED +#endif +#if !defined(VINT16x8_XOR_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT16x8_XOR_DEFINED +#endif +#if !defined(VINT16x8_NOT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_not(vint16x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT16x8_NOT_DEFINED +#endif +#if !defined(VINT16x8_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT16x8_CMPLT_DEFINED +#endif +#if !defined(VINT16x8_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT16x8_CMPEQ_DEFINED +#endif +#if !defined(VINT16x8_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT16x8_CMPGT_DEFINED +#endif +#if 
!defined(VINT16x8_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x8 vint16x8_cmple(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT16x8_CMPLE_DEFINED +#endif +#if !defined(VINT16x8_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x8 vint16x8_cmpge(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT16x8_CMPGE_DEFINED +#endif +#if !defined(VINT16x8_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x8_MIN_DEFINED +#endif +#if !defined(VINT16x8_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x8_MAX_DEFINED +#endif +#if !defined(VINT16x8_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -3727,9 +3719,8 @@ } # define VINT16x8_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x8_LRSHIFT_DEFINED +#if !defined(VINT16x8_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x8 vint16x8_lrshift(vint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); @@ -3737,29 +3728,32 @@ } # define VINT16x8_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT16x8_NOT_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_not(vint16x8 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT16x8_NOT_DEFINED -#endif - - -/* vint16x8 */ - -#ifndef VUINT16x8_SPLAT_DEFINED +#if !defined(VINT16x8_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT16x8_LSHIFT_DEFINED +#endif +#if !defined(VUINT16x8_SPLAT_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_splat(vec_uint16 x) { vuint16x8 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; return vec; } # define VUINT16x8_SPLAT_DEFINED #endif -#ifndef VUINT16x8_LOAD_ALIGNED_DEFINED +#if !defined(VUINT16x8_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_load_aligned(const vec_uint16 x[8]) { vuint16x8 vec; @@ -3768,7 +3762,7 @@ } # define VUINT16x8_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT16x8_LOAD_DEFINED +#if !defined(VUINT16x8_LOAD_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_load(const vec_uint16 x[8]) { vuint16x8 vec; @@ -3777,21 +3771,21 @@ } # define VUINT16x8_LOAD_DEFINED #endif -#ifndef VUINT16x8_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 arr[8]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT16x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 x[8]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT16x8_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT16x8_STORE_DEFINED -VEC_FUNC_IMPL void vuint16x8_store(vuint16x8 vec, vec_uint16 arr[8]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if 
!defined(VUINT16x8_STORE_DEFINED) +VEC_FUNC_IMPL void vuint16x8_store(vuint16x8 vec, vec_uint16 x[8]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT16x8_STORE_DEFINED #endif -#ifndef VUINT16x8_ADD_DEFINED +#if !defined(VUINT16x8_ADD_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_add(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -3799,7 +3793,7 @@ } # define VUINT16x8_ADD_DEFINED #endif -#ifndef VUINT16x8_SUB_DEFINED +#if !defined(VUINT16x8_SUB_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_sub(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -3807,7 +3801,7 @@ } # define VUINT16x8_SUB_DEFINED #endif -#ifndef VUINT16x8_MUL_DEFINED +#if !defined(VUINT16x8_MUL_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_mul(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -3815,7 +3809,16 @@ } # define VUINT16x8_MUL_DEFINED #endif -#ifndef VUINT16x8_AND_DEFINED +#if !defined(VUINT16x8_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT16x8_AVG_DEFINED +#endif +#if !defined(VUINT16x8_AND_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_and(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -3823,7 +3826,7 @@ } # define VUINT16x8_AND_DEFINED #endif -#ifndef VUINT16x8_OR_DEFINED +#if !defined(VUINT16x8_OR_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_or(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -3831,7 +3834,7 @@ } # define VUINT16x8_OR_DEFINED #endif -#ifndef VUINT16x8_XOR_DEFINED +#if !defined(VUINT16x8_XOR_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_xor(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -3839,8 +3842,16 @@ } # define VUINT16x8_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x8_CMPLT_DEFINED +#if !defined(VUINT16x8_NOT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_not(vuint16x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT16x8_NOT_DEFINED +#endif +#if !defined(VUINT16x8_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x8 vuint16x8_cmplt(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -3848,9 +3859,8 @@ } # define VUINT16x8_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x8_CMPEQ_DEFINED +#if !defined(VUINT16x8_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpeq(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -3858,9 +3868,8 @@ } # define VUINT16x8_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x8_CMPGT_DEFINED +#if !defined(VUINT16x8_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpgt(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -3868,9 +3877,8 @@ } # define VUINT16x8_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x8_CMPLE_DEFINED +#if !defined(VUINT16x8_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x8 vuint16x8_cmple(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -3878,9 +3886,8 @@ } # define VUINT16x8_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x8_CMPGE_DEFINED +#if !defined(VUINT16x8_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpge(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc 
>= vec2.gcc); @@ -3888,9 +3895,8 @@ } # define VUINT16x8_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x8_MIN_DEFINED +#if !defined(VUINT16x8_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x8 vuint16x8_min(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 mask; @@ -3900,9 +3906,8 @@ } # define VUINT16x8_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x8_MAX_DEFINED +#if !defined(VUINT16x8_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x8 vuint16x8_max(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 mask; @@ -3912,30 +3917,8 @@ } # define VUINT16x8_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x8_AVG_DEFINED -VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2) -{ - vint16x8 ones = vint16x8_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT16x8_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x8_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT16x8_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x8_RSHIFT_DEFINED +#if !defined(VUINT16x8_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x8 vuint16x8_rshift(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -3943,9 +3926,8 @@ } # define VUINT16x8_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x8_LRSHIFT_DEFINED +#if !defined(VUINT16x8_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); @@ -3953,29 +3935,40 @@ } # define VUINT16x8_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT16x8_NOT_DEFINED -VEC_FUNC_IMPL vuint16x8 vuint16x8_not(vuint16x8 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT16x8_NOT_DEFINED -#endif - - -/* vuint16x16 */ - -#ifndef VINT16x16_SPLAT_DEFINED +#if !defined(VUINT16x8_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT16x8_LSHIFT_DEFINED +#endif +#if !defined(VINT16x16_SPLAT_DEFINED) VEC_FUNC_IMPL vint16x16 vint16x16_splat(vec_int16 x) { vint16x16 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + vec.gcc[11] = x; + vec.gcc[12] = x; + vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; return vec; } # define VINT16x16_SPLAT_DEFINED #endif -#ifndef VINT16x16_LOAD_ALIGNED_DEFINED +#if !defined(VINT16x16_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint16x16 vint16x16_load_aligned(const vec_int16 x[16]) { vint16x16 vec; @@ -3984,7 +3977,7 @@ } # define VINT16x16_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT16x16_LOAD_DEFINED +#if !defined(VINT16x16_LOAD_DEFINED) VEC_FUNC_IMPL vint16x16 vint16x16_load(const vec_int16 x[16]) { vint16x16 vec; @@ -3993,21 +3986,21 @@ } # define VINT16x16_LOAD_DEFINED #endif -#ifndef VINT16x16_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint16x16_store_aligned(vint16x16 vec, vec_int16 arr[16]) -{ - 
*(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT16x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint16x16_store_aligned(vint16x16 vec, vec_int16 x[16]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT16x16_STORE_ALIGNED_DEFINED #endif -#ifndef VINT16x16_STORE_DEFINED -VEC_FUNC_IMPL void vint16x16_store(vint16x16 vec, vec_int16 arr[16]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT16x16_STORE_DEFINED) +VEC_FUNC_IMPL void vint16x16_store(vint16x16 vec, vec_int16 x[16]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT16x16_STORE_DEFINED #endif -#ifndef VINT16x16_ADD_DEFINED +#if !defined(VINT16x16_ADD_DEFINED) VEC_FUNC_IMPL vint16x16 vint16x16_add(vint16x16 vec1, vint16x16 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -4015,7 +4008,7 @@ } # define VINT16x16_ADD_DEFINED #endif -#ifndef VINT16x16_SUB_DEFINED +#if !defined(VINT16x16_SUB_DEFINED) VEC_FUNC_IMPL vint16x16 vint16x16_sub(vint16x16 vec1, vint16x16 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -4023,7 +4016,7 @@ } # define VINT16x16_SUB_DEFINED #endif -#ifndef VINT16x16_MUL_DEFINED +#if !defined(VINT16x16_MUL_DEFINED) VEC_FUNC_IMPL vint16x16 vint16x16_mul(vint16x16 vec1, vint16x16 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -4031,106 +4024,8 @@ } # define VINT16x16_MUL_DEFINED #endif -#ifndef VINT16x16_AND_DEFINED -VEC_FUNC_IMPL vint16x16 vint16x16_and(vint16x16 vec1, vint16x16 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT16x16_AND_DEFINED -#endif -#ifndef VINT16x16_OR_DEFINED -VEC_FUNC_IMPL vint16x16 vint16x16_or(vint16x16 vec1, vint16x16 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT16x16_OR_DEFINED -#endif -#ifndef VINT16x16_XOR_DEFINED -VEC_FUNC_IMPL vint16x16 vint16x16_xor(vint16x16 vec1, vint16x16 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT16x16_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x16_CMPLT_DEFINED -VEC_FUNC_IMPL vint16x16 vint16x16_cmplt(vint16x16 vec1, vint16x16 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT16x16_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x16_CMPEQ_DEFINED -VEC_FUNC_IMPL vint16x16 vint16x16_cmpeq(vint16x16 vec1, vint16x16 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT16x16_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x16_CMPGT_DEFINED -VEC_FUNC_IMPL vint16x16 vint16x16_cmpgt(vint16x16 vec1, vint16x16 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT16x16_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x16_CMPLE_DEFINED -VEC_FUNC_IMPL vint16x16 vint16x16_cmple(vint16x16 vec1, vint16x16 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT16x16_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x16_CMPGE_DEFINED -VEC_FUNC_IMPL vint16x16 vint16x16_cmpge(vint16x16 vec1, vint16x16 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT16x16_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x16_MIN_DEFINED -VEC_FUNC_IMPL vint16x16 vint16x16_min(vint16x16 vec1, vint16x16 vec2) -{ - vint16x16 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT16x16_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x16_MAX_DEFINED -VEC_FUNC_IMPL vint16x16 vint16x16_max(vint16x16 vec1, vint16x16 vec2) 
-{ - vint16x16 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT16x16_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x16_AVG_DEFINED +#if !defined(VINT16x16_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x16 vint16x16_avg(vint16x16 vec1, vint16x16 vec2) { vint16x16 ones = vint16x16_splat(1); @@ -4144,19 +4039,107 @@ } # define VINT16x16_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x16_LSHIFT_DEFINED -VEC_FUNC_IMPL vint16x16 vint16x16_lshift(vint16x16 vec1, vuint16x16 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT16x16_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x16_RSHIFT_DEFINED +#if !defined(VINT16x16_AND_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_and(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT16x16_AND_DEFINED +#endif +#if !defined(VINT16x16_OR_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_or(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT16x16_OR_DEFINED +#endif +#if !defined(VINT16x16_XOR_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_xor(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT16x16_XOR_DEFINED +#endif +#if !defined(VINT16x16_NOT_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_not(vint16x16 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT16x16_NOT_DEFINED +#endif +#if !defined(VINT16x16_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x16 vint16x16_cmplt(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT16x16_CMPLT_DEFINED +#endif +#if !defined(VINT16x16_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x16 vint16x16_cmpeq(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT16x16_CMPEQ_DEFINED +#endif +#if !defined(VINT16x16_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x16 vint16x16_cmpgt(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT16x16_CMPGT_DEFINED +#endif +#if !defined(VINT16x16_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x16 vint16x16_cmple(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT16x16_CMPLE_DEFINED +#endif +#if !defined(VINT16x16_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x16 vint16x16_cmpge(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT16x16_CMPGE_DEFINED +#endif +#if !defined(VINT16x16_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x16 vint16x16_min(vint16x16 vec1, vint16x16 vec2) +{ + vint16x16 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x16_MIN_DEFINED +#endif +#if !defined(VINT16x16_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x16 vint16x16_max(vint16x16 vec1, vint16x16 vec2) +{ + vint16x16 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x16_MAX_DEFINED +#endif +#if !defined(VINT16x16_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) 
VEC_FUNC_IMPL vint16x16 vint16x16_rshift(vint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -4164,9 +4147,8 @@ } # define VINT16x16_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x16_LRSHIFT_DEFINED +#if !defined(VINT16x16_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x16 vint16x16_lrshift(vint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); @@ -4174,29 +4156,40 @@ } # define VINT16x16_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT16x16_NOT_DEFINED -VEC_FUNC_IMPL vint16x16 vint16x16_not(vint16x16 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT16x16_NOT_DEFINED -#endif - - -/* vint16x16 */ - -#ifndef VUINT16x16_SPLAT_DEFINED +#if !defined(VINT16x16_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x16 vint16x16_lshift(vint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT16x16_LSHIFT_DEFINED +#endif +#if !defined(VUINT16x16_SPLAT_DEFINED) VEC_FUNC_IMPL vuint16x16 vuint16x16_splat(vec_uint16 x) { vuint16x16 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + vec.gcc[11] = x; + vec.gcc[12] = x; + vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; return vec; } # define VUINT16x16_SPLAT_DEFINED #endif -#ifndef VUINT16x16_LOAD_ALIGNED_DEFINED +#if !defined(VUINT16x16_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint16x16 vuint16x16_load_aligned(const vec_uint16 x[16]) { vuint16x16 vec; @@ -4205,7 +4198,7 @@ } # define VUINT16x16_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT16x16_LOAD_DEFINED +#if !defined(VUINT16x16_LOAD_DEFINED) VEC_FUNC_IMPL vuint16x16 vuint16x16_load(const vec_uint16 x[16]) { vuint16x16 vec; @@ -4214,21 +4207,21 @@ } # define VUINT16x16_LOAD_DEFINED #endif -#ifndef VUINT16x16_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint16x16_store_aligned(vuint16x16 vec, vec_uint16 arr[16]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT16x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint16x16_store_aligned(vuint16x16 vec, vec_uint16 x[16]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT16x16_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT16x16_STORE_DEFINED -VEC_FUNC_IMPL void vuint16x16_store(vuint16x16 vec, vec_uint16 arr[16]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT16x16_STORE_DEFINED) +VEC_FUNC_IMPL void vuint16x16_store(vuint16x16 vec, vec_uint16 x[16]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT16x16_STORE_DEFINED #endif -#ifndef VUINT16x16_ADD_DEFINED +#if !defined(VUINT16x16_ADD_DEFINED) VEC_FUNC_IMPL vuint16x16 vuint16x16_add(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -4236,7 +4229,7 @@ } # define VUINT16x16_ADD_DEFINED #endif -#ifndef VUINT16x16_SUB_DEFINED +#if !defined(VUINT16x16_SUB_DEFINED) VEC_FUNC_IMPL vuint16x16 vuint16x16_sub(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -4244,7 +4237,7 @@ } # define VUINT16x16_SUB_DEFINED #endif -#ifndef VUINT16x16_MUL_DEFINED +#if !defined(VUINT16x16_MUL_DEFINED) VEC_FUNC_IMPL vuint16x16 vuint16x16_mul(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -4252,7 +4245,16 @@ } # define VUINT16x16_MUL_DEFINED #endif -#ifndef VUINT16x16_AND_DEFINED +#if 
!defined(VUINT16x16_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_avg(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT16x16_AVG_DEFINED +#endif +#if !defined(VUINT16x16_AND_DEFINED) VEC_FUNC_IMPL vuint16x16 vuint16x16_and(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -4260,7 +4262,7 @@ } # define VUINT16x16_AND_DEFINED #endif -#ifndef VUINT16x16_OR_DEFINED +#if !defined(VUINT16x16_OR_DEFINED) VEC_FUNC_IMPL vuint16x16 vuint16x16_or(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -4268,7 +4270,7 @@ } # define VUINT16x16_OR_DEFINED #endif -#ifndef VUINT16x16_XOR_DEFINED +#if !defined(VUINT16x16_XOR_DEFINED) VEC_FUNC_IMPL vuint16x16 vuint16x16_xor(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -4276,8 +4278,16 @@ } # define VUINT16x16_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x16_CMPLT_DEFINED +#if !defined(VUINT16x16_NOT_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_not(vuint16x16 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT16x16_NOT_DEFINED +#endif +#if !defined(VUINT16x16_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x16 vuint16x16_cmplt(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -4285,9 +4295,8 @@ } # define VUINT16x16_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x16_CMPEQ_DEFINED +#if !defined(VUINT16x16_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpeq(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -4295,9 +4304,8 @@ } # define VUINT16x16_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x16_CMPGT_DEFINED +#if !defined(VUINT16x16_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpgt(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -4305,9 +4313,8 @@ } # define VUINT16x16_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x16_CMPLE_DEFINED +#if !defined(VUINT16x16_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x16 vuint16x16_cmple(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -4315,9 +4322,8 @@ } # define VUINT16x16_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x16_CMPGE_DEFINED +#if !defined(VUINT16x16_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpge(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -4325,9 +4331,8 @@ } # define VUINT16x16_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x16_MIN_DEFINED +#if !defined(VUINT16x16_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x16 vuint16x16_min(vuint16x16 vec1, vuint16x16 vec2) { vuint16x16 mask; @@ -4337,9 +4342,8 @@ } # define VUINT16x16_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x16_MAX_DEFINED +#if !defined(VUINT16x16_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x16 vuint16x16_max(vuint16x16 vec1, vuint16x16 vec2) { vuint16x16 mask; @@ -4349,30 +4353,8 @@ } # define VUINT16x16_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x16_AVG_DEFINED -VEC_FUNC_IMPL vuint16x16 vuint16x16_avg(vuint16x16 vec1, vuint16x16 vec2) -{ - vint16x16 ones = 
vint16x16_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT16x16_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x16_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint16x16 vuint16x16_lshift(vuint16x16 vec1, vuint16x16 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT16x16_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x16_RSHIFT_DEFINED +#if !defined(VUINT16x16_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x16 vuint16x16_rshift(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -4380,9 +4362,8 @@ } # define VUINT16x16_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x16_LRSHIFT_DEFINED +#if !defined(VUINT16x16_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x16 vuint16x16_lrshift(vuint16x16 vec1, vuint16x16 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); @@ -4390,29 +4371,56 @@ } # define VUINT16x16_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT16x16_NOT_DEFINED -VEC_FUNC_IMPL vuint16x16 vuint16x16_not(vuint16x16 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT16x16_NOT_DEFINED -#endif - - -/* vuint16x32 */ - -#ifndef VINT16x32_SPLAT_DEFINED +#if !defined(VUINT16x16_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint16x16 vuint16x16_lshift(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT16x16_LSHIFT_DEFINED +#endif +#if !defined(VINT16x32_SPLAT_DEFINED) VEC_FUNC_IMPL vint16x32 vint16x32_splat(vec_int16 x) { vint16x32 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + vec.gcc[11] = x; + vec.gcc[12] = x; + vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; + vec.gcc[16] = x; + vec.gcc[17] = x; + vec.gcc[18] = x; + vec.gcc[19] = x; + vec.gcc[20] = x; + vec.gcc[21] = x; + vec.gcc[22] = x; + vec.gcc[23] = x; + vec.gcc[24] = x; + vec.gcc[25] = x; + vec.gcc[26] = x; + vec.gcc[27] = x; + vec.gcc[28] = x; + vec.gcc[29] = x; + vec.gcc[30] = x; + vec.gcc[31] = x; return vec; } # define VINT16x32_SPLAT_DEFINED #endif -#ifndef VINT16x32_LOAD_ALIGNED_DEFINED +#if !defined(VINT16x32_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint16x32 vint16x32_load_aligned(const vec_int16 x[32]) { vint16x32 vec; @@ -4421,7 +4429,7 @@ } # define VINT16x32_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT16x32_LOAD_DEFINED +#if !defined(VINT16x32_LOAD_DEFINED) VEC_FUNC_IMPL vint16x32 vint16x32_load(const vec_int16 x[32]) { vint16x32 vec; @@ -4430,21 +4438,21 @@ } # define VINT16x32_LOAD_DEFINED #endif -#ifndef VINT16x32_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint16x32_store_aligned(vint16x32 vec, vec_int16 arr[32]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT16x32_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint16x32_store_aligned(vint16x32 vec, vec_int16 x[32]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT16x32_STORE_ALIGNED_DEFINED #endif -#ifndef VINT16x32_STORE_DEFINED -VEC_FUNC_IMPL void vint16x32_store(vint16x32 vec, vec_int16 arr[32]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT16x32_STORE_DEFINED) +VEC_FUNC_IMPL void 
vint16x32_store(vint16x32 vec, vec_int16 x[32]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT16x32_STORE_DEFINED #endif -#ifndef VINT16x32_ADD_DEFINED +#if !defined(VINT16x32_ADD_DEFINED) VEC_FUNC_IMPL vint16x32 vint16x32_add(vint16x32 vec1, vint16x32 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -4452,7 +4460,7 @@ } # define VINT16x32_ADD_DEFINED #endif -#ifndef VINT16x32_SUB_DEFINED +#if !defined(VINT16x32_SUB_DEFINED) VEC_FUNC_IMPL vint16x32 vint16x32_sub(vint16x32 vec1, vint16x32 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -4460,7 +4468,7 @@ } # define VINT16x32_SUB_DEFINED #endif -#ifndef VINT16x32_MUL_DEFINED +#if !defined(VINT16x32_MUL_DEFINED) VEC_FUNC_IMPL vint16x32 vint16x32_mul(vint16x32 vec1, vint16x32 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -4468,106 +4476,8 @@ } # define VINT16x32_MUL_DEFINED #endif -#ifndef VINT16x32_AND_DEFINED -VEC_FUNC_IMPL vint16x32 vint16x32_and(vint16x32 vec1, vint16x32 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT16x32_AND_DEFINED -#endif -#ifndef VINT16x32_OR_DEFINED -VEC_FUNC_IMPL vint16x32 vint16x32_or(vint16x32 vec1, vint16x32 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT16x32_OR_DEFINED -#endif -#ifndef VINT16x32_XOR_DEFINED -VEC_FUNC_IMPL vint16x32 vint16x32_xor(vint16x32 vec1, vint16x32 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT16x32_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x32_CMPLT_DEFINED -VEC_FUNC_IMPL vint16x32 vint16x32_cmplt(vint16x32 vec1, vint16x32 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT16x32_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x32_CMPEQ_DEFINED -VEC_FUNC_IMPL vint16x32 vint16x32_cmpeq(vint16x32 vec1, vint16x32 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT16x32_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x32_CMPGT_DEFINED -VEC_FUNC_IMPL vint16x32 vint16x32_cmpgt(vint16x32 vec1, vint16x32 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT16x32_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x32_CMPLE_DEFINED -VEC_FUNC_IMPL vint16x32 vint16x32_cmple(vint16x32 vec1, vint16x32 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT16x32_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x32_CMPGE_DEFINED -VEC_FUNC_IMPL vint16x32 vint16x32_cmpge(vint16x32 vec1, vint16x32 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT16x32_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x32_MIN_DEFINED -VEC_FUNC_IMPL vint16x32 vint16x32_min(vint16x32 vec1, vint16x32 vec2) -{ - vint16x32 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT16x32_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x32_MAX_DEFINED -VEC_FUNC_IMPL vint16x32 vint16x32_max(vint16x32 vec1, vint16x32 vec2) -{ - vint16x32 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT16x32_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x32_AVG_DEFINED +#if !defined(VINT16x32_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x32 vint16x32_avg(vint16x32 vec1, vint16x32 vec2) { vint16x32 ones = vint16x32_splat(1); @@ -4581,19 +4491,107 @@ 
} # define VINT16x32_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x32_LSHIFT_DEFINED -VEC_FUNC_IMPL vint16x32 vint16x32_lshift(vint16x32 vec1, vuint16x32 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT16x32_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x32_RSHIFT_DEFINED +#if !defined(VINT16x32_AND_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_and(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT16x32_AND_DEFINED +#endif +#if !defined(VINT16x32_OR_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_or(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT16x32_OR_DEFINED +#endif +#if !defined(VINT16x32_XOR_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_xor(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT16x32_XOR_DEFINED +#endif +#if !defined(VINT16x32_NOT_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_not(vint16x32 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT16x32_NOT_DEFINED +#endif +#if !defined(VINT16x32_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x32 vint16x32_cmplt(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT16x32_CMPLT_DEFINED +#endif +#if !defined(VINT16x32_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x32 vint16x32_cmpeq(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT16x32_CMPEQ_DEFINED +#endif +#if !defined(VINT16x32_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x32 vint16x32_cmpgt(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT16x32_CMPGT_DEFINED +#endif +#if !defined(VINT16x32_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x32 vint16x32_cmple(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT16x32_CMPLE_DEFINED +#endif +#if !defined(VINT16x32_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x32 vint16x32_cmpge(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT16x32_CMPGE_DEFINED +#endif +#if !defined(VINT16x32_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x32 vint16x32_min(vint16x32 vec1, vint16x32 vec2) +{ + vint16x32 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x32_MIN_DEFINED +#endif +#if !defined(VINT16x32_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x32 vint16x32_max(vint16x32 vec1, vint16x32 vec2) +{ + vint16x32 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x32_MAX_DEFINED +#endif +#if !defined(VINT16x32_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x32 vint16x32_rshift(vint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -4601,9 +4599,8 @@ } # define VINT16x32_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT16x32_LRSHIFT_DEFINED +#if !defined(VINT16x32_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint16x32 vint16x32_lrshift(vint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 
__attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); @@ -4611,29 +4608,56 @@ } # define VINT16x32_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT16x32_NOT_DEFINED -VEC_FUNC_IMPL vint16x32 vint16x32_not(vint16x32 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT16x32_NOT_DEFINED -#endif - - -/* vint16x32 */ - -#ifndef VUINT16x32_SPLAT_DEFINED +#if !defined(VINT16x32_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint16x32 vint16x32_lshift(vint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT16x32_LSHIFT_DEFINED +#endif +#if !defined(VUINT16x32_SPLAT_DEFINED) VEC_FUNC_IMPL vuint16x32 vuint16x32_splat(vec_uint16 x) { vuint16x32 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + vec.gcc[11] = x; + vec.gcc[12] = x; + vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; + vec.gcc[16] = x; + vec.gcc[17] = x; + vec.gcc[18] = x; + vec.gcc[19] = x; + vec.gcc[20] = x; + vec.gcc[21] = x; + vec.gcc[22] = x; + vec.gcc[23] = x; + vec.gcc[24] = x; + vec.gcc[25] = x; + vec.gcc[26] = x; + vec.gcc[27] = x; + vec.gcc[28] = x; + vec.gcc[29] = x; + vec.gcc[30] = x; + vec.gcc[31] = x; return vec; } # define VUINT16x32_SPLAT_DEFINED #endif -#ifndef VUINT16x32_LOAD_ALIGNED_DEFINED +#if !defined(VUINT16x32_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint16x32 vuint16x32_load_aligned(const vec_uint16 x[32]) { vuint16x32 vec; @@ -4642,7 +4666,7 @@ } # define VUINT16x32_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT16x32_LOAD_DEFINED +#if !defined(VUINT16x32_LOAD_DEFINED) VEC_FUNC_IMPL vuint16x32 vuint16x32_load(const vec_uint16 x[32]) { vuint16x32 vec; @@ -4651,21 +4675,21 @@ } # define VUINT16x32_LOAD_DEFINED #endif -#ifndef VUINT16x32_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint16x32_store_aligned(vuint16x32 vec, vec_uint16 arr[32]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT16x32_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint16x32_store_aligned(vuint16x32 vec, vec_uint16 x[32]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT16x32_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT16x32_STORE_DEFINED -VEC_FUNC_IMPL void vuint16x32_store(vuint16x32 vec, vec_uint16 arr[32]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT16x32_STORE_DEFINED) +VEC_FUNC_IMPL void vuint16x32_store(vuint16x32 vec, vec_uint16 x[32]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT16x32_STORE_DEFINED #endif -#ifndef VUINT16x32_ADD_DEFINED +#if !defined(VUINT16x32_ADD_DEFINED) VEC_FUNC_IMPL vuint16x32 vuint16x32_add(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -4673,7 +4697,7 @@ } # define VUINT16x32_ADD_DEFINED #endif -#ifndef VUINT16x32_SUB_DEFINED +#if !defined(VUINT16x32_SUB_DEFINED) VEC_FUNC_IMPL vuint16x32 vuint16x32_sub(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -4681,7 +4705,7 @@ } # define VUINT16x32_SUB_DEFINED #endif -#ifndef VUINT16x32_MUL_DEFINED +#if !defined(VUINT16x32_MUL_DEFINED) VEC_FUNC_IMPL vuint16x32 vuint16x32_mul(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -4689,7 +4713,16 @@ } # define VUINT16x32_MUL_DEFINED #endif -#ifndef VUINT16x32_AND_DEFINED +#if !defined(VUINT16x32_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint16x32 
vuint16x32_avg(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT16x32_AVG_DEFINED +#endif +#if !defined(VUINT16x32_AND_DEFINED) VEC_FUNC_IMPL vuint16x32 vuint16x32_and(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -4697,7 +4730,7 @@ } # define VUINT16x32_AND_DEFINED #endif -#ifndef VUINT16x32_OR_DEFINED +#if !defined(VUINT16x32_OR_DEFINED) VEC_FUNC_IMPL vuint16x32 vuint16x32_or(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -4705,7 +4738,7 @@ } # define VUINT16x32_OR_DEFINED #endif -#ifndef VUINT16x32_XOR_DEFINED +#if !defined(VUINT16x32_XOR_DEFINED) VEC_FUNC_IMPL vuint16x32 vuint16x32_xor(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -4713,8 +4746,16 @@ } # define VUINT16x32_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x32_CMPLT_DEFINED +#if !defined(VUINT16x32_NOT_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_not(vuint16x32 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT16x32_NOT_DEFINED +#endif +#if !defined(VUINT16x32_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x32 vuint16x32_cmplt(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -4722,9 +4763,8 @@ } # define VUINT16x32_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x32_CMPEQ_DEFINED +#if !defined(VUINT16x32_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpeq(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -4732,9 +4772,8 @@ } # define VUINT16x32_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x32_CMPGT_DEFINED +#if !defined(VUINT16x32_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpgt(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -4742,9 +4781,8 @@ } # define VUINT16x32_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x32_CMPLE_DEFINED +#if !defined(VUINT16x32_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x32 vuint16x32_cmple(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -4752,9 +4790,8 @@ } # define VUINT16x32_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x32_CMPGE_DEFINED +#if !defined(VUINT16x32_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpge(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -4762,9 +4799,8 @@ } # define VUINT16x32_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x32_MIN_DEFINED +#if !defined(VUINT16x32_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x32 vuint16x32_min(vuint16x32 vec1, vuint16x32 vec2) { vuint16x32 mask; @@ -4774,9 +4810,8 @@ } # define VUINT16x32_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x32_MAX_DEFINED +#if !defined(VUINT16x32_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x32 vuint16x32_max(vuint16x32 vec1, vuint16x32 vec2) { vuint16x32 mask; @@ -4786,30 +4821,8 @@ } # define VUINT16x32_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x32_AVG_DEFINED -VEC_FUNC_IMPL vuint16x32 vuint16x32_avg(vuint16x32 vec1, vuint16x32 vec2) -{ - vint16x32 ones = vint16x32_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); 
- return vec1; -} -# define VUINT16x32_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x32_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint16x32 vuint16x32_lshift(vuint16x32 vec1, vuint16x32 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT16x32_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x32_RSHIFT_DEFINED +#if !defined(VUINT16x32_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x32 vuint16x32_rshift(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -4817,9 +4830,8 @@ } # define VUINT16x32_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT16x32_LRSHIFT_DEFINED +#if !defined(VUINT16x32_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint16x32 vuint16x32_lrshift(vuint16x32 vec1, vuint16x32 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); @@ -4827,29 +4839,26 @@ } # define VUINT16x32_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT16x32_NOT_DEFINED -VEC_FUNC_IMPL vuint16x32 vuint16x32_not(vuint16x32 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT16x32_NOT_DEFINED -#endif - - -/* vuint32x2 */ - -#ifndef VINT32x2_SPLAT_DEFINED +#if !defined(VUINT16x32_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint16x32 vuint16x32_lshift(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT16x32_LSHIFT_DEFINED +#endif +#if !defined(VINT32x2_SPLAT_DEFINED) VEC_FUNC_IMPL vint32x2 vint32x2_splat(vec_int32 x) { vint32x2 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; return vec; } # define VINT32x2_SPLAT_DEFINED #endif -#ifndef VINT32x2_LOAD_ALIGNED_DEFINED +#if !defined(VINT32x2_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint32x2 vint32x2_load_aligned(const vec_int32 x[2]) { vint32x2 vec; @@ -4858,7 +4867,7 @@ } # define VINT32x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT32x2_LOAD_DEFINED +#if !defined(VINT32x2_LOAD_DEFINED) VEC_FUNC_IMPL vint32x2 vint32x2_load(const vec_int32 x[2]) { vint32x2 vec; @@ -4867,21 +4876,21 @@ } # define VINT32x2_LOAD_DEFINED #endif -#ifndef VINT32x2_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint32x2_store_aligned(vint32x2 vec, vec_int32 arr[2]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT32x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint32x2_store_aligned(vint32x2 vec, vec_int32 x[2]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT32x2_STORE_ALIGNED_DEFINED #endif -#ifndef VINT32x2_STORE_DEFINED -VEC_FUNC_IMPL void vint32x2_store(vint32x2 vec, vec_int32 arr[2]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT32x2_STORE_DEFINED) +VEC_FUNC_IMPL void vint32x2_store(vint32x2 vec, vec_int32 x[2]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT32x2_STORE_DEFINED #endif -#ifndef VINT32x2_ADD_DEFINED +#if !defined(VINT32x2_ADD_DEFINED) VEC_FUNC_IMPL vint32x2 vint32x2_add(vint32x2 vec1, vint32x2 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -4889,7 +4898,7 @@ } # define VINT32x2_ADD_DEFINED #endif -#ifndef VINT32x2_SUB_DEFINED +#if !defined(VINT32x2_SUB_DEFINED) VEC_FUNC_IMPL vint32x2 vint32x2_sub(vint32x2 vec1, vint32x2 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -4897,7 +4906,7 @@ } # define VINT32x2_SUB_DEFINED #endif -#ifndef VINT32x2_MUL_DEFINED +#if !defined(VINT32x2_MUL_DEFINED) VEC_FUNC_IMPL vint32x2 vint32x2_mul(vint32x2 vec1, vint32x2 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -4905,106 +4914,8 @@ 
} # define VINT32x2_MUL_DEFINED #endif -#ifndef VINT32x2_AND_DEFINED -VEC_FUNC_IMPL vint32x2 vint32x2_and(vint32x2 vec1, vint32x2 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT32x2_AND_DEFINED -#endif -#ifndef VINT32x2_OR_DEFINED -VEC_FUNC_IMPL vint32x2 vint32x2_or(vint32x2 vec1, vint32x2 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT32x2_OR_DEFINED -#endif -#ifndef VINT32x2_XOR_DEFINED -VEC_FUNC_IMPL vint32x2 vint32x2_xor(vint32x2 vec1, vint32x2 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT32x2_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x2_CMPLT_DEFINED -VEC_FUNC_IMPL vint32x2 vint32x2_cmplt(vint32x2 vec1, vint32x2 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT32x2_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x2_CMPEQ_DEFINED -VEC_FUNC_IMPL vint32x2 vint32x2_cmpeq(vint32x2 vec1, vint32x2 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT32x2_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x2_CMPGT_DEFINED -VEC_FUNC_IMPL vint32x2 vint32x2_cmpgt(vint32x2 vec1, vint32x2 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT32x2_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x2_CMPLE_DEFINED -VEC_FUNC_IMPL vint32x2 vint32x2_cmple(vint32x2 vec1, vint32x2 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT32x2_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x2_CMPGE_DEFINED -VEC_FUNC_IMPL vint32x2 vint32x2_cmpge(vint32x2 vec1, vint32x2 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT32x2_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x2_MIN_DEFINED -VEC_FUNC_IMPL vint32x2 vint32x2_min(vint32x2 vec1, vint32x2 vec2) -{ - vint32x2 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT32x2_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x2_MAX_DEFINED -VEC_FUNC_IMPL vint32x2 vint32x2_max(vint32x2 vec1, vint32x2 vec2) -{ - vint32x2 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT32x2_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x2_AVG_DEFINED +#if !defined(VINT32x2_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint32x2 vint32x2_avg(vint32x2 vec1, vint32x2 vec2) { vint32x2 ones = vint32x2_splat(1); @@ -5018,19 +4929,107 @@ } # define VINT32x2_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x2_LSHIFT_DEFINED -VEC_FUNC_IMPL vint32x2 vint32x2_lshift(vint32x2 vec1, vuint32x2 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT32x2_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x2_RSHIFT_DEFINED +#if !defined(VINT32x2_AND_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_and(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT32x2_AND_DEFINED +#endif +#if !defined(VINT32x2_OR_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_or(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT32x2_OR_DEFINED +#endif +#if !defined(VINT32x2_XOR_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_xor(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc 
= (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT32x2_XOR_DEFINED +#endif +#if !defined(VINT32x2_NOT_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_not(vint32x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT32x2_NOT_DEFINED +#endif +#if !defined(VINT32x2_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x2 vint32x2_cmplt(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT32x2_CMPLT_DEFINED +#endif +#if !defined(VINT32x2_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x2 vint32x2_cmpeq(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT32x2_CMPEQ_DEFINED +#endif +#if !defined(VINT32x2_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x2 vint32x2_cmpgt(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT32x2_CMPGT_DEFINED +#endif +#if !defined(VINT32x2_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x2 vint32x2_cmple(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT32x2_CMPLE_DEFINED +#endif +#if !defined(VINT32x2_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x2 vint32x2_cmpge(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT32x2_CMPGE_DEFINED +#endif +#if !defined(VINT32x2_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x2 vint32x2_min(vint32x2 vec1, vint32x2 vec2) +{ + vint32x2 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x2_MIN_DEFINED +#endif +#if !defined(VINT32x2_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x2 vint32x2_max(vint32x2 vec1, vint32x2 vec2) +{ + vint32x2 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x2_MAX_DEFINED +#endif +#if !defined(VINT32x2_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint32x2 vint32x2_rshift(vint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -5038,9 +5037,8 @@ } # define VINT32x2_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x2_LRSHIFT_DEFINED +#if !defined(VINT32x2_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint32x2 vint32x2_lrshift(vint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc); @@ -5048,29 +5046,26 @@ } # define VINT32x2_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT32x2_NOT_DEFINED -VEC_FUNC_IMPL vint32x2 vint32x2_not(vint32x2 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT32x2_NOT_DEFINED -#endif - - -/* vint32x2 */ - -#ifndef VUINT32x2_SPLAT_DEFINED +#if !defined(VINT32x2_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x2 vint32x2_lshift(vint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT32x2_LSHIFT_DEFINED +#endif +#if !defined(VUINT32x2_SPLAT_DEFINED) VEC_FUNC_IMPL vuint32x2 vuint32x2_splat(vec_uint32 x) { vuint32x2 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; return vec; } # define VUINT32x2_SPLAT_DEFINED #endif -#ifndef VUINT32x2_LOAD_ALIGNED_DEFINED +#if !defined(VUINT32x2_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint32x2 
vuint32x2_load_aligned(const vec_uint32 x[2]) { vuint32x2 vec; @@ -5079,7 +5074,7 @@ } # define VUINT32x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT32x2_LOAD_DEFINED +#if !defined(VUINT32x2_LOAD_DEFINED) VEC_FUNC_IMPL vuint32x2 vuint32x2_load(const vec_uint32 x[2]) { vuint32x2 vec; @@ -5088,21 +5083,21 @@ } # define VUINT32x2_LOAD_DEFINED #endif -#ifndef VUINT32x2_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint32x2_store_aligned(vuint32x2 vec, vec_uint32 arr[2]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT32x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint32x2_store_aligned(vuint32x2 vec, vec_uint32 x[2]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT32x2_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT32x2_STORE_DEFINED -VEC_FUNC_IMPL void vuint32x2_store(vuint32x2 vec, vec_uint32 arr[2]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT32x2_STORE_DEFINED) +VEC_FUNC_IMPL void vuint32x2_store(vuint32x2 vec, vec_uint32 x[2]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT32x2_STORE_DEFINED #endif -#ifndef VUINT32x2_ADD_DEFINED +#if !defined(VUINT32x2_ADD_DEFINED) VEC_FUNC_IMPL vuint32x2 vuint32x2_add(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -5110,7 +5105,7 @@ } # define VUINT32x2_ADD_DEFINED #endif -#ifndef VUINT32x2_SUB_DEFINED +#if !defined(VUINT32x2_SUB_DEFINED) VEC_FUNC_IMPL vuint32x2 vuint32x2_sub(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -5118,7 +5113,7 @@ } # define VUINT32x2_SUB_DEFINED #endif -#ifndef VUINT32x2_MUL_DEFINED +#if !defined(VUINT32x2_MUL_DEFINED) VEC_FUNC_IMPL vuint32x2 vuint32x2_mul(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -5126,7 +5121,16 @@ } # define VUINT32x2_MUL_DEFINED #endif -#ifndef VUINT32x2_AND_DEFINED +#if !defined(VUINT32x2_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_avg(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT32x2_AVG_DEFINED +#endif +#if !defined(VUINT32x2_AND_DEFINED) VEC_FUNC_IMPL vuint32x2 vuint32x2_and(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -5134,7 +5138,7 @@ } # define VUINT32x2_AND_DEFINED #endif -#ifndef VUINT32x2_OR_DEFINED +#if !defined(VUINT32x2_OR_DEFINED) VEC_FUNC_IMPL vuint32x2 vuint32x2_or(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -5142,7 +5146,7 @@ } # define VUINT32x2_OR_DEFINED #endif -#ifndef VUINT32x2_XOR_DEFINED +#if !defined(VUINT32x2_XOR_DEFINED) VEC_FUNC_IMPL vuint32x2 vuint32x2_xor(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -5150,8 +5154,16 @@ } # define VUINT32x2_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x2_CMPLT_DEFINED +#if !defined(VUINT32x2_NOT_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_not(vuint32x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT32x2_NOT_DEFINED +#endif +#if !defined(VUINT32x2_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x2 vuint32x2_cmplt(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -5159,9 +5171,8 @@ } # define VUINT32x2_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x2_CMPEQ_DEFINED +#if !defined(VUINT32x2_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpeq(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -5169,9 +5180,8 @@ } # define VUINT32x2_CMPEQ_DEFINED #endif 
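/* Three recurring patterns in the kernels added in this hunk are worth
 * spelling out. The unsigned *_avg functions use
 *     (a >> 1) + (b >> 1) + ((a | b) & 1),
 * which equals (a + b + 1) / 2 without needing a wider intermediate; the
 * min/max functions pick an operand through the 0 / all-ones mask that a
 * GNU C vector comparison produces; and the lrshift functions get a logical
 * shift by casting the signed vector to its unsigned counterpart first.
 * Below is a minimal scalar sketch of those identities; avg_up, select_min,
 * lrshift16 and the test loop are illustrative names only, not part of
 * vec's API. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static uint16_t avg_up(uint16_t a, uint16_t b)
{
	/* halve first so the sum cannot overflow, then add back the bit
	 * dropped when either operand is odd */
	return (uint16_t)((a >> 1) + (b >> 1) + ((a | b) & 1));
}

static int32_t select_min(int32_t a, int32_t b)
{
	/* GNU C vector comparisons yield 0 or -1 (all bits set) per lane;
	 * -(a < b) models that mask for a single lane */
	int32_t mask = -(int32_t)(a < b);
	return (a & mask) | (b & ~mask);
}

static int16_t lrshift16(int16_t v, unsigned n)
{
	/* the generated lrshift casts to the unsigned lane type, shifts,
	 * and casts back, i.e. a logical (zero-filling) shift */
	return (int16_t)((uint16_t)v >> n);
}

int main(void)
{
	uint32_t a, b;

	for (a = 0; a <= UINT16_MAX; a += 257) {
		for (b = 0; b <= UINT16_MAX; b += 263) {
			/* reference: average of a and b, rounded up */
			uint16_t ref = (uint16_t)((a + b + 1) >> 1);
			assert(avg_up((uint16_t)a, (uint16_t)b) == ref);
		}
	}

	assert(select_min(3, 7) == 3);
	assert(select_min(-5, 2) == -5);
	assert(select_min(9, 9) == 9);

	assert(lrshift16(-1, 1) == 0x7fff);

	puts("ok");
	return 0;
}

/* Because each operand is halved before the add, the average can never
 * overflow the lane type, which is why no widening cast appears in the
 * generated code. */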
-#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x2_CMPGT_DEFINED +#if !defined(VUINT32x2_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpgt(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -5179,9 +5189,8 @@ } # define VUINT32x2_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x2_CMPLE_DEFINED +#if !defined(VUINT32x2_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x2 vuint32x2_cmple(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -5189,9 +5198,8 @@ } # define VUINT32x2_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x2_CMPGE_DEFINED +#if !defined(VUINT32x2_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpge(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -5199,9 +5207,8 @@ } # define VUINT32x2_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x2_MIN_DEFINED +#if !defined(VUINT32x2_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x2 vuint32x2_min(vuint32x2 vec1, vuint32x2 vec2) { vuint32x2 mask; @@ -5211,9 +5218,8 @@ } # define VUINT32x2_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x2_MAX_DEFINED +#if !defined(VUINT32x2_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x2 vuint32x2_max(vuint32x2 vec1, vuint32x2 vec2) { vuint32x2 mask; @@ -5223,30 +5229,8 @@ } # define VUINT32x2_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x2_AVG_DEFINED -VEC_FUNC_IMPL vuint32x2 vuint32x2_avg(vuint32x2 vec1, vuint32x2 vec2) -{ - vint32x2 ones = vint32x2_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT32x2_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x2_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint32x2 vuint32x2_lshift(vuint32x2 vec1, vuint32x2 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT32x2_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x2_RSHIFT_DEFINED +#if !defined(VUINT32x2_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x2 vuint32x2_rshift(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -5254,9 +5238,8 @@ } # define VUINT32x2_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x2_LRSHIFT_DEFINED +#if !defined(VUINT32x2_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x2 vuint32x2_lrshift(vuint32x2 vec1, vuint32x2 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc); @@ -5264,29 +5247,28 @@ } # define VUINT32x2_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT32x2_NOT_DEFINED -VEC_FUNC_IMPL vuint32x2 vuint32x2_not(vuint32x2 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT32x2_NOT_DEFINED -#endif - - -/* vuint32x4 */ - -#ifndef VINT32x4_SPLAT_DEFINED +#if !defined(VUINT32x2_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint32x2 vuint32x2_lshift(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT32x2_LSHIFT_DEFINED +#endif +#if !defined(VINT32x4_SPLAT_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_splat(vec_int32 x) { vint32x4 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; return vec; } # 
define VINT32x4_SPLAT_DEFINED #endif -#ifndef VINT32x4_LOAD_ALIGNED_DEFINED +#if !defined(VINT32x4_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_load_aligned(const vec_int32 x[4]) { vint32x4 vec; @@ -5295,7 +5277,7 @@ } # define VINT32x4_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT32x4_LOAD_DEFINED +#if !defined(VINT32x4_LOAD_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_load(const vec_int32 x[4]) { vint32x4 vec; @@ -5304,21 +5286,21 @@ } # define VINT32x4_LOAD_DEFINED #endif -#ifndef VINT32x4_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 arr[4]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT32x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 x[4]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT32x4_STORE_ALIGNED_DEFINED #endif -#ifndef VINT32x4_STORE_DEFINED -VEC_FUNC_IMPL void vint32x4_store(vint32x4 vec, vec_int32 arr[4]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT32x4_STORE_DEFINED) +VEC_FUNC_IMPL void vint32x4_store(vint32x4 vec, vec_int32 x[4]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT32x4_STORE_DEFINED #endif -#ifndef VINT32x4_ADD_DEFINED +#if !defined(VINT32x4_ADD_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -5326,7 +5308,7 @@ } # define VINT32x4_ADD_DEFINED #endif -#ifndef VINT32x4_SUB_DEFINED +#if !defined(VINT32x4_SUB_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_sub(vint32x4 vec1, vint32x4 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -5334,7 +5316,7 @@ } # define VINT32x4_SUB_DEFINED #endif -#ifndef VINT32x4_MUL_DEFINED +#if !defined(VINT32x4_MUL_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_mul(vint32x4 vec1, vint32x4 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -5342,106 +5324,8 @@ } # define VINT32x4_MUL_DEFINED #endif -#ifndef VINT32x4_AND_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT32x4_AND_DEFINED -#endif -#ifndef VINT32x4_OR_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT32x4_OR_DEFINED -#endif -#ifndef VINT32x4_XOR_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT32x4_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x4_CMPLT_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT32x4_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x4_CMPEQ_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT32x4_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x4_CMPGT_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT32x4_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x4_CMPLE_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_cmple(vint32x4 vec1, vint32x4 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT32x4_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x4_CMPGE_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_cmpge(vint32x4 vec1, vint32x4 vec2) -{ - vec1.gcc = (vec1.gcc >= 
vec2.gcc); - return vec1; -} -# define VINT32x4_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x4_MIN_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2) -{ - vint32x4 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT32x4_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x4_MAX_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2) -{ - vint32x4 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT32x4_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x4_AVG_DEFINED +#if !defined(VINT32x4_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2) { vint32x4 ones = vint32x4_splat(1); @@ -5455,19 +5339,107 @@ } # define VINT32x4_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x4_LSHIFT_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT32x4_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x4_RSHIFT_DEFINED +#if !defined(VINT32x4_AND_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT32x4_AND_DEFINED +#endif +#if !defined(VINT32x4_OR_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT32x4_OR_DEFINED +#endif +#if !defined(VINT32x4_XOR_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT32x4_XOR_DEFINED +#endif +#if !defined(VINT32x4_NOT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_not(vint32x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT32x4_NOT_DEFINED +#endif +#if !defined(VINT32x4_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT32x4_CMPLT_DEFINED +#endif +#if !defined(VINT32x4_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT32x4_CMPEQ_DEFINED +#endif +#if !defined(VINT32x4_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT32x4_CMPGT_DEFINED +#endif +#if !defined(VINT32x4_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x4 vint32x4_cmple(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT32x4_CMPLE_DEFINED +#endif +#if !defined(VINT32x4_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x4 vint32x4_cmpge(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT32x4_CMPGE_DEFINED +#endif +#if !defined(VINT32x4_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# 
define VINT32x4_MIN_DEFINED +#endif +#if !defined(VINT32x4_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x4_MAX_DEFINED +#endif +#if !defined(VINT32x4_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -5475,9 +5447,8 @@ } # define VINT32x4_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x4_LRSHIFT_DEFINED +#if !defined(VINT32x4_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); @@ -5485,29 +5456,28 @@ } # define VINT32x4_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT32x4_NOT_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_not(vint32x4 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT32x4_NOT_DEFINED -#endif - - -/* vint32x4 */ - -#ifndef VUINT32x4_SPLAT_DEFINED +#if !defined(VINT32x4_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT32x4_LSHIFT_DEFINED +#endif +#if !defined(VUINT32x4_SPLAT_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_splat(vec_uint32 x) { vuint32x4 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; return vec; } # define VUINT32x4_SPLAT_DEFINED #endif -#ifndef VUINT32x4_LOAD_ALIGNED_DEFINED +#if !defined(VUINT32x4_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_load_aligned(const vec_uint32 x[4]) { vuint32x4 vec; @@ -5516,7 +5486,7 @@ } # define VUINT32x4_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT32x4_LOAD_DEFINED +#if !defined(VUINT32x4_LOAD_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_load(const vec_uint32 x[4]) { vuint32x4 vec; @@ -5525,21 +5495,21 @@ } # define VUINT32x4_LOAD_DEFINED #endif -#ifndef VUINT32x4_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 arr[4]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT32x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 x[4]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT32x4_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT32x4_STORE_DEFINED -VEC_FUNC_IMPL void vuint32x4_store(vuint32x4 vec, vec_uint32 arr[4]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT32x4_STORE_DEFINED) +VEC_FUNC_IMPL void vuint32x4_store(vuint32x4 vec, vec_uint32 x[4]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT32x4_STORE_DEFINED #endif -#ifndef VUINT32x4_ADD_DEFINED +#if !defined(VUINT32x4_ADD_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_add(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -5547,7 +5517,7 @@ } # define VUINT32x4_ADD_DEFINED #endif -#ifndef VUINT32x4_SUB_DEFINED +#if !defined(VUINT32x4_SUB_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_sub(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -5555,7 +5525,7 @@ } # define VUINT32x4_SUB_DEFINED #endif -#ifndef VUINT32x4_MUL_DEFINED +#if !defined(VUINT32x4_MUL_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_mul(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -5563,7 
+5533,16 @@ } # define VUINT32x4_MUL_DEFINED #endif -#ifndef VUINT32x4_AND_DEFINED +#if !defined(VUINT32x4_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT32x4_AVG_DEFINED +#endif +#if !defined(VUINT32x4_AND_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_and(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -5571,7 +5550,7 @@ } # define VUINT32x4_AND_DEFINED #endif -#ifndef VUINT32x4_OR_DEFINED +#if !defined(VUINT32x4_OR_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_or(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -5579,7 +5558,7 @@ } # define VUINT32x4_OR_DEFINED #endif -#ifndef VUINT32x4_XOR_DEFINED +#if !defined(VUINT32x4_XOR_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_xor(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -5587,8 +5566,16 @@ } # define VUINT32x4_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x4_CMPLT_DEFINED +#if !defined(VUINT32x4_NOT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_not(vuint32x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT32x4_NOT_DEFINED +#endif +#if !defined(VUINT32x4_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x4 vuint32x4_cmplt(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -5596,9 +5583,8 @@ } # define VUINT32x4_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x4_CMPEQ_DEFINED +#if !defined(VUINT32x4_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpeq(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -5606,9 +5592,8 @@ } # define VUINT32x4_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x4_CMPGT_DEFINED +#if !defined(VUINT32x4_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpgt(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -5616,9 +5601,8 @@ } # define VUINT32x4_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x4_CMPLE_DEFINED +#if !defined(VUINT32x4_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x4 vuint32x4_cmple(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -5626,9 +5610,8 @@ } # define VUINT32x4_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x4_CMPGE_DEFINED +#if !defined(VUINT32x4_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpge(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -5636,9 +5619,8 @@ } # define VUINT32x4_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x4_MIN_DEFINED +#if !defined(VUINT32x4_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x4 vuint32x4_min(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 mask; @@ -5648,9 +5630,8 @@ } # define VUINT32x4_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x4_MAX_DEFINED +#if !defined(VUINT32x4_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x4 vuint32x4_max(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 mask; @@ -5660,30 +5641,8 @@ } # define VUINT32x4_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x4_AVG_DEFINED -VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2) -{ - vint32x4 ones = 
vint32x4_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT32x4_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x4_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT32x4_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x4_RSHIFT_DEFINED +#if !defined(VUINT32x4_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -5691,9 +5650,8 @@ } # define VUINT32x4_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x4_LRSHIFT_DEFINED +#if !defined(VUINT32x4_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); @@ -5701,29 +5659,32 @@ } # define VUINT32x4_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT32x4_NOT_DEFINED -VEC_FUNC_IMPL vuint32x4 vuint32x4_not(vuint32x4 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT32x4_NOT_DEFINED -#endif - - -/* vuint32x8 */ - -#ifndef VINT32x8_SPLAT_DEFINED +#if !defined(VUINT32x4_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT32x4_LSHIFT_DEFINED +#endif +#if !defined(VINT32x8_SPLAT_DEFINED) VEC_FUNC_IMPL vint32x8 vint32x8_splat(vec_int32 x) { vint32x8 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; return vec; } # define VINT32x8_SPLAT_DEFINED #endif -#ifndef VINT32x8_LOAD_ALIGNED_DEFINED +#if !defined(VINT32x8_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint32x8 vint32x8_load_aligned(const vec_int32 x[8]) { vint32x8 vec; @@ -5732,7 +5693,7 @@ } # define VINT32x8_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT32x8_LOAD_DEFINED +#if !defined(VINT32x8_LOAD_DEFINED) VEC_FUNC_IMPL vint32x8 vint32x8_load(const vec_int32 x[8]) { vint32x8 vec; @@ -5741,21 +5702,21 @@ } # define VINT32x8_LOAD_DEFINED #endif -#ifndef VINT32x8_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint32x8_store_aligned(vint32x8 vec, vec_int32 arr[8]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT32x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint32x8_store_aligned(vint32x8 vec, vec_int32 x[8]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT32x8_STORE_ALIGNED_DEFINED #endif -#ifndef VINT32x8_STORE_DEFINED -VEC_FUNC_IMPL void vint32x8_store(vint32x8 vec, vec_int32 arr[8]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT32x8_STORE_DEFINED) +VEC_FUNC_IMPL void vint32x8_store(vint32x8 vec, vec_int32 x[8]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT32x8_STORE_DEFINED #endif -#ifndef VINT32x8_ADD_DEFINED +#if !defined(VINT32x8_ADD_DEFINED) VEC_FUNC_IMPL vint32x8 vint32x8_add(vint32x8 vec1, vint32x8 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -5763,7 +5724,7 @@ } # define VINT32x8_ADD_DEFINED #endif -#ifndef VINT32x8_SUB_DEFINED +#if !defined(VINT32x8_SUB_DEFINED) VEC_FUNC_IMPL vint32x8 vint32x8_sub(vint32x8 vec1, vint32x8 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -5771,7 +5732,7 @@ } # define VINT32x8_SUB_DEFINED #endif 
-#ifndef VINT32x8_MUL_DEFINED +#if !defined(VINT32x8_MUL_DEFINED) VEC_FUNC_IMPL vint32x8 vint32x8_mul(vint32x8 vec1, vint32x8 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -5779,106 +5740,8 @@ } # define VINT32x8_MUL_DEFINED #endif -#ifndef VINT32x8_AND_DEFINED -VEC_FUNC_IMPL vint32x8 vint32x8_and(vint32x8 vec1, vint32x8 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT32x8_AND_DEFINED -#endif -#ifndef VINT32x8_OR_DEFINED -VEC_FUNC_IMPL vint32x8 vint32x8_or(vint32x8 vec1, vint32x8 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT32x8_OR_DEFINED -#endif -#ifndef VINT32x8_XOR_DEFINED -VEC_FUNC_IMPL vint32x8 vint32x8_xor(vint32x8 vec1, vint32x8 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT32x8_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x8_CMPLT_DEFINED -VEC_FUNC_IMPL vint32x8 vint32x8_cmplt(vint32x8 vec1, vint32x8 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT32x8_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x8_CMPEQ_DEFINED -VEC_FUNC_IMPL vint32x8 vint32x8_cmpeq(vint32x8 vec1, vint32x8 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT32x8_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x8_CMPGT_DEFINED -VEC_FUNC_IMPL vint32x8 vint32x8_cmpgt(vint32x8 vec1, vint32x8 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT32x8_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x8_CMPLE_DEFINED -VEC_FUNC_IMPL vint32x8 vint32x8_cmple(vint32x8 vec1, vint32x8 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT32x8_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x8_CMPGE_DEFINED -VEC_FUNC_IMPL vint32x8 vint32x8_cmpge(vint32x8 vec1, vint32x8 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT32x8_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x8_MIN_DEFINED -VEC_FUNC_IMPL vint32x8 vint32x8_min(vint32x8 vec1, vint32x8 vec2) -{ - vint32x8 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT32x8_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x8_MAX_DEFINED -VEC_FUNC_IMPL vint32x8 vint32x8_max(vint32x8 vec1, vint32x8 vec2) -{ - vint32x8 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT32x8_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x8_AVG_DEFINED +#if !defined(VINT32x8_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint32x8 vint32x8_avg(vint32x8 vec1, vint32x8 vec2) { vint32x8 ones = vint32x8_splat(1); @@ -5892,19 +5755,107 @@ } # define VINT32x8_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x8_LSHIFT_DEFINED -VEC_FUNC_IMPL vint32x8 vint32x8_lshift(vint32x8 vec1, vuint32x8 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT32x8_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x8_RSHIFT_DEFINED +#if !defined(VINT32x8_AND_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_and(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT32x8_AND_DEFINED +#endif +#if !defined(VINT32x8_OR_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_or(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = 
(vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT32x8_OR_DEFINED +#endif +#if !defined(VINT32x8_XOR_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_xor(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT32x8_XOR_DEFINED +#endif +#if !defined(VINT32x8_NOT_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_not(vint32x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT32x8_NOT_DEFINED +#endif +#if !defined(VINT32x8_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x8 vint32x8_cmplt(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT32x8_CMPLT_DEFINED +#endif +#if !defined(VINT32x8_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x8 vint32x8_cmpeq(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT32x8_CMPEQ_DEFINED +#endif +#if !defined(VINT32x8_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x8 vint32x8_cmpgt(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT32x8_CMPGT_DEFINED +#endif +#if !defined(VINT32x8_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x8 vint32x8_cmple(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT32x8_CMPLE_DEFINED +#endif +#if !defined(VINT32x8_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x8 vint32x8_cmpge(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT32x8_CMPGE_DEFINED +#endif +#if !defined(VINT32x8_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x8 vint32x8_min(vint32x8 vec1, vint32x8 vec2) +{ + vint32x8 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x8_MIN_DEFINED +#endif +#if !defined(VINT32x8_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x8 vint32x8_max(vint32x8 vec1, vint32x8 vec2) +{ + vint32x8 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x8_MAX_DEFINED +#endif +#if !defined(VINT32x8_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint32x8 vint32x8_rshift(vint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -5912,9 +5863,8 @@ } # define VINT32x8_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x8_LRSHIFT_DEFINED +#if !defined(VINT32x8_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint32x8 vint32x8_lrshift(vint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); @@ -5922,29 +5872,32 @@ } # define VINT32x8_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT32x8_NOT_DEFINED -VEC_FUNC_IMPL vint32x8 vint32x8_not(vint32x8 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT32x8_NOT_DEFINED -#endif - - -/* vint32x8 */ - -#ifndef VUINT32x8_SPLAT_DEFINED +#if !defined(VINT32x8_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x8 vint32x8_lshift(vint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT32x8_LSHIFT_DEFINED +#endif +#if !defined(VUINT32x8_SPLAT_DEFINED) VEC_FUNC_IMPL vuint32x8 vuint32x8_splat(vec_uint32 x) { vuint32x8 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + 
vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; return vec; } # define VUINT32x8_SPLAT_DEFINED #endif -#ifndef VUINT32x8_LOAD_ALIGNED_DEFINED +#if !defined(VUINT32x8_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint32x8 vuint32x8_load_aligned(const vec_uint32 x[8]) { vuint32x8 vec; @@ -5953,7 +5906,7 @@ } # define VUINT32x8_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT32x8_LOAD_DEFINED +#if !defined(VUINT32x8_LOAD_DEFINED) VEC_FUNC_IMPL vuint32x8 vuint32x8_load(const vec_uint32 x[8]) { vuint32x8 vec; @@ -5962,21 +5915,21 @@ } # define VUINT32x8_LOAD_DEFINED #endif -#ifndef VUINT32x8_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint32x8_store_aligned(vuint32x8 vec, vec_uint32 arr[8]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT32x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint32x8_store_aligned(vuint32x8 vec, vec_uint32 x[8]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT32x8_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT32x8_STORE_DEFINED -VEC_FUNC_IMPL void vuint32x8_store(vuint32x8 vec, vec_uint32 arr[8]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT32x8_STORE_DEFINED) +VEC_FUNC_IMPL void vuint32x8_store(vuint32x8 vec, vec_uint32 x[8]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT32x8_STORE_DEFINED #endif -#ifndef VUINT32x8_ADD_DEFINED +#if !defined(VUINT32x8_ADD_DEFINED) VEC_FUNC_IMPL vuint32x8 vuint32x8_add(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -5984,7 +5937,7 @@ } # define VUINT32x8_ADD_DEFINED #endif -#ifndef VUINT32x8_SUB_DEFINED +#if !defined(VUINT32x8_SUB_DEFINED) VEC_FUNC_IMPL vuint32x8 vuint32x8_sub(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -5992,7 +5945,7 @@ } # define VUINT32x8_SUB_DEFINED #endif -#ifndef VUINT32x8_MUL_DEFINED +#if !defined(VUINT32x8_MUL_DEFINED) VEC_FUNC_IMPL vuint32x8 vuint32x8_mul(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -6000,7 +5953,16 @@ } # define VUINT32x8_MUL_DEFINED #endif -#ifndef VUINT32x8_AND_DEFINED +#if !defined(VUINT32x8_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_avg(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT32x8_AVG_DEFINED +#endif +#if !defined(VUINT32x8_AND_DEFINED) VEC_FUNC_IMPL vuint32x8 vuint32x8_and(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -6008,7 +5970,7 @@ } # define VUINT32x8_AND_DEFINED #endif -#ifndef VUINT32x8_OR_DEFINED +#if !defined(VUINT32x8_OR_DEFINED) VEC_FUNC_IMPL vuint32x8 vuint32x8_or(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -6016,7 +5978,7 @@ } # define VUINT32x8_OR_DEFINED #endif -#ifndef VUINT32x8_XOR_DEFINED +#if !defined(VUINT32x8_XOR_DEFINED) VEC_FUNC_IMPL vuint32x8 vuint32x8_xor(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -6024,8 +5986,16 @@ } # define VUINT32x8_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x8_CMPLT_DEFINED +#if !defined(VUINT32x8_NOT_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_not(vuint32x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT32x8_NOT_DEFINED +#endif +#if !defined(VUINT32x8_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x8 vuint32x8_cmplt(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -6033,9 +6003,8 @@ } # define VUINT32x8_CMPLT_DEFINED #endif -#endif -#if 
VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x8_CMPEQ_DEFINED +#if !defined(VUINT32x8_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpeq(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -6043,9 +6012,8 @@ } # define VUINT32x8_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x8_CMPGT_DEFINED +#if !defined(VUINT32x8_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpgt(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -6053,9 +6021,8 @@ } # define VUINT32x8_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x8_CMPLE_DEFINED +#if !defined(VUINT32x8_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x8 vuint32x8_cmple(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -6063,9 +6030,8 @@ } # define VUINT32x8_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x8_CMPGE_DEFINED +#if !defined(VUINT32x8_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpge(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -6073,9 +6039,8 @@ } # define VUINT32x8_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x8_MIN_DEFINED +#if !defined(VUINT32x8_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x8 vuint32x8_min(vuint32x8 vec1, vuint32x8 vec2) { vuint32x8 mask; @@ -6085,9 +6050,8 @@ } # define VUINT32x8_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x8_MAX_DEFINED +#if !defined(VUINT32x8_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x8 vuint32x8_max(vuint32x8 vec1, vuint32x8 vec2) { vuint32x8 mask; @@ -6097,30 +6061,8 @@ } # define VUINT32x8_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x8_AVG_DEFINED -VEC_FUNC_IMPL vuint32x8 vuint32x8_avg(vuint32x8 vec1, vuint32x8 vec2) -{ - vint32x8 ones = vint32x8_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT32x8_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x8_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint32x8 vuint32x8_lshift(vuint32x8 vec1, vuint32x8 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT32x8_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x8_RSHIFT_DEFINED +#if !defined(VUINT32x8_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x8 vuint32x8_rshift(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -6128,9 +6070,8 @@ } # define VUINT32x8_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x8_LRSHIFT_DEFINED +#if !defined(VUINT32x8_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x8 vuint32x8_lrshift(vuint32x8 vec1, vuint32x8 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); @@ -6138,29 +6079,40 @@ } # define VUINT32x8_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT32x8_NOT_DEFINED -VEC_FUNC_IMPL vuint32x8 vuint32x8_not(vuint32x8 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT32x8_NOT_DEFINED -#endif - - -/* vuint32x16 */ - -#ifndef VINT32x16_SPLAT_DEFINED +#if !defined(VUINT32x8_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint32x8 vuint32x8_lshift(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc << 
vec2.gcc); + return vec1; +} +# define VUINT32x8_LSHIFT_DEFINED +#endif +#if !defined(VINT32x16_SPLAT_DEFINED) VEC_FUNC_IMPL vint32x16 vint32x16_splat(vec_int32 x) { vint32x16 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + vec.gcc[11] = x; + vec.gcc[12] = x; + vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; return vec; } # define VINT32x16_SPLAT_DEFINED #endif -#ifndef VINT32x16_LOAD_ALIGNED_DEFINED +#if !defined(VINT32x16_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint32x16 vint32x16_load_aligned(const vec_int32 x[16]) { vint32x16 vec; @@ -6169,7 +6121,7 @@ } # define VINT32x16_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT32x16_LOAD_DEFINED +#if !defined(VINT32x16_LOAD_DEFINED) VEC_FUNC_IMPL vint32x16 vint32x16_load(const vec_int32 x[16]) { vint32x16 vec; @@ -6178,21 +6130,21 @@ } # define VINT32x16_LOAD_DEFINED #endif -#ifndef VINT32x16_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint32x16_store_aligned(vint32x16 vec, vec_int32 arr[16]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT32x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint32x16_store_aligned(vint32x16 vec, vec_int32 x[16]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT32x16_STORE_ALIGNED_DEFINED #endif -#ifndef VINT32x16_STORE_DEFINED -VEC_FUNC_IMPL void vint32x16_store(vint32x16 vec, vec_int32 arr[16]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT32x16_STORE_DEFINED) +VEC_FUNC_IMPL void vint32x16_store(vint32x16 vec, vec_int32 x[16]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT32x16_STORE_DEFINED #endif -#ifndef VINT32x16_ADD_DEFINED +#if !defined(VINT32x16_ADD_DEFINED) VEC_FUNC_IMPL vint32x16 vint32x16_add(vint32x16 vec1, vint32x16 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -6200,7 +6152,7 @@ } # define VINT32x16_ADD_DEFINED #endif -#ifndef VINT32x16_SUB_DEFINED +#if !defined(VINT32x16_SUB_DEFINED) VEC_FUNC_IMPL vint32x16 vint32x16_sub(vint32x16 vec1, vint32x16 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -6208,7 +6160,7 @@ } # define VINT32x16_SUB_DEFINED #endif -#ifndef VINT32x16_MUL_DEFINED +#if !defined(VINT32x16_MUL_DEFINED) VEC_FUNC_IMPL vint32x16 vint32x16_mul(vint32x16 vec1, vint32x16 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -6216,106 +6168,8 @@ } # define VINT32x16_MUL_DEFINED #endif -#ifndef VINT32x16_AND_DEFINED -VEC_FUNC_IMPL vint32x16 vint32x16_and(vint32x16 vec1, vint32x16 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT32x16_AND_DEFINED -#endif -#ifndef VINT32x16_OR_DEFINED -VEC_FUNC_IMPL vint32x16 vint32x16_or(vint32x16 vec1, vint32x16 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT32x16_OR_DEFINED -#endif -#ifndef VINT32x16_XOR_DEFINED -VEC_FUNC_IMPL vint32x16 vint32x16_xor(vint32x16 vec1, vint32x16 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT32x16_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x16_CMPLT_DEFINED -VEC_FUNC_IMPL vint32x16 vint32x16_cmplt(vint32x16 vec1, vint32x16 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT32x16_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x16_CMPEQ_DEFINED -VEC_FUNC_IMPL vint32x16 vint32x16_cmpeq(vint32x16 vec1, vint32x16 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT32x16_CMPEQ_DEFINED -#endif -#endif 
-#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x16_CMPGT_DEFINED -VEC_FUNC_IMPL vint32x16 vint32x16_cmpgt(vint32x16 vec1, vint32x16 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT32x16_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x16_CMPLE_DEFINED -VEC_FUNC_IMPL vint32x16 vint32x16_cmple(vint32x16 vec1, vint32x16 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT32x16_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x16_CMPGE_DEFINED -VEC_FUNC_IMPL vint32x16 vint32x16_cmpge(vint32x16 vec1, vint32x16 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT32x16_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x16_MIN_DEFINED -VEC_FUNC_IMPL vint32x16 vint32x16_min(vint32x16 vec1, vint32x16 vec2) -{ - vint32x16 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT32x16_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x16_MAX_DEFINED -VEC_FUNC_IMPL vint32x16 vint32x16_max(vint32x16 vec1, vint32x16 vec2) -{ - vint32x16 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT32x16_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x16_AVG_DEFINED +#if !defined(VINT32x16_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint32x16 vint32x16_avg(vint32x16 vec1, vint32x16 vec2) { vint32x16 ones = vint32x16_splat(1); @@ -6329,19 +6183,107 @@ } # define VINT32x16_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x16_LSHIFT_DEFINED -VEC_FUNC_IMPL vint32x16 vint32x16_lshift(vint32x16 vec1, vuint32x16 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT32x16_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x16_RSHIFT_DEFINED +#if !defined(VINT32x16_AND_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_and(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT32x16_AND_DEFINED +#endif +#if !defined(VINT32x16_OR_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_or(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT32x16_OR_DEFINED +#endif +#if !defined(VINT32x16_XOR_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_xor(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT32x16_XOR_DEFINED +#endif +#if !defined(VINT32x16_NOT_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_not(vint32x16 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT32x16_NOT_DEFINED +#endif +#if !defined(VINT32x16_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x16 vint32x16_cmplt(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT32x16_CMPLT_DEFINED +#endif +#if !defined(VINT32x16_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x16 vint32x16_cmpeq(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT32x16_CMPEQ_DEFINED +#endif +#if !defined(VINT32x16_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x16 vint32x16_cmpgt(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT32x16_CMPGT_DEFINED +#endif +#if 
!defined(VINT32x16_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x16 vint32x16_cmple(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT32x16_CMPLE_DEFINED +#endif +#if !defined(VINT32x16_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x16 vint32x16_cmpge(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT32x16_CMPGE_DEFINED +#endif +#if !defined(VINT32x16_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x16 vint32x16_min(vint32x16 vec1, vint32x16 vec2) +{ + vint32x16 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x16_MIN_DEFINED +#endif +#if !defined(VINT32x16_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x16 vint32x16_max(vint32x16 vec1, vint32x16 vec2) +{ + vint32x16 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x16_MAX_DEFINED +#endif +#if !defined(VINT32x16_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint32x16 vint32x16_rshift(vint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -6349,9 +6291,8 @@ } # define VINT32x16_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT32x16_LRSHIFT_DEFINED +#if !defined(VINT32x16_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint32x16 vint32x16_lrshift(vint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); @@ -6359,29 +6300,40 @@ } # define VINT32x16_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT32x16_NOT_DEFINED -VEC_FUNC_IMPL vint32x16 vint32x16_not(vint32x16 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT32x16_NOT_DEFINED -#endif - - -/* vint32x16 */ - -#ifndef VUINT32x16_SPLAT_DEFINED +#if !defined(VINT32x16_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint32x16 vint32x16_lshift(vint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT32x16_LSHIFT_DEFINED +#endif +#if !defined(VUINT32x16_SPLAT_DEFINED) VEC_FUNC_IMPL vuint32x16 vuint32x16_splat(vec_uint32 x) { vuint32x16 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + vec.gcc[11] = x; + vec.gcc[12] = x; + vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; return vec; } # define VUINT32x16_SPLAT_DEFINED #endif -#ifndef VUINT32x16_LOAD_ALIGNED_DEFINED +#if !defined(VUINT32x16_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint32x16 vuint32x16_load_aligned(const vec_uint32 x[16]) { vuint32x16 vec; @@ -6390,7 +6342,7 @@ } # define VUINT32x16_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT32x16_LOAD_DEFINED +#if !defined(VUINT32x16_LOAD_DEFINED) VEC_FUNC_IMPL vuint32x16 vuint32x16_load(const vec_uint32 x[16]) { vuint32x16 vec; @@ -6399,21 +6351,21 @@ } # define VUINT32x16_LOAD_DEFINED #endif -#ifndef VUINT32x16_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint32x16_store_aligned(vuint32x16 vec, vec_uint32 arr[16]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT32x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint32x16_store_aligned(vuint32x16 vec, vec_uint32 
x[16]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT32x16_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT32x16_STORE_DEFINED -VEC_FUNC_IMPL void vuint32x16_store(vuint32x16 vec, vec_uint32 arr[16]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT32x16_STORE_DEFINED) +VEC_FUNC_IMPL void vuint32x16_store(vuint32x16 vec, vec_uint32 x[16]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT32x16_STORE_DEFINED #endif -#ifndef VUINT32x16_ADD_DEFINED +#if !defined(VUINT32x16_ADD_DEFINED) VEC_FUNC_IMPL vuint32x16 vuint32x16_add(vuint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -6421,7 +6373,7 @@ } # define VUINT32x16_ADD_DEFINED #endif -#ifndef VUINT32x16_SUB_DEFINED +#if !defined(VUINT32x16_SUB_DEFINED) VEC_FUNC_IMPL vuint32x16 vuint32x16_sub(vuint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -6429,7 +6381,7 @@ } # define VUINT32x16_SUB_DEFINED #endif -#ifndef VUINT32x16_MUL_DEFINED +#if !defined(VUINT32x16_MUL_DEFINED) VEC_FUNC_IMPL vuint32x16 vuint32x16_mul(vuint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -6437,7 +6389,16 @@ } # define VUINT32x16_MUL_DEFINED #endif -#ifndef VUINT32x16_AND_DEFINED +#if !defined(VUINT32x16_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_avg(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT32x16_AVG_DEFINED +#endif +#if !defined(VUINT32x16_AND_DEFINED) VEC_FUNC_IMPL vuint32x16 vuint32x16_and(vuint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -6445,7 +6406,7 @@ } # define VUINT32x16_AND_DEFINED #endif -#ifndef VUINT32x16_OR_DEFINED +#if !defined(VUINT32x16_OR_DEFINED) VEC_FUNC_IMPL vuint32x16 vuint32x16_or(vuint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -6453,7 +6414,7 @@ } # define VUINT32x16_OR_DEFINED #endif -#ifndef VUINT32x16_XOR_DEFINED +#if !defined(VUINT32x16_XOR_DEFINED) VEC_FUNC_IMPL vuint32x16 vuint32x16_xor(vuint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -6461,8 +6422,16 @@ } # define VUINT32x16_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x16_CMPLT_DEFINED +#if !defined(VUINT32x16_NOT_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_not(vuint32x16 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT32x16_NOT_DEFINED +#endif +#if !defined(VUINT32x16_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x16 vuint32x16_cmplt(vuint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -6470,9 +6439,8 @@ } # define VUINT32x16_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x16_CMPEQ_DEFINED +#if !defined(VUINT32x16_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpeq(vuint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -6480,9 +6448,8 @@ } # define VUINT32x16_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x16_CMPGT_DEFINED +#if !defined(VUINT32x16_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpgt(vuint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -6490,9 +6457,8 @@ } # define VUINT32x16_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x16_CMPLE_DEFINED +#if !defined(VUINT32x16_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x16 vuint32x16_cmple(vuint32x16 vec1, vuint32x16 vec2) { 
vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -6500,9 +6466,8 @@ } # define VUINT32x16_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x16_CMPGE_DEFINED +#if !defined(VUINT32x16_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpge(vuint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -6510,9 +6475,8 @@ } # define VUINT32x16_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x16_MIN_DEFINED +#if !defined(VUINT32x16_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x16 vuint32x16_min(vuint32x16 vec1, vuint32x16 vec2) { vuint32x16 mask; @@ -6522,9 +6486,8 @@ } # define VUINT32x16_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x16_MAX_DEFINED +#if !defined(VUINT32x16_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x16 vuint32x16_max(vuint32x16 vec1, vuint32x16 vec2) { vuint32x16 mask; @@ -6534,30 +6497,8 @@ } # define VUINT32x16_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x16_AVG_DEFINED -VEC_FUNC_IMPL vuint32x16 vuint32x16_avg(vuint32x16 vec1, vuint32x16 vec2) -{ - vint32x16 ones = vint32x16_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT32x16_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x16_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint32x16 vuint32x16_lshift(vuint32x16 vec1, vuint32x16 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT32x16_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x16_RSHIFT_DEFINED +#if !defined(VUINT32x16_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x16 vuint32x16_rshift(vuint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -6565,9 +6506,8 @@ } # define VUINT32x16_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT32x16_LRSHIFT_DEFINED +#if !defined(VUINT32x16_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint32x16 vuint32x16_lrshift(vuint32x16 vec1, vuint32x16 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); @@ -6575,29 +6515,26 @@ } # define VUINT32x16_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT32x16_NOT_DEFINED -VEC_FUNC_IMPL vuint32x16 vuint32x16_not(vuint32x16 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT32x16_NOT_DEFINED -#endif - - -/* vuint64x2 */ - -#ifndef VINT64x2_SPLAT_DEFINED +#if !defined(VUINT32x16_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint32x16 vuint32x16_lshift(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT32x16_LSHIFT_DEFINED +#endif +#if !defined(VINT64x2_SPLAT_DEFINED) VEC_FUNC_IMPL vint64x2 vint64x2_splat(vec_int64 x) { vint64x2 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; return vec; } # define VINT64x2_SPLAT_DEFINED #endif -#ifndef VINT64x2_LOAD_ALIGNED_DEFINED +#if !defined(VINT64x2_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint64x2 vint64x2_load_aligned(const vec_int64 x[2]) { vint64x2 vec; @@ -6606,7 +6543,7 @@ } # define VINT64x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT64x2_LOAD_DEFINED +#if !defined(VINT64x2_LOAD_DEFINED) VEC_FUNC_IMPL vint64x2 vint64x2_load(const vec_int64 x[2]) { vint64x2 vec; @@ -6615,21 +6552,21 @@ } # define VINT64x2_LOAD_DEFINED #endif -#ifndef 
VINT64x2_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint64x2_store_aligned(vint64x2 vec, vec_int64 arr[2]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT64x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint64x2_store_aligned(vint64x2 vec, vec_int64 x[2]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT64x2_STORE_ALIGNED_DEFINED #endif -#ifndef VINT64x2_STORE_DEFINED -VEC_FUNC_IMPL void vint64x2_store(vint64x2 vec, vec_int64 arr[2]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT64x2_STORE_DEFINED) +VEC_FUNC_IMPL void vint64x2_store(vint64x2 vec, vec_int64 x[2]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT64x2_STORE_DEFINED #endif -#ifndef VINT64x2_ADD_DEFINED +#if !defined(VINT64x2_ADD_DEFINED) VEC_FUNC_IMPL vint64x2 vint64x2_add(vint64x2 vec1, vint64x2 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -6637,7 +6574,7 @@ } # define VINT64x2_ADD_DEFINED #endif -#ifndef VINT64x2_SUB_DEFINED +#if !defined(VINT64x2_SUB_DEFINED) VEC_FUNC_IMPL vint64x2 vint64x2_sub(vint64x2 vec1, vint64x2 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -6645,7 +6582,7 @@ } # define VINT64x2_SUB_DEFINED #endif -#ifndef VINT64x2_MUL_DEFINED +#if !defined(VINT64x2_MUL_DEFINED) VEC_FUNC_IMPL vint64x2 vint64x2_mul(vint64x2 vec1, vint64x2 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -6653,106 +6590,8 @@ } # define VINT64x2_MUL_DEFINED #endif -#ifndef VINT64x2_AND_DEFINED -VEC_FUNC_IMPL vint64x2 vint64x2_and(vint64x2 vec1, vint64x2 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT64x2_AND_DEFINED -#endif -#ifndef VINT64x2_OR_DEFINED -VEC_FUNC_IMPL vint64x2 vint64x2_or(vint64x2 vec1, vint64x2 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT64x2_OR_DEFINED -#endif -#ifndef VINT64x2_XOR_DEFINED -VEC_FUNC_IMPL vint64x2 vint64x2_xor(vint64x2 vec1, vint64x2 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT64x2_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x2_CMPLT_DEFINED -VEC_FUNC_IMPL vint64x2 vint64x2_cmplt(vint64x2 vec1, vint64x2 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT64x2_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x2_CMPEQ_DEFINED -VEC_FUNC_IMPL vint64x2 vint64x2_cmpeq(vint64x2 vec1, vint64x2 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT64x2_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x2_CMPGT_DEFINED -VEC_FUNC_IMPL vint64x2 vint64x2_cmpgt(vint64x2 vec1, vint64x2 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT64x2_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x2_CMPLE_DEFINED -VEC_FUNC_IMPL vint64x2 vint64x2_cmple(vint64x2 vec1, vint64x2 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT64x2_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x2_CMPGE_DEFINED -VEC_FUNC_IMPL vint64x2 vint64x2_cmpge(vint64x2 vec1, vint64x2 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT64x2_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x2_MIN_DEFINED -VEC_FUNC_IMPL vint64x2 vint64x2_min(vint64x2 vec1, vint64x2 vec2) -{ - vint64x2 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT64x2_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x2_MAX_DEFINED -VEC_FUNC_IMPL vint64x2 vint64x2_max(vint64x2 
vec1, vint64x2 vec2) -{ - vint64x2 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT64x2_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x2_AVG_DEFINED +#if !defined(VINT64x2_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint64x2 vint64x2_avg(vint64x2 vec1, vint64x2 vec2) { vint64x2 ones = vint64x2_splat(1); @@ -6766,19 +6605,107 @@ } # define VINT64x2_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x2_LSHIFT_DEFINED -VEC_FUNC_IMPL vint64x2 vint64x2_lshift(vint64x2 vec1, vuint64x2 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT64x2_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x2_RSHIFT_DEFINED +#if !defined(VINT64x2_AND_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_and(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT64x2_AND_DEFINED +#endif +#if !defined(VINT64x2_OR_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_or(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT64x2_OR_DEFINED +#endif +#if !defined(VINT64x2_XOR_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_xor(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT64x2_XOR_DEFINED +#endif +#if !defined(VINT64x2_NOT_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_not(vint64x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT64x2_NOT_DEFINED +#endif +#if !defined(VINT64x2_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x2 vint64x2_cmplt(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT64x2_CMPLT_DEFINED +#endif +#if !defined(VINT64x2_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x2 vint64x2_cmpeq(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT64x2_CMPEQ_DEFINED +#endif +#if !defined(VINT64x2_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x2 vint64x2_cmpgt(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT64x2_CMPGT_DEFINED +#endif +#if !defined(VINT64x2_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x2 vint64x2_cmple(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT64x2_CMPLE_DEFINED +#endif +#if !defined(VINT64x2_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x2 vint64x2_cmpge(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT64x2_CMPGE_DEFINED +#endif +#if !defined(VINT64x2_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x2 vint64x2_min(vint64x2 vec1, vint64x2 vec2) +{ + vint64x2 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT64x2_MIN_DEFINED +#endif +#if !defined(VINT64x2_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x2 vint64x2_max(vint64x2 vec1, vint64x2 vec2) +{ + vint64x2 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT64x2_MAX_DEFINED +#endif +#if !defined(VINT64x2_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint64x2 vint64x2_rshift(vint64x2 vec1, vuint64x2 vec2) { 
vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -6786,9 +6713,8 @@ } # define VINT64x2_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x2_LRSHIFT_DEFINED +#if !defined(VINT64x2_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint64x2 vint64x2_lrshift(vint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); @@ -6796,29 +6722,26 @@ } # define VINT64x2_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT64x2_NOT_DEFINED -VEC_FUNC_IMPL vint64x2 vint64x2_not(vint64x2 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT64x2_NOT_DEFINED -#endif - - -/* vint64x2 */ - -#ifndef VUINT64x2_SPLAT_DEFINED +#if !defined(VINT64x2_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x2 vint64x2_lshift(vint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT64x2_LSHIFT_DEFINED +#endif +#if !defined(VUINT64x2_SPLAT_DEFINED) VEC_FUNC_IMPL vuint64x2 vuint64x2_splat(vec_uint64 x) { vuint64x2 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; return vec; } # define VUINT64x2_SPLAT_DEFINED #endif -#ifndef VUINT64x2_LOAD_ALIGNED_DEFINED +#if !defined(VUINT64x2_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint64x2 vuint64x2_load_aligned(const vec_uint64 x[2]) { vuint64x2 vec; @@ -6827,7 +6750,7 @@ } # define VUINT64x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT64x2_LOAD_DEFINED +#if !defined(VUINT64x2_LOAD_DEFINED) VEC_FUNC_IMPL vuint64x2 vuint64x2_load(const vec_uint64 x[2]) { vuint64x2 vec; @@ -6836,21 +6759,21 @@ } # define VUINT64x2_LOAD_DEFINED #endif -#ifndef VUINT64x2_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint64x2_store_aligned(vuint64x2 vec, vec_uint64 arr[2]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT64x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint64x2_store_aligned(vuint64x2 vec, vec_uint64 x[2]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT64x2_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT64x2_STORE_DEFINED -VEC_FUNC_IMPL void vuint64x2_store(vuint64x2 vec, vec_uint64 arr[2]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT64x2_STORE_DEFINED) +VEC_FUNC_IMPL void vuint64x2_store(vuint64x2 vec, vec_uint64 x[2]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT64x2_STORE_DEFINED #endif -#ifndef VUINT64x2_ADD_DEFINED +#if !defined(VUINT64x2_ADD_DEFINED) VEC_FUNC_IMPL vuint64x2 vuint64x2_add(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -6858,7 +6781,7 @@ } # define VUINT64x2_ADD_DEFINED #endif -#ifndef VUINT64x2_SUB_DEFINED +#if !defined(VUINT64x2_SUB_DEFINED) VEC_FUNC_IMPL vuint64x2 vuint64x2_sub(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -6866,7 +6789,7 @@ } # define VUINT64x2_SUB_DEFINED #endif -#ifndef VUINT64x2_MUL_DEFINED +#if !defined(VUINT64x2_MUL_DEFINED) VEC_FUNC_IMPL vuint64x2 vuint64x2_mul(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -6874,7 +6797,16 @@ } # define VUINT64x2_MUL_DEFINED #endif -#ifndef VUINT64x2_AND_DEFINED +#if !defined(VUINT64x2_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_avg(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT64x2_AVG_DEFINED +#endif +#if !defined(VUINT64x2_AND_DEFINED) VEC_FUNC_IMPL vuint64x2 vuint64x2_and(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -6882,7 +6814,7 @@ } # 
define VUINT64x2_AND_DEFINED #endif -#ifndef VUINT64x2_OR_DEFINED +#if !defined(VUINT64x2_OR_DEFINED) VEC_FUNC_IMPL vuint64x2 vuint64x2_or(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -6890,7 +6822,7 @@ } # define VUINT64x2_OR_DEFINED #endif -#ifndef VUINT64x2_XOR_DEFINED +#if !defined(VUINT64x2_XOR_DEFINED) VEC_FUNC_IMPL vuint64x2 vuint64x2_xor(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -6898,8 +6830,16 @@ } # define VUINT64x2_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x2_CMPLT_DEFINED +#if !defined(VUINT64x2_NOT_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_not(vuint64x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT64x2_NOT_DEFINED +#endif +#if !defined(VUINT64x2_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x2 vuint64x2_cmplt(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -6907,9 +6847,8 @@ } # define VUINT64x2_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x2_CMPEQ_DEFINED +#if !defined(VUINT64x2_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpeq(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -6917,9 +6856,8 @@ } # define VUINT64x2_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x2_CMPGT_DEFINED +#if !defined(VUINT64x2_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpgt(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -6927,9 +6865,8 @@ } # define VUINT64x2_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x2_CMPLE_DEFINED +#if !defined(VUINT64x2_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x2 vuint64x2_cmple(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -6937,9 +6874,8 @@ } # define VUINT64x2_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x2_CMPGE_DEFINED +#if !defined(VUINT64x2_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpge(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -6947,9 +6883,8 @@ } # define VUINT64x2_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x2_MIN_DEFINED +#if !defined(VUINT64x2_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x2 vuint64x2_min(vuint64x2 vec1, vuint64x2 vec2) { vuint64x2 mask; @@ -6959,9 +6894,8 @@ } # define VUINT64x2_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x2_MAX_DEFINED +#if !defined(VUINT64x2_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x2 vuint64x2_max(vuint64x2 vec1, vuint64x2 vec2) { vuint64x2 mask; @@ -6971,30 +6905,8 @@ } # define VUINT64x2_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x2_AVG_DEFINED -VEC_FUNC_IMPL vuint64x2 vuint64x2_avg(vuint64x2 vec1, vuint64x2 vec2) -{ - vint64x2 ones = vint64x2_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT64x2_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x2_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint64x2 vuint64x2_lshift(vuint64x2 vec1, vuint64x2 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT64x2_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x2_RSHIFT_DEFINED +#if !defined(VUINT64x2_RSHIFT_DEFINED) \ + && 
(VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x2 vuint64x2_rshift(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -7002,9 +6914,8 @@ } # define VUINT64x2_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x2_LRSHIFT_DEFINED +#if !defined(VUINT64x2_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x2 vuint64x2_lrshift(vuint64x2 vec1, vuint64x2 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); @@ -7012,29 +6923,28 @@ } # define VUINT64x2_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT64x2_NOT_DEFINED -VEC_FUNC_IMPL vuint64x2 vuint64x2_not(vuint64x2 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT64x2_NOT_DEFINED -#endif - - -/* vuint64x4 */ - -#ifndef VINT64x4_SPLAT_DEFINED +#if !defined(VUINT64x2_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint64x2 vuint64x2_lshift(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT64x2_LSHIFT_DEFINED +#endif +#if !defined(VINT64x4_SPLAT_DEFINED) VEC_FUNC_IMPL vint64x4 vint64x4_splat(vec_int64 x) { vint64x4 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; return vec; } # define VINT64x4_SPLAT_DEFINED #endif -#ifndef VINT64x4_LOAD_ALIGNED_DEFINED +#if !defined(VINT64x4_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint64x4 vint64x4_load_aligned(const vec_int64 x[4]) { vint64x4 vec; @@ -7043,7 +6953,7 @@ } # define VINT64x4_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT64x4_LOAD_DEFINED +#if !defined(VINT64x4_LOAD_DEFINED) VEC_FUNC_IMPL vint64x4 vint64x4_load(const vec_int64 x[4]) { vint64x4 vec; @@ -7052,21 +6962,21 @@ } # define VINT64x4_LOAD_DEFINED #endif -#ifndef VINT64x4_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint64x4_store_aligned(vint64x4 vec, vec_int64 arr[4]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT64x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint64x4_store_aligned(vint64x4 vec, vec_int64 x[4]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT64x4_STORE_ALIGNED_DEFINED #endif -#ifndef VINT64x4_STORE_DEFINED -VEC_FUNC_IMPL void vint64x4_store(vint64x4 vec, vec_int64 arr[4]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT64x4_STORE_DEFINED) +VEC_FUNC_IMPL void vint64x4_store(vint64x4 vec, vec_int64 x[4]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT64x4_STORE_DEFINED #endif -#ifndef VINT64x4_ADD_DEFINED +#if !defined(VINT64x4_ADD_DEFINED) VEC_FUNC_IMPL vint64x4 vint64x4_add(vint64x4 vec1, vint64x4 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -7074,7 +6984,7 @@ } # define VINT64x4_ADD_DEFINED #endif -#ifndef VINT64x4_SUB_DEFINED +#if !defined(VINT64x4_SUB_DEFINED) VEC_FUNC_IMPL vint64x4 vint64x4_sub(vint64x4 vec1, vint64x4 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -7082,7 +6992,7 @@ } # define VINT64x4_SUB_DEFINED #endif -#ifndef VINT64x4_MUL_DEFINED +#if !defined(VINT64x4_MUL_DEFINED) VEC_FUNC_IMPL vint64x4 vint64x4_mul(vint64x4 vec1, vint64x4 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -7090,106 +7000,8 @@ } # define VINT64x4_MUL_DEFINED #endif -#ifndef VINT64x4_AND_DEFINED -VEC_FUNC_IMPL vint64x4 vint64x4_and(vint64x4 vec1, vint64x4 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT64x4_AND_DEFINED -#endif -#ifndef VINT64x4_OR_DEFINED -VEC_FUNC_IMPL vint64x4 vint64x4_or(vint64x4 vec1, vint64x4 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT64x4_OR_DEFINED 
-#endif -#ifndef VINT64x4_XOR_DEFINED -VEC_FUNC_IMPL vint64x4 vint64x4_xor(vint64x4 vec1, vint64x4 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT64x4_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x4_CMPLT_DEFINED -VEC_FUNC_IMPL vint64x4 vint64x4_cmplt(vint64x4 vec1, vint64x4 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT64x4_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x4_CMPEQ_DEFINED -VEC_FUNC_IMPL vint64x4 vint64x4_cmpeq(vint64x4 vec1, vint64x4 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT64x4_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x4_CMPGT_DEFINED -VEC_FUNC_IMPL vint64x4 vint64x4_cmpgt(vint64x4 vec1, vint64x4 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT64x4_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x4_CMPLE_DEFINED -VEC_FUNC_IMPL vint64x4 vint64x4_cmple(vint64x4 vec1, vint64x4 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT64x4_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x4_CMPGE_DEFINED -VEC_FUNC_IMPL vint64x4 vint64x4_cmpge(vint64x4 vec1, vint64x4 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT64x4_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x4_MIN_DEFINED -VEC_FUNC_IMPL vint64x4 vint64x4_min(vint64x4 vec1, vint64x4 vec2) -{ - vint64x4 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT64x4_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x4_MAX_DEFINED -VEC_FUNC_IMPL vint64x4 vint64x4_max(vint64x4 vec1, vint64x4 vec2) -{ - vint64x4 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT64x4_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x4_AVG_DEFINED +#if !defined(VINT64x4_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint64x4 vint64x4_avg(vint64x4 vec1, vint64x4 vec2) { vint64x4 ones = vint64x4_splat(1); @@ -7203,19 +7015,107 @@ } # define VINT64x4_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x4_LSHIFT_DEFINED -VEC_FUNC_IMPL vint64x4 vint64x4_lshift(vint64x4 vec1, vuint64x4 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT64x4_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x4_RSHIFT_DEFINED +#if !defined(VINT64x4_AND_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_and(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT64x4_AND_DEFINED +#endif +#if !defined(VINT64x4_OR_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_or(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT64x4_OR_DEFINED +#endif +#if !defined(VINT64x4_XOR_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_xor(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT64x4_XOR_DEFINED +#endif +#if !defined(VINT64x4_NOT_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_not(vint64x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT64x4_NOT_DEFINED +#endif +#if !defined(VINT64x4_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x4 vint64x4_cmplt(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = 
(vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT64x4_CMPLT_DEFINED +#endif +#if !defined(VINT64x4_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x4 vint64x4_cmpeq(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT64x4_CMPEQ_DEFINED +#endif +#if !defined(VINT64x4_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x4 vint64x4_cmpgt(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT64x4_CMPGT_DEFINED +#endif +#if !defined(VINT64x4_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x4 vint64x4_cmple(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT64x4_CMPLE_DEFINED +#endif +#if !defined(VINT64x4_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x4 vint64x4_cmpge(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT64x4_CMPGE_DEFINED +#endif +#if !defined(VINT64x4_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x4 vint64x4_min(vint64x4 vec1, vint64x4 vec2) +{ + vint64x4 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT64x4_MIN_DEFINED +#endif +#if !defined(VINT64x4_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x4 vint64x4_max(vint64x4 vec1, vint64x4 vec2) +{ + vint64x4 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT64x4_MAX_DEFINED +#endif +#if !defined(VINT64x4_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint64x4 vint64x4_rshift(vint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -7223,9 +7123,8 @@ } # define VINT64x4_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x4_LRSHIFT_DEFINED +#if !defined(VINT64x4_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint64x4 vint64x4_lrshift(vint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); @@ -7233,29 +7132,28 @@ } # define VINT64x4_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT64x4_NOT_DEFINED -VEC_FUNC_IMPL vint64x4 vint64x4_not(vint64x4 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT64x4_NOT_DEFINED -#endif - - -/* vint64x4 */ - -#ifndef VUINT64x4_SPLAT_DEFINED +#if !defined(VINT64x4_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x4 vint64x4_lshift(vint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT64x4_LSHIFT_DEFINED +#endif +#if !defined(VUINT64x4_SPLAT_DEFINED) VEC_FUNC_IMPL vuint64x4 vuint64x4_splat(vec_uint64 x) { vuint64x4 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; return vec; } # define VUINT64x4_SPLAT_DEFINED #endif -#ifndef VUINT64x4_LOAD_ALIGNED_DEFINED +#if !defined(VUINT64x4_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint64x4 vuint64x4_load_aligned(const vec_uint64 x[4]) { vuint64x4 vec; @@ -7264,7 +7162,7 @@ } # define VUINT64x4_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT64x4_LOAD_DEFINED +#if !defined(VUINT64x4_LOAD_DEFINED) VEC_FUNC_IMPL vuint64x4 vuint64x4_load(const vec_uint64 x[4]) { vuint64x4 vec; @@ -7273,21 +7171,21 @@ } # define VUINT64x4_LOAD_DEFINED #endif -#ifndef 
VUINT64x4_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint64x4_store_aligned(vuint64x4 vec, vec_uint64 arr[4]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT64x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint64x4_store_aligned(vuint64x4 vec, vec_uint64 x[4]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT64x4_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT64x4_STORE_DEFINED -VEC_FUNC_IMPL void vuint64x4_store(vuint64x4 vec, vec_uint64 arr[4]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT64x4_STORE_DEFINED) +VEC_FUNC_IMPL void vuint64x4_store(vuint64x4 vec, vec_uint64 x[4]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT64x4_STORE_DEFINED #endif -#ifndef VUINT64x4_ADD_DEFINED +#if !defined(VUINT64x4_ADD_DEFINED) VEC_FUNC_IMPL vuint64x4 vuint64x4_add(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -7295,7 +7193,7 @@ } # define VUINT64x4_ADD_DEFINED #endif -#ifndef VUINT64x4_SUB_DEFINED +#if !defined(VUINT64x4_SUB_DEFINED) VEC_FUNC_IMPL vuint64x4 vuint64x4_sub(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -7303,7 +7201,7 @@ } # define VUINT64x4_SUB_DEFINED #endif -#ifndef VUINT64x4_MUL_DEFINED +#if !defined(VUINT64x4_MUL_DEFINED) VEC_FUNC_IMPL vuint64x4 vuint64x4_mul(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -7311,7 +7209,16 @@ } # define VUINT64x4_MUL_DEFINED #endif -#ifndef VUINT64x4_AND_DEFINED +#if !defined(VUINT64x4_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_avg(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT64x4_AVG_DEFINED +#endif +#if !defined(VUINT64x4_AND_DEFINED) VEC_FUNC_IMPL vuint64x4 vuint64x4_and(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -7319,7 +7226,7 @@ } # define VUINT64x4_AND_DEFINED #endif -#ifndef VUINT64x4_OR_DEFINED +#if !defined(VUINT64x4_OR_DEFINED) VEC_FUNC_IMPL vuint64x4 vuint64x4_or(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -7327,7 +7234,7 @@ } # define VUINT64x4_OR_DEFINED #endif -#ifndef VUINT64x4_XOR_DEFINED +#if !defined(VUINT64x4_XOR_DEFINED) VEC_FUNC_IMPL vuint64x4 vuint64x4_xor(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -7335,8 +7242,16 @@ } # define VUINT64x4_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x4_CMPLT_DEFINED +#if !defined(VUINT64x4_NOT_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_not(vuint64x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT64x4_NOT_DEFINED +#endif +#if !defined(VUINT64x4_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x4 vuint64x4_cmplt(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -7344,9 +7259,8 @@ } # define VUINT64x4_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x4_CMPEQ_DEFINED +#if !defined(VUINT64x4_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpeq(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -7354,9 +7268,8 @@ } # define VUINT64x4_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x4_CMPGT_DEFINED +#if !defined(VUINT64x4_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpgt(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -7364,9 +7277,8 @@ } # define VUINT64x4_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) 
-#ifndef VUINT64x4_CMPLE_DEFINED +#if !defined(VUINT64x4_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x4 vuint64x4_cmple(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -7374,9 +7286,8 @@ } # define VUINT64x4_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x4_CMPGE_DEFINED +#if !defined(VUINT64x4_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpge(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -7384,9 +7295,8 @@ } # define VUINT64x4_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x4_MIN_DEFINED +#if !defined(VUINT64x4_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x4 vuint64x4_min(vuint64x4 vec1, vuint64x4 vec2) { vuint64x4 mask; @@ -7396,9 +7306,8 @@ } # define VUINT64x4_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x4_MAX_DEFINED +#if !defined(VUINT64x4_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x4 vuint64x4_max(vuint64x4 vec1, vuint64x4 vec2) { vuint64x4 mask; @@ -7408,30 +7317,8 @@ } # define VUINT64x4_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x4_AVG_DEFINED -VEC_FUNC_IMPL vuint64x4 vuint64x4_avg(vuint64x4 vec1, vuint64x4 vec2) -{ - vint64x4 ones = vint64x4_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT64x4_AVG_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x4_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint64x4 vuint64x4_lshift(vuint64x4 vec1, vuint64x4 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VUINT64x4_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x4_RSHIFT_DEFINED +#if !defined(VUINT64x4_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x4 vuint64x4_rshift(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -7439,9 +7326,8 @@ } # define VUINT64x4_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x4_LRSHIFT_DEFINED +#if !defined(VUINT64x4_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x4 vuint64x4_lrshift(vuint64x4 vec1, vuint64x4 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); @@ -7449,29 +7335,32 @@ } # define VUINT64x4_LRSHIFT_DEFINED #endif -#endif -#ifndef VUINT64x4_NOT_DEFINED -VEC_FUNC_IMPL vuint64x4 vuint64x4_not(vuint64x4 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT64x4_NOT_DEFINED -#endif - - -/* vuint64x8 */ - -#ifndef VINT64x8_SPLAT_DEFINED +#if !defined(VUINT64x4_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint64x4 vuint64x4_lshift(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT64x4_LSHIFT_DEFINED +#endif +#if !defined(VINT64x8_SPLAT_DEFINED) VEC_FUNC_IMPL vint64x8 vint64x8_splat(vec_int64 x) { vint64x8 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; return vec; } # define VINT64x8_SPLAT_DEFINED #endif -#ifndef VINT64x8_LOAD_ALIGNED_DEFINED +#if !defined(VINT64x8_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint64x8 vint64x8_load_aligned(const vec_int64 x[8]) { vint64x8 vec; @@ -7480,7 +7369,7 @@ } # define VINT64x8_LOAD_ALIGNED_DEFINED #endif 
-#ifndef VINT64x8_LOAD_DEFINED +#if !defined(VINT64x8_LOAD_DEFINED) VEC_FUNC_IMPL vint64x8 vint64x8_load(const vec_int64 x[8]) { vint64x8 vec; @@ -7489,21 +7378,21 @@ } # define VINT64x8_LOAD_DEFINED #endif -#ifndef VINT64x8_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint64x8_store_aligned(vint64x8 vec, vec_int64 arr[8]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VINT64x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint64x8_store_aligned(vint64x8 vec, vec_int64 x[8]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VINT64x8_STORE_ALIGNED_DEFINED #endif -#ifndef VINT64x8_STORE_DEFINED -VEC_FUNC_IMPL void vint64x8_store(vint64x8 vec, vec_int64 arr[8]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VINT64x8_STORE_DEFINED) +VEC_FUNC_IMPL void vint64x8_store(vint64x8 vec, vec_int64 x[8]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VINT64x8_STORE_DEFINED #endif -#ifndef VINT64x8_ADD_DEFINED +#if !defined(VINT64x8_ADD_DEFINED) VEC_FUNC_IMPL vint64x8 vint64x8_add(vint64x8 vec1, vint64x8 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -7511,7 +7400,7 @@ } # define VINT64x8_ADD_DEFINED #endif -#ifndef VINT64x8_SUB_DEFINED +#if !defined(VINT64x8_SUB_DEFINED) VEC_FUNC_IMPL vint64x8 vint64x8_sub(vint64x8 vec1, vint64x8 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -7519,7 +7408,7 @@ } # define VINT64x8_SUB_DEFINED #endif -#ifndef VINT64x8_MUL_DEFINED +#if !defined(VINT64x8_MUL_DEFINED) VEC_FUNC_IMPL vint64x8 vint64x8_mul(vint64x8 vec1, vint64x8 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -7527,106 +7416,8 @@ } # define VINT64x8_MUL_DEFINED #endif -#ifndef VINT64x8_AND_DEFINED -VEC_FUNC_IMPL vint64x8 vint64x8_and(vint64x8 vec1, vint64x8 vec2) -{ - vec1.gcc = (vec1.gcc & vec2.gcc); - return vec1; -} -# define VINT64x8_AND_DEFINED -#endif -#ifndef VINT64x8_OR_DEFINED -VEC_FUNC_IMPL vint64x8 vint64x8_or(vint64x8 vec1, vint64x8 vec2) -{ - vec1.gcc = (vec1.gcc | vec2.gcc); - return vec1; -} -# define VINT64x8_OR_DEFINED -#endif -#ifndef VINT64x8_XOR_DEFINED -VEC_FUNC_IMPL vint64x8 vint64x8_xor(vint64x8 vec1, vint64x8 vec2) -{ - vec1.gcc = (vec1.gcc ^ vec2.gcc); - return vec1; -} -# define VINT64x8_XOR_DEFINED -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x8_CMPLT_DEFINED -VEC_FUNC_IMPL vint64x8 vint64x8_cmplt(vint64x8 vec1, vint64x8 vec2) -{ - vec1.gcc = (vec1.gcc < vec2.gcc); - return vec1; -} -# define VINT64x8_CMPLT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x8_CMPEQ_DEFINED -VEC_FUNC_IMPL vint64x8 vint64x8_cmpeq(vint64x8 vec1, vint64x8 vec2) -{ - vec1.gcc = (vec1.gcc == vec2.gcc); - return vec1; -} -# define VINT64x8_CMPEQ_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x8_CMPGT_DEFINED -VEC_FUNC_IMPL vint64x8 vint64x8_cmpgt(vint64x8 vec1, vint64x8 vec2) -{ - vec1.gcc = (vec1.gcc > vec2.gcc); - return vec1; -} -# define VINT64x8_CMPGT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x8_CMPLE_DEFINED -VEC_FUNC_IMPL vint64x8 vint64x8_cmple(vint64x8 vec1, vint64x8 vec2) -{ - vec1.gcc = (vec1.gcc <= vec2.gcc); - return vec1; -} -# define VINT64x8_CMPLE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x8_CMPGE_DEFINED -VEC_FUNC_IMPL vint64x8 vint64x8_cmpge(vint64x8 vec1, vint64x8 vec2) -{ - vec1.gcc = (vec1.gcc >= vec2.gcc); - return vec1; -} -# define VINT64x8_CMPGE_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x8_MIN_DEFINED -VEC_FUNC_IMPL vint64x8 vint64x8_min(vint64x8 vec1, vint64x8 vec2) -{ - vint64x8 mask; - mask.gcc = (vec1.gcc < vec2.gcc); - vec1.gcc = 
(vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT64x8_MIN_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x8_MAX_DEFINED -VEC_FUNC_IMPL vint64x8 vint64x8_max(vint64x8 vec1, vint64x8 vec2) -{ - vint64x8 mask; - mask.gcc = (vec1.gcc > vec2.gcc); - vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); - return vec1; -} -# define VINT64x8_MAX_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x8_AVG_DEFINED +#if !defined(VINT64x8_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint64x8 vint64x8_avg(vint64x8 vec1, vint64x8 vec2) { vint64x8 ones = vint64x8_splat(1); @@ -7640,19 +7431,107 @@ } # define VINT64x8_AVG_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x8_LSHIFT_DEFINED -VEC_FUNC_IMPL vint64x8 vint64x8_lshift(vint64x8 vec1, vuint64x8 vec2) -{ - vec1.gcc = (vec1.gcc << vec2.gcc); - return vec1; -} -# define VINT64x8_LSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x8_RSHIFT_DEFINED +#if !defined(VINT64x8_AND_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_and(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT64x8_AND_DEFINED +#endif +#if !defined(VINT64x8_OR_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_or(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT64x8_OR_DEFINED +#endif +#if !defined(VINT64x8_XOR_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_xor(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT64x8_XOR_DEFINED +#endif +#if !defined(VINT64x8_NOT_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_not(vint64x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT64x8_NOT_DEFINED +#endif +#if !defined(VINT64x8_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x8 vint64x8_cmplt(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT64x8_CMPLT_DEFINED +#endif +#if !defined(VINT64x8_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x8 vint64x8_cmpeq(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT64x8_CMPEQ_DEFINED +#endif +#if !defined(VINT64x8_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x8 vint64x8_cmpgt(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT64x8_CMPGT_DEFINED +#endif +#if !defined(VINT64x8_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x8 vint64x8_cmple(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT64x8_CMPLE_DEFINED +#endif +#if !defined(VINT64x8_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x8 vint64x8_cmpge(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT64x8_CMPGE_DEFINED +#endif +#if !defined(VINT64x8_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x8 vint64x8_min(vint64x8 vec1, vint64x8 vec2) +{ + vint64x8 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT64x8_MIN_DEFINED +#endif +#if !defined(VINT64x8_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x8 vint64x8_max(vint64x8 vec1, vint64x8 vec2) +{ + vint64x8 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc 
& ~mask.gcc); + return vec1; +} +# define VINT64x8_MAX_DEFINED +#endif +#if !defined(VINT64x8_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint64x8 vint64x8_rshift(vint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc >> vec2.gcc); @@ -7660,9 +7539,8 @@ } # define VINT64x8_RSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VINT64x8_LRSHIFT_DEFINED +#if !defined(VINT64x8_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vint64x8 vint64x8_lrshift(vint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); @@ -7670,29 +7548,32 @@ } # define VINT64x8_LRSHIFT_DEFINED #endif -#endif -#ifndef VINT64x8_NOT_DEFINED -VEC_FUNC_IMPL vint64x8 vint64x8_not(vint64x8 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VINT64x8_NOT_DEFINED -#endif - - -/* vint64x8 */ - -#ifndef VUINT64x8_SPLAT_DEFINED +#if !defined(VINT64x8_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vint64x8 vint64x8_lshift(vint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT64x8_LSHIFT_DEFINED +#endif +#if !defined(VUINT64x8_SPLAT_DEFINED) VEC_FUNC_IMPL vuint64x8 vuint64x8_splat(vec_uint64 x) { vuint64x8 vec; - vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; return vec; } # define VUINT64x8_SPLAT_DEFINED #endif -#ifndef VUINT64x8_LOAD_ALIGNED_DEFINED +#if !defined(VUINT64x8_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint64x8 vuint64x8_load_aligned(const vec_uint64 x[8]) { vuint64x8 vec; @@ -7701,7 +7582,7 @@ } # define VUINT64x8_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT64x8_LOAD_DEFINED +#if !defined(VUINT64x8_LOAD_DEFINED) VEC_FUNC_IMPL vuint64x8 vuint64x8_load(const vec_uint64 x[8]) { vuint64x8 vec; @@ -7710,21 +7591,21 @@ } # define VUINT64x8_LOAD_DEFINED #endif -#ifndef VUINT64x8_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint64x8_store_aligned(vuint64x8 vec, vec_uint64 arr[8]) -{ - *(__typeof__(vec.gcc) *)arr = vec.gcc; +#if !defined(VUINT64x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint64x8_store_aligned(vuint64x8 vec, vec_uint64 x[8]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; } # define VUINT64x8_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT64x8_STORE_DEFINED -VEC_FUNC_IMPL void vuint64x8_store(vuint64x8 vec, vec_uint64 arr[8]) -{ - memcpy(arr, &vec, sizeof(vec)); +#if !defined(VUINT64x8_STORE_DEFINED) +VEC_FUNC_IMPL void vuint64x8_store(vuint64x8 vec, vec_uint64 x[8]) +{ + memcpy(x, &vec, sizeof(vec)); } # define VUINT64x8_STORE_DEFINED #endif -#ifndef VUINT64x8_ADD_DEFINED +#if !defined(VUINT64x8_ADD_DEFINED) VEC_FUNC_IMPL vuint64x8 vuint64x8_add(vuint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc + vec2.gcc); @@ -7732,7 +7613,7 @@ } # define VUINT64x8_ADD_DEFINED #endif -#ifndef VUINT64x8_SUB_DEFINED +#if !defined(VUINT64x8_SUB_DEFINED) VEC_FUNC_IMPL vuint64x8 vuint64x8_sub(vuint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc - vec2.gcc); @@ -7740,7 +7621,7 @@ } # define VUINT64x8_SUB_DEFINED #endif -#ifndef VUINT64x8_MUL_DEFINED +#if !defined(VUINT64x8_MUL_DEFINED) VEC_FUNC_IMPL vuint64x8 vuint64x8_mul(vuint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc * vec2.gcc); @@ -7748,7 +7629,16 @@ } # define VUINT64x8_MUL_DEFINED #endif -#ifndef VUINT64x8_AND_DEFINED +#if !defined(VUINT64x8_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint64x8 
vuint64x8_avg(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT64x8_AVG_DEFINED +#endif +#if !defined(VUINT64x8_AND_DEFINED) VEC_FUNC_IMPL vuint64x8 vuint64x8_and(vuint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc & vec2.gcc); @@ -7756,7 +7646,7 @@ } # define VUINT64x8_AND_DEFINED #endif -#ifndef VUINT64x8_OR_DEFINED +#if !defined(VUINT64x8_OR_DEFINED) VEC_FUNC_IMPL vuint64x8 vuint64x8_or(vuint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc | vec2.gcc); @@ -7764,7 +7654,7 @@ } # define VUINT64x8_OR_DEFINED #endif -#ifndef VUINT64x8_XOR_DEFINED +#if !defined(VUINT64x8_XOR_DEFINED) VEC_FUNC_IMPL vuint64x8 vuint64x8_xor(vuint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc ^ vec2.gcc); @@ -7772,8 +7662,16 @@ } # define VUINT64x8_XOR_DEFINED #endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x8_CMPLT_DEFINED +#if !defined(VUINT64x8_NOT_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_not(vuint64x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT64x8_NOT_DEFINED +#endif +#if !defined(VUINT64x8_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x8 vuint64x8_cmplt(vuint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc < vec2.gcc); @@ -7781,9 +7679,8 @@ } # define VUINT64x8_CMPLT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x8_CMPEQ_DEFINED +#if !defined(VUINT64x8_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpeq(vuint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc == vec2.gcc); @@ -7791,9 +7688,8 @@ } # define VUINT64x8_CMPEQ_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x8_CMPGT_DEFINED +#if !defined(VUINT64x8_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpgt(vuint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc > vec2.gcc); @@ -7801,9 +7697,8 @@ } # define VUINT64x8_CMPGT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x8_CMPLE_DEFINED +#if !defined(VUINT64x8_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x8 vuint64x8_cmple(vuint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc <= vec2.gcc); @@ -7811,9 +7706,8 @@ } # define VUINT64x8_CMPLE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x8_CMPGE_DEFINED +#if !defined(VUINT64x8_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpge(vuint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc >= vec2.gcc); @@ -7821,9 +7715,8 @@ } # define VUINT64x8_CMPGE_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x8_MIN_DEFINED +#if !defined(VUINT64x8_MIN_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x8 vuint64x8_min(vuint64x8 vec1, vuint64x8 vec2) { vuint64x8 mask; @@ -7833,9 +7726,8 @@ } # define VUINT64x8_MIN_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x8_MAX_DEFINED +#if !defined(VUINT64x8_MAX_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x8 vuint64x8_max(vuint64x8 vec1, vuint64x8 vec2) { vuint64x8 mask; @@ -7845,20 +7737,26 @@ } # define VUINT64x8_MAX_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x8_AVG_DEFINED -VEC_FUNC_IMPL vuint64x8 vuint64x8_avg(vuint64x8 vec1, vuint64x8 vec2) -{ - vint64x8 ones = vint64x8_splat(1); - vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc); - return vec1; -} -# define VUINT64x8_AVG_DEFINED -#endif -#endif -#if 
VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x8_LSHIFT_DEFINED +#if !defined(VUINT64x8_RSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_rshift(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT64x8_RSHIFT_DEFINED +#endif +#if !defined(VUINT64x8_LRSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vuint64x8 vuint64x8_lrshift(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT64x8_LRSHIFT_DEFINED +#endif +#if !defined(VUINT64x8_LSHIFT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) VEC_FUNC_IMPL vuint64x8 vuint64x8_lshift(vuint64x8 vec1, vuint64x8 vec2) { vec1.gcc = (vec1.gcc << vec2.gcc); @@ -7866,34 +7764,873 @@ } # define VUINT64x8_LSHIFT_DEFINED #endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x8_RSHIFT_DEFINED -VEC_FUNC_IMPL vuint64x8 vuint64x8_rshift(vuint64x8 vec1, vuint64x8 vec2) -{ - vec1.gcc = (vec1.gcc >> vec2.gcc); - return vec1; -} -# define VUINT64x8_RSHIFT_DEFINED -#endif -#endif -#if VEC_GNUC_ATLEAST(4, 3, 0) -#ifndef VUINT64x8_LRSHIFT_DEFINED -VEC_FUNC_IMPL vuint64x8 vuint64x8_lrshift(vuint64x8 vec1, vuint64x8 vec2) -{ - vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); - return vec1; -} -# define VUINT64x8_LRSHIFT_DEFINED -#endif -#endif -#ifndef VUINT64x8_NOT_DEFINED -VEC_FUNC_IMPL vuint64x8 vuint64x8_not(vuint64x8 vec) -{ - vec.gcc = ~vec.gcc; - return vec; -} -# define VUINT64x8_NOT_DEFINED -#endif -#endif /* VEC_IMPL_GCC_H_ */ - +#if !defined(VF32x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_splat(vec_f32 x) +{ + vf32x2 vec; + vec.gcc[0] = x; + vec.gcc[1] = x; + return vec; +} +# define VF32x2_SPLAT_DEFINED +#endif +#if !defined(VF32x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_load_aligned(const vec_f32 x[2]) +{ + vf32x2 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VF32x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x2_LOAD_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_load(const vec_f32 x[2]) +{ + vf32x2 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VF32x2_LOAD_DEFINED +#endif +#if !defined(VF32x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf32x2_store_aligned(vf32x2 vec, vec_f32 x[2]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; +} +# define VF32x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x2_STORE_DEFINED) +VEC_FUNC_IMPL void vf32x2_store(vf32x2 vec, vec_f32 x[2]) +{ + memcpy(x, &vec, sizeof(vec)); +} +# define VF32x2_STORE_DEFINED +#endif +#if !defined(VF32x2_ADD_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_add(vf32x2 vec1, vf32x2 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VF32x2_ADD_DEFINED +#endif +#if !defined(VF32x2_SUB_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_sub(vf32x2 vec1, vf32x2 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VF32x2_SUB_DEFINED +#endif +#if !defined(VF32x2_MUL_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_mul(vf32x2 vec1, vf32x2 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VF32x2_MUL_DEFINED +#endif +#if !defined(VF32x2_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x2 vf32x2_avg(vf32x2 vec1, vf32x2 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc) / 2; + return vec1; +} +# define VF32x2_AVG_DEFINED +#endif +#if !defined(VF32x2_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x2 vf32x2_cmplt(vf32x2 vec1, 
vf32x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VF32x2_CMPLT_DEFINED +#endif +#if !defined(VF32x2_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x2 vf32x2_cmpeq(vf32x2 vec1, vf32x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VF32x2_CMPEQ_DEFINED +#endif +#if !defined(VF32x2_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x2 vf32x2_cmpgt(vf32x2 vec1, vf32x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VF32x2_CMPGT_DEFINED +#endif +#if !defined(VF32x2_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x2 vf32x2_cmple(vf32x2 vec1, vf32x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VF32x2_CMPLE_DEFINED +#endif +#if !defined(VF32x2_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x2 vf32x2_cmpge(vf32x2 vec1, vf32x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VF32x2_CMPGE_DEFINED +#endif +#if !defined(VF32x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_splat(vec_f32 x) +{ + vf32x4 vec; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + return vec; +} +# define VF32x4_SPLAT_DEFINED +#endif +#if !defined(VF32x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_load_aligned(const vec_f32 x[4]) +{ + vf32x4 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VF32x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x4_LOAD_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_load(const vec_f32 x[4]) +{ + vf32x4 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VF32x4_LOAD_DEFINED +#endif +#if !defined(VF32x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf32x4_store_aligned(vf32x4 vec, vec_f32 x[4]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; +} +# define VF32x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x4_STORE_DEFINED) +VEC_FUNC_IMPL void vf32x4_store(vf32x4 vec, vec_f32 x[4]) +{ + memcpy(x, &vec, sizeof(vec)); +} +# define VF32x4_STORE_DEFINED +#endif +#if !defined(VF32x4_ADD_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_add(vf32x4 vec1, vf32x4 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VF32x4_ADD_DEFINED +#endif +#if !defined(VF32x4_SUB_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_sub(vf32x4 vec1, vf32x4 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VF32x4_SUB_DEFINED +#endif +#if !defined(VF32x4_MUL_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_mul(vf32x4 vec1, vf32x4 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VF32x4_MUL_DEFINED +#endif +#if !defined(VF32x4_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x4 vf32x4_avg(vf32x4 vec1, vf32x4 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc) / 2; + return vec1; +} +# define VF32x4_AVG_DEFINED +#endif +#if !defined(VF32x4_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x4 vf32x4_cmplt(vf32x4 vec1, vf32x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VF32x4_CMPLT_DEFINED +#endif +#if !defined(VF32x4_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpeq(vf32x4 vec1, vf32x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VF32x4_CMPEQ_DEFINED +#endif +#if !defined(VF32x4_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpgt(vf32x4 vec1, vf32x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VF32x4_CMPGT_DEFINED +#endif +#if !defined(VF32x4_CMPLE_DEFINED) \ + 
&& (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x4 vf32x4_cmple(vf32x4 vec1, vf32x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VF32x4_CMPLE_DEFINED +#endif +#if !defined(VF32x4_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpge(vf32x4 vec1, vf32x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VF32x4_CMPGE_DEFINED +#endif +#if !defined(VF32x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_splat(vec_f32 x) +{ + vf32x8 vec; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + return vec; +} +# define VF32x8_SPLAT_DEFINED +#endif +#if !defined(VF32x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_load_aligned(const vec_f32 x[8]) +{ + vf32x8 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VF32x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x8_LOAD_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_load(const vec_f32 x[8]) +{ + vf32x8 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VF32x8_LOAD_DEFINED +#endif +#if !defined(VF32x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf32x8_store_aligned(vf32x8 vec, vec_f32 x[8]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; +} +# define VF32x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x8_STORE_DEFINED) +VEC_FUNC_IMPL void vf32x8_store(vf32x8 vec, vec_f32 x[8]) +{ + memcpy(x, &vec, sizeof(vec)); +} +# define VF32x8_STORE_DEFINED +#endif +#if !defined(VF32x8_ADD_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_add(vf32x8 vec1, vf32x8 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VF32x8_ADD_DEFINED +#endif +#if !defined(VF32x8_SUB_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_sub(vf32x8 vec1, vf32x8 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VF32x8_SUB_DEFINED +#endif +#if !defined(VF32x8_MUL_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_mul(vf32x8 vec1, vf32x8 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VF32x8_MUL_DEFINED +#endif +#if !defined(VF32x8_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x8 vf32x8_avg(vf32x8 vec1, vf32x8 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc) / 2; + return vec1; +} +# define VF32x8_AVG_DEFINED +#endif +#if !defined(VF32x8_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x8 vf32x8_cmplt(vf32x8 vec1, vf32x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VF32x8_CMPLT_DEFINED +#endif +#if !defined(VF32x8_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x8 vf32x8_cmpeq(vf32x8 vec1, vf32x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VF32x8_CMPEQ_DEFINED +#endif +#if !defined(VF32x8_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x8 vf32x8_cmpgt(vf32x8 vec1, vf32x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VF32x8_CMPGT_DEFINED +#endif +#if !defined(VF32x8_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x8 vf32x8_cmple(vf32x8 vec1, vf32x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VF32x8_CMPLE_DEFINED +#endif +#if !defined(VF32x8_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x8 vf32x8_cmpge(vf32x8 vec1, vf32x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VF32x8_CMPGE_DEFINED +#endif +#if !defined(VF32x16_SPLAT_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_splat(vec_f32 x) +{ + vf32x16 vec; + vec.gcc[0] 
= x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + vec.gcc[8] = x; + vec.gcc[9] = x; + vec.gcc[10] = x; + vec.gcc[11] = x; + vec.gcc[12] = x; + vec.gcc[13] = x; + vec.gcc[14] = x; + vec.gcc[15] = x; + return vec; +} +# define VF32x16_SPLAT_DEFINED +#endif +#if !defined(VF32x16_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_load_aligned(const vec_f32 x[16]) +{ + vf32x16 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VF32x16_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x16_LOAD_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_load(const vec_f32 x[16]) +{ + vf32x16 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VF32x16_LOAD_DEFINED +#endif +#if !defined(VF32x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf32x16_store_aligned(vf32x16 vec, vec_f32 x[16]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; +} +# define VF32x16_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x16_STORE_DEFINED) +VEC_FUNC_IMPL void vf32x16_store(vf32x16 vec, vec_f32 x[16]) +{ + memcpy(x, &vec, sizeof(vec)); +} +# define VF32x16_STORE_DEFINED +#endif +#if !defined(VF32x16_ADD_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_add(vf32x16 vec1, vf32x16 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VF32x16_ADD_DEFINED +#endif +#if !defined(VF32x16_SUB_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_sub(vf32x16 vec1, vf32x16 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VF32x16_SUB_DEFINED +#endif +#if !defined(VF32x16_MUL_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_mul(vf32x16 vec1, vf32x16 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VF32x16_MUL_DEFINED +#endif +#if !defined(VF32x16_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x16 vf32x16_avg(vf32x16 vec1, vf32x16 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc) / 2; + return vec1; +} +# define VF32x16_AVG_DEFINED +#endif +#if !defined(VF32x16_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x16 vf32x16_cmplt(vf32x16 vec1, vf32x16 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VF32x16_CMPLT_DEFINED +#endif +#if !defined(VF32x16_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x16 vf32x16_cmpeq(vf32x16 vec1, vf32x16 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VF32x16_CMPEQ_DEFINED +#endif +#if !defined(VF32x16_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x16 vf32x16_cmpgt(vf32x16 vec1, vf32x16 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VF32x16_CMPGT_DEFINED +#endif +#if !defined(VF32x16_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x16 vf32x16_cmple(vf32x16 vec1, vf32x16 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VF32x16_CMPLE_DEFINED +#endif +#if !defined(VF32x16_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf32x16 vf32x16_cmpge(vf32x16 vec1, vf32x16 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VF32x16_CMPGE_DEFINED +#endif +#if !defined(VF64x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_splat(vec_f64 x) +{ + vf64x2 vec; + vec.gcc[0] = x; + vec.gcc[1] = x; + return vec; +} +# define VF64x2_SPLAT_DEFINED +#endif +#if !defined(VF64x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_load_aligned(const vec_f64 x[2]) +{ + vf64x2 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VF64x2_LOAD_ALIGNED_DEFINED +#endif 
+#if !defined(VF64x2_LOAD_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_load(const vec_f64 x[2]) +{ + vf64x2 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VF64x2_LOAD_DEFINED +#endif +#if !defined(VF64x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf64x2_store_aligned(vf64x2 vec, vec_f64 x[2]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; +} +# define VF64x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF64x2_STORE_DEFINED) +VEC_FUNC_IMPL void vf64x2_store(vf64x2 vec, vec_f64 x[2]) +{ + memcpy(x, &vec, sizeof(vec)); +} +# define VF64x2_STORE_DEFINED +#endif +#if !defined(VF64x2_ADD_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_add(vf64x2 vec1, vf64x2 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VF64x2_ADD_DEFINED +#endif +#if !defined(VF64x2_SUB_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_sub(vf64x2 vec1, vf64x2 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VF64x2_SUB_DEFINED +#endif +#if !defined(VF64x2_MUL_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_mul(vf64x2 vec1, vf64x2 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VF64x2_MUL_DEFINED +#endif +#if !defined(VF64x2_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x2 vf64x2_avg(vf64x2 vec1, vf64x2 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc) / 2; + return vec1; +} +# define VF64x2_AVG_DEFINED +#endif +#if !defined(VF64x2_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x2 vf64x2_cmplt(vf64x2 vec1, vf64x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VF64x2_CMPLT_DEFINED +#endif +#if !defined(VF64x2_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x2 vf64x2_cmpeq(vf64x2 vec1, vf64x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VF64x2_CMPEQ_DEFINED +#endif +#if !defined(VF64x2_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x2 vf64x2_cmpgt(vf64x2 vec1, vf64x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VF64x2_CMPGT_DEFINED +#endif +#if !defined(VF64x2_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x2 vf64x2_cmple(vf64x2 vec1, vf64x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VF64x2_CMPLE_DEFINED +#endif +#if !defined(VF64x2_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x2 vf64x2_cmpge(vf64x2 vec1, vf64x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VF64x2_CMPGE_DEFINED +#endif +#if !defined(VF64x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_splat(vec_f64 x) +{ + vf64x4 vec; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + return vec; +} +# define VF64x4_SPLAT_DEFINED +#endif +#if !defined(VF64x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_load_aligned(const vec_f64 x[4]) +{ + vf64x4 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VF64x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF64x4_LOAD_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_load(const vec_f64 x[4]) +{ + vf64x4 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VF64x4_LOAD_DEFINED +#endif +#if !defined(VF64x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf64x4_store_aligned(vf64x4 vec, vec_f64 x[4]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; +} +# define VF64x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF64x4_STORE_DEFINED) +VEC_FUNC_IMPL void vf64x4_store(vf64x4 vec, vec_f64 x[4]) +{ + memcpy(x, &vec, sizeof(vec)); +} +# define VF64x4_STORE_DEFINED +#endif +#if !defined(VF64x4_ADD_DEFINED) 
+VEC_FUNC_IMPL vf64x4 vf64x4_add(vf64x4 vec1, vf64x4 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VF64x4_ADD_DEFINED +#endif +#if !defined(VF64x4_SUB_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_sub(vf64x4 vec1, vf64x4 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VF64x4_SUB_DEFINED +#endif +#if !defined(VF64x4_MUL_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_mul(vf64x4 vec1, vf64x4 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VF64x4_MUL_DEFINED +#endif +#if !defined(VF64x4_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x4 vf64x4_avg(vf64x4 vec1, vf64x4 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc) / 2; + return vec1; +} +# define VF64x4_AVG_DEFINED +#endif +#if !defined(VF64x4_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x4 vf64x4_cmplt(vf64x4 vec1, vf64x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VF64x4_CMPLT_DEFINED +#endif +#if !defined(VF64x4_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x4 vf64x4_cmpeq(vf64x4 vec1, vf64x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VF64x4_CMPEQ_DEFINED +#endif +#if !defined(VF64x4_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x4 vf64x4_cmpgt(vf64x4 vec1, vf64x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VF64x4_CMPGT_DEFINED +#endif +#if !defined(VF64x4_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x4 vf64x4_cmple(vf64x4 vec1, vf64x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VF64x4_CMPLE_DEFINED +#endif +#if !defined(VF64x4_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x4 vf64x4_cmpge(vf64x4 vec1, vf64x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VF64x4_CMPGE_DEFINED +#endif +#if !defined(VF64x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_splat(vec_f64 x) +{ + vf64x8 vec; + vec.gcc[0] = x; + vec.gcc[1] = x; + vec.gcc[2] = x; + vec.gcc[3] = x; + vec.gcc[4] = x; + vec.gcc[5] = x; + vec.gcc[6] = x; + vec.gcc[7] = x; + return vec; +} +# define VF64x8_SPLAT_DEFINED +#endif +#if !defined(VF64x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_load_aligned(const vec_f64 x[8]) +{ + vf64x8 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VF64x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF64x8_LOAD_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_load(const vec_f64 x[8]) +{ + vf64x8 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VF64x8_LOAD_DEFINED +#endif +#if !defined(VF64x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf64x8_store_aligned(vf64x8 vec, vec_f64 x[8]) +{ + *(__typeof__(vec.gcc) *)x = vec.gcc; +} +# define VF64x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF64x8_STORE_DEFINED) +VEC_FUNC_IMPL void vf64x8_store(vf64x8 vec, vec_f64 x[8]) +{ + memcpy(x, &vec, sizeof(vec)); +} +# define VF64x8_STORE_DEFINED +#endif +#if !defined(VF64x8_ADD_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_add(vf64x8 vec1, vf64x8 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VF64x8_ADD_DEFINED +#endif +#if !defined(VF64x8_SUB_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_sub(vf64x8 vec1, vf64x8 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VF64x8_SUB_DEFINED +#endif +#if !defined(VF64x8_MUL_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_mul(vf64x8 vec1, vf64x8 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VF64x8_MUL_DEFINED +#endif +#if 
!defined(VF64x8_AVG_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x8 vf64x8_avg(vf64x8 vec1, vf64x8 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc) / 2; + return vec1; +} +# define VF64x8_AVG_DEFINED +#endif +#if !defined(VF64x8_CMPLT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x8 vf64x8_cmplt(vf64x8 vec1, vf64x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VF64x8_CMPLT_DEFINED +#endif +#if !defined(VF64x8_CMPEQ_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x8 vf64x8_cmpeq(vf64x8 vec1, vf64x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VF64x8_CMPEQ_DEFINED +#endif +#if !defined(VF64x8_CMPGT_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x8 vf64x8_cmpgt(vf64x8 vec1, vf64x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VF64x8_CMPGT_DEFINED +#endif +#if !defined(VF64x8_CMPLE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x8 vf64x8_cmple(vf64x8 vec1, vf64x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VF64x8_CMPLE_DEFINED +#endif +#if !defined(VF64x8_CMPGE_DEFINED) \ + && (VEC_GNUC_ATLEAST(4, 3, 0)) +VEC_FUNC_IMPL vf64x8 vf64x8_cmpge(vf64x8 vec1, vf64x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VF64x8_CMPGE_DEFINED +#endif
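The gcc.h hunks above lean entirely on GNU C's generic vector extensions: a union member declared with __attribute__((__vector_size__(N))) gets element-wise +, -, *, &, |, ^ and ~ for free, comparisons yield all-ones/all-zero lanes (which is why min and max are written as mask selects), and a logical right shift of signed lanes is obtained by first casting to an unsigned vector of the same size. The comparison and shift operators are the parts the header keeps behind VEC_GNUC_ATLEAST(4, 3, 0). A minimal standalone sketch of the idiom follows; the u64x2 type and function names are illustrative only and are not part of vec's API, and it assumes a GCC/clang recent enough to support vector comparisons and subscripting:

    #include <stdint.h>
    #include <stdio.h>

    /* two 64-bit lanes in a 16-byte vector */
    typedef uint64_t u64x2 __attribute__((__vector_size__(16)));

    /* mask-select min, same shape as the generated vuint64x2_min above */
    static u64x2 u64x2_min(u64x2 a, u64x2 b)
    {
        u64x2 mask = (u64x2)(a < b);   /* all-ones lanes where a < b */
        return (a & mask) | (b & ~mask);
    }

    int main(void)
    {
        u64x2 a = {1, 40};
        u64x2 b = {3, 20};
        u64x2 sum = a + b;             /* element-wise add */
        u64x2 m = u64x2_min(a, b);
        printf("%llu %llu / %llu %llu\n",
               (unsigned long long)sum[0], (unsigned long long)sum[1],
               (unsigned long long)m[0], (unsigned long long)m[1]);
        return 0;
    }

The explicit cast on the comparison result mirrors the way the generated code stores comparison masks back into the same vector type before combining them with bitwise operators.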
--- a/include/vec/impl/generic.h Tue Apr 29 16:54:13 2025 -0400 +++ b/include/vec/impl/generic.h Wed Apr 30 18:36:38 2025 -0400 @@ -25,1023 +25,19941 @@ /* This file is automatically generated! Do not edit it directly! * Edit the code that generates it in utils/gengeneric.c --paper */ -#ifndef VEC_IMPL_GENERIC_H_ -#define VEC_IMPL_GENERIC_H_ - -#define VEC_GENERIC_OPERATION(op, sign, bits, size) \ - do { \ - int i; \ - \ - for (i = 0; i < size; i++) \ - vec1.generic[i] = (op); \ - \ - return vec1; \ - } while (0) - -#define VEC_GENERIC_BUILTIN_OPERATION(op, sign, bits, size) \ - VEC_GENERIC_OPERATION(vec1.generic[i] op vec2.generic[i], sign, bits, size) - -#define VEC_GENERIC_CMP(op, sign, bits, size) \ - VEC_GENERIC_OPERATION((vec1.generic[i] op vec2.generic[i]) ? (vec_##sign##int##bits)VEC_MAX_OF_TYPE(vec_uint##bits) : 0, sign, bits, size) - -/* okay, now we can do this crap: */ - -#define VEC_GENERIC_SPLAT(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \ - { \ - v##sign##int##bits##x##size vec; \ - for (int i = 0; i < size; i++) \ - vec.generic[i] = x; \ - return vec; \ - } - -#define VEC_GENERIC_LOAD_EX(name, sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits in[size]) \ - { \ - v##sign##int##bits##x##size vec; \ - memcpy(&vec, in, sizeof(vec_##sign##int##bits) * size); \ - return vec; \ - } - -#define VEC_GENERIC_LOAD_ALIGNED(sign, bits, size) VEC_GENERIC_LOAD_EX(load_aligned, sign, bits, size) -#define VEC_GENERIC_LOAD(sign, bits, size) VEC_GENERIC_LOAD_EX(load, sign, bits, size) - -#define VEC_GENERIC_STORE_EX(name, sign, bits, size) \ - VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \ - { \ - memcpy(out, &vec, sizeof(vec_##sign##int##bits) * size); \ - } - -#define VEC_GENERIC_STORE_ALIGNED(sign, bits, size) VEC_GENERIC_STORE_EX(store_aligned, sign, bits, size) -#define VEC_GENERIC_STORE(sign, bits, size) VEC_GENERIC_STORE_EX(store, sign, bits, size) - -#define VEC_GENERIC_ADD(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_add(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - VEC_GENERIC_BUILTIN_OPERATION(+, sign, bits, size); \ - } - -#define VEC_GENERIC_SUB(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_sub(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - VEC_GENERIC_BUILTIN_OPERATION(-, sign, bits, size); \ - } - -#define VEC_GENERIC_MUL(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_mul(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - VEC_GENERIC_BUILTIN_OPERATION(*, sign, bits, size); \ - } - -#define VEC_GENERIC_DIV(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_div(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] / vec2.generic[i]) : 0, sign, bits, size); \ - } - -#define VEC_GENERIC_MOD(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_mod(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - VEC_GENERIC_OPERATION(vec2.generic[i] ? 
(vec1.generic[i] % vec2.generic[i]) : 0, sign, bits, size); \ - } - -#define VEC_GENERIC_AVG(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - for (int i = 0; i < size; i++) \ - vec1.generic[i] = vec_im##sign##avg(vec1.generic[i], vec2.generic[i]); \ - \ - return vec1; \ - } - -#define VEC_GENERIC_AND(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_and(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - VEC_GENERIC_BUILTIN_OPERATION(&, sign, bits, size); \ - } - -#define VEC_GENERIC_OR(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_or(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - VEC_GENERIC_BUILTIN_OPERATION(|, sign, bits, size); \ - } - -#define VEC_GENERIC_XOR(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_xor(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - VEC_GENERIC_BUILTIN_OPERATION(^, sign, bits, size); \ - } - -#define VEC_GENERIC_NOT(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \ - { \ - return v##sign##int##bits##x##size##_xor(vec, v##sign##int##bits##x##size##_splat((vec_##sign##int##bits)VEC_MAX_OF_TYPE(vec_uint##bits))); \ - } - -#define VEC_GENERIC_CMPLT(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmplt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - VEC_GENERIC_CMP(<, sign, bits, size); \ - } - -#define VEC_GENERIC_CMPLE(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmple(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - return v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size##_cmpgt(vec1, vec2)); \ - } - -#define VEC_GENERIC_CMPEQ(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpeq(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - VEC_GENERIC_CMP(==, sign, bits, size); \ - } - -#define VEC_GENERIC_CMPGE(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpge(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - return v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size##_cmplt(vec1, vec2)); \ - } - -#define VEC_GENERIC_CMPGT(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpgt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - VEC_GENERIC_CMP(>, sign, bits, size); \ - } - -#define VEC_GENERIC_LSHIFT(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_lshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \ - { \ - VEC_GENERIC_OPERATION(vec_##sign##lshift(vec1.generic[i], vec2.generic[i]), sign, bits, size); \ - } - -#define VEC_GENERIC_RSHIFT(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_rshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \ - { \ - VEC_GENERIC_OPERATION(vec_##sign##rshift(vec1.generic[i], vec2.generic[i]), sign, bits, size); \ - } - -#define VEC_GENERIC_LRSHIFT(sign, bits, size) \ - VEC_FUNC_IMPL 
v##sign##int##bits##x##size v##sign##int##bits##x##size##_lrshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \ - { \ - VEC_GENERIC_OPERATION(vec_urshift((vec_uint##bits)vec1.generic[i], vec2.generic[i]), sign, bits, size); \ - } - -#define VEC_GENERIC_MIN(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_min(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - v##sign##int##bits##x##size cmplt = v##sign##int##bits##x##size##_cmplt(vec1, vec2); \ - \ - v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \ - v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \ - \ - return v##sign##int##bits##x##size##_or(a, b); \ - } - -#define VEC_GENERIC_MAX(sign, bits, size) \ - VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_max(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ - { \ - v##sign##int##bits##x##size cmplt = v##sign##int##bits##x##size##_cmpgt(vec1, vec2); \ - \ - v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \ - v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \ - \ - return v##sign##int##bits##x##size##_or(a, b); \ - } - /* ------------------------------------------------------------------------ */ /* PREPROCESSOR HELL INCOMING */ - - -/* vuint8x2 */ - -#ifndef VINT8x2_SPLAT_DEFINED -VEC_GENERIC_SPLAT(/* nothing */, 8, 2) +#if !defined(VINT8x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_splat(vec_int8 x) +{ + vint8x2 vec; + vec.generic[0] = x; + vec.generic[1] = x; + return vec; +} # define VINT8x2_SPLAT_DEFINED #endif -#ifndef VINT8x2_LOAD_ALIGNED_DEFINED -VEC_GENERIC_LOAD_ALIGNED(/* nothing */, 8, 2) +#if !defined(VINT8x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_load_aligned(const vec_int8 x[2]) +{ + vint8x2 vec; + memcpy(vec.generic, x, 2); + return vec; +} # define VINT8x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT8x2_LOAD_DEFINED -VEC_GENERIC_LOAD(/* nothing */, 8, 2) +#if !defined(VINT8x2_LOAD_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_load(const vec_int8 x[2]) +{ + vint8x2 vec; + memcpy(vec.generic, x, 2); + return vec; +} # define VINT8x2_LOAD_DEFINED #endif -#ifndef VINT8x2_STORE_ALIGNED_DEFINED -VEC_GENERIC_STORE_ALIGNED(/* nothing */, 8, 2) +#if !defined(VINT8x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x2_store_aligned(vint8x2 vec, vec_int8 x[2]) +{ + memcpy(x, vec.generic, 2); +} # define VINT8x2_STORE_ALIGNED_DEFINED #endif -#ifndef VINT8x2_STORE_DEFINED -VEC_GENERIC_STORE(/* nothing */, 8, 2) +#if !defined(VINT8x2_STORE_DEFINED) +VEC_FUNC_IMPL void vint8x2_store(vint8x2 vec, vec_int8 x[2]) +{ + memcpy(x, vec.generic, 2); +} # define VINT8x2_STORE_DEFINED #endif -#ifndef VINT8x2_ADD_DEFINED -VEC_GENERIC_ADD(/* nothing */, 8, 2) +#if !defined(VINT8x2_ADD_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_add(vint8x2 vec1, vint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + return vec1; +} # define VINT8x2_ADD_DEFINED #endif -#ifndef VINT8x2_SUB_DEFINED -VEC_GENERIC_SUB(/* nothing */, 8, 2) +#if !defined(VINT8x2_SUB_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_sub(vint8x2 vec1, vint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + return vec1; +} # define VINT8x2_SUB_DEFINED #endif -#ifndef 
VINT8x2_MUL_DEFINED -VEC_GENERIC_MUL(/* nothing */, 8, 2) +#if !defined(VINT8x2_MUL_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_mul(vint8x2 vec1, vint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + return vec1; +} # define VINT8x2_MUL_DEFINED #endif -#ifndef VINT8x2_DIV_DEFINED -VEC_GENERIC_DIV(/* nothing */, 8, 2) +#if !defined(VINT8x2_DIV_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_div(vint8x2 vec1, vint8x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + return vec1; +} # define VINT8x2_DIV_DEFINED #endif -#ifndef VINT8x2_MOD_DEFINED -VEC_GENERIC_MOD(/* nothing */, 8, 2) +#if !defined(VINT8x2_MOD_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_mod(vint8x2 vec1, vint8x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + return vec1; +} # define VINT8x2_MOD_DEFINED #endif -#ifndef VINT8x2_AVG_DEFINED -VEC_GENERIC_AVG(/* nothing */, 8, 2) +#if !defined(VINT8x2_AVG_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_avg(vint8x2 vec1, vint8x2 vec2) +{ + vec_int8 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} # define VINT8x2_AVG_DEFINED #endif -#ifndef VINT8x2_AND_DEFINED -VEC_GENERIC_AND(/* nothing */, 8, 2) +#if !defined(VINT8x2_AND_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_and(vint8x2 vec1, vint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + return vec1; +} # define VINT8x2_AND_DEFINED #endif -#ifndef VINT8x2_OR_DEFINED -VEC_GENERIC_OR(/* nothing */, 8, 2) +#if !defined(VINT8x2_OR_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_or(vint8x2 vec1, vint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + return vec1; +} # define VINT8x2_OR_DEFINED #endif -#ifndef VINT8x2_XOR_DEFINED -VEC_GENERIC_XOR(/* nothing */, 8, 2) +#if !defined(VINT8x2_XOR_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_xor(vint8x2 vec1, vint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + return vec1; +} # define VINT8x2_XOR_DEFINED #endif -#ifndef VINT8x2_NOT_DEFINED -VEC_GENERIC_NOT(/* nothing */, 8, 2) +#if !defined(VINT8x2_NOT_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_not(vint8x2 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + return vec; +} # define VINT8x2_NOT_DEFINED #endif -#ifndef VINT8x2_CMPLT_DEFINED -VEC_GENERIC_CMPLT(/* nothing */, 8, 2) +#if !defined(VINT8x2_CMPLT_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_cmplt(vint8x2 vec1, vint8x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 
0xFF : 0, 1); + return vec1; +} # define VINT8x2_CMPLT_DEFINED #endif -#ifndef VINT8x2_CMPEQ_DEFINED -VEC_GENERIC_CMPEQ(/* nothing */, 8, 2) +#if !defined(VINT8x2_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_cmpeq(vint8x2 vec1, vint8x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1); + return vec1; +} # define VINT8x2_CMPEQ_DEFINED #endif -#ifndef VINT8x2_CMPGT_DEFINED -VEC_GENERIC_CMPGT(/* nothing */, 8, 2) +#if !defined(VINT8x2_CMPGT_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_cmpgt(vint8x2 vec1, vint8x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1); + return vec1; +} # define VINT8x2_CMPGT_DEFINED #endif -#ifndef VINT8x2_CMPLE_DEFINED -VEC_GENERIC_CMPLE(/* nothing */, 8, 2) +#if !defined(VINT8x2_CMPLE_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_cmple(vint8x2 vec1, vint8x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1); + return vec1; +} # define VINT8x2_CMPLE_DEFINED #endif -#ifndef VINT8x2_CMPGE_DEFINED -VEC_GENERIC_CMPGE(/* nothing */, 8, 2) +#if !defined(VINT8x2_CMPGE_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_cmpge(vint8x2 vec1, vint8x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1); + return vec1; +} # define VINT8x2_CMPGE_DEFINED #endif -#ifndef VINT8x2_MIN_DEFINED -VEC_GENERIC_MIN(/* nothing */, 8, 2) +#if !defined(VINT8x2_MIN_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_min(vint8x2 vec1, vint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VINT8x2_MIN_DEFINED #endif -#ifndef VINT8x2_MAX_DEFINED -VEC_GENERIC_MAX(/* nothing */, 8, 2) +#if !defined(VINT8x2_MAX_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_max(vint8x2 vec1, vint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? 
(vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VINT8x2_MAX_DEFINED #endif -#ifndef VINT8x2_RSHIFT_DEFINED -VEC_GENERIC_RSHIFT(/* nothing */, 8, 2) +#if !defined(VINT8x2_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_rshift(vint8x2 vec1, vuint8x2 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); + return vec1; +} # define VINT8x2_RSHIFT_DEFINED #endif -#ifndef VINT8x2_LRSHIFT_DEFINED -VEC_GENERIC_LRSHIFT(/* nothing */, 8, 2) +#if !defined(VINT8x2_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_lrshift(vint8x2 vec1, vuint8x2 vec2) +{ + union { vec_uint8 u; vec_int8 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + return vec1; +} # define VINT8x2_LRSHIFT_DEFINED #endif -#ifndef VINT8x2_LSHIFT_DEFINED -VEC_GENERIC_LSHIFT(/* nothing */, 8, 2) +#if !defined(VINT8x2_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x2 vint8x2_lshift(vint8x2 vec1, vuint8x2 vec2) +{ + union { vec_uint8 u; vec_int8 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + return vec1; +} # define VINT8x2_LSHIFT_DEFINED #endif - - -/* vint8x2 */ - -#ifndef VUINT8x2_SPLAT_DEFINED -VEC_GENERIC_SPLAT(u, 8, 2) +#if !defined(VUINT8x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_splat(vec_uint8 x) +{ + vuint8x2 vec; + vec.generic[0] = x; + vec.generic[1] = x; + return vec; +} # define VUINT8x2_SPLAT_DEFINED #endif -#ifndef VUINT8x2_LOAD_ALIGNED_DEFINED -VEC_GENERIC_LOAD_ALIGNED(u, 8, 2) +#if !defined(VUINT8x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_load_aligned(const vec_uint8 x[2]) +{ + vuint8x2 vec; + memcpy(vec.generic, x, 2); + return vec; +} # define VUINT8x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT8x2_LOAD_DEFINED -VEC_GENERIC_LOAD(u, 8, 2) +#if !defined(VUINT8x2_LOAD_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_load(const vec_uint8 x[2]) +{ + vuint8x2 vec; + memcpy(vec.generic, x, 2); + return vec; +} # define VUINT8x2_LOAD_DEFINED #endif -#ifndef VUINT8x2_STORE_ALIGNED_DEFINED -VEC_GENERIC_STORE_ALIGNED(u, 8, 2) +#if !defined(VUINT8x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x2_store_aligned(vuint8x2 vec, vec_uint8 x[2]) +{ + memcpy(x, vec.generic, 2); +} # define VUINT8x2_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT8x2_STORE_DEFINED -VEC_GENERIC_STORE(u, 8, 2) +#if !defined(VUINT8x2_STORE_DEFINED) +VEC_FUNC_IMPL void vuint8x2_store(vuint8x2 vec, vec_uint8 x[2]) +{ + memcpy(x, vec.generic, 2); +} # define VUINT8x2_STORE_DEFINED #endif -#ifndef VUINT8x2_ADD_DEFINED -VEC_GENERIC_ADD(u, 8, 2) +#if !defined(VUINT8x2_ADD_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_add(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + return vec1; +} # define VUINT8x2_ADD_DEFINED #endif -#ifndef VUINT8x2_SUB_DEFINED -VEC_GENERIC_SUB(u, 8, 2) +#if !defined(VUINT8x2_SUB_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_sub(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + return vec1; +} # define VUINT8x2_SUB_DEFINED #endif -#ifndef VUINT8x2_MUL_DEFINED -VEC_GENERIC_MUL(u, 8, 2) +#if !defined(VUINT8x2_MUL_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_mul(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * 
vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + return vec1; +} # define VUINT8x2_MUL_DEFINED #endif -#ifndef VUINT8x2_DIV_DEFINED -VEC_GENERIC_DIV(u, 8, 2) +#if !defined(VUINT8x2_DIV_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_div(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + return vec1; +} # define VUINT8x2_DIV_DEFINED #endif -#ifndef VUINT8x2_MOD_DEFINED -VEC_GENERIC_MOD(u, 8, 2) +#if !defined(VUINT8x2_MOD_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_mod(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + return vec1; +} # define VUINT8x2_MOD_DEFINED #endif -#ifndef VUINT8x2_AVG_DEFINED -VEC_GENERIC_AVG(u, 8, 2) +#if !defined(VUINT8x2_AVG_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_avg(vuint8x2 vec1, vuint8x2 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); + return vec1; +} # define VUINT8x2_AVG_DEFINED #endif -#ifndef VUINT8x2_AND_DEFINED -VEC_GENERIC_AND(u, 8, 2) +#if !defined(VUINT8x2_AND_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_and(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + return vec1; +} # define VUINT8x2_AND_DEFINED #endif -#ifndef VUINT8x2_OR_DEFINED -VEC_GENERIC_OR(u, 8, 2) +#if !defined(VUINT8x2_OR_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_or(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + return vec1; +} # define VUINT8x2_OR_DEFINED #endif -#ifndef VUINT8x2_XOR_DEFINED -VEC_GENERIC_XOR(u, 8, 2) +#if !defined(VUINT8x2_XOR_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_xor(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + return vec1; +} # define VUINT8x2_XOR_DEFINED #endif -#ifndef VUINT8x2_NOT_DEFINED -VEC_GENERIC_NOT(u, 8, 2) +#if !defined(VUINT8x2_NOT_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_not(vuint8x2 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + return vec; +} # define VUINT8x2_NOT_DEFINED #endif -#ifndef VUINT8x2_CMPLT_DEFINED -VEC_GENERIC_CMPLT(u, 8, 2) +#if !defined(VUINT8x2_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmplt(vuint8x2 vec1, vuint8x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1); + return vec1; +} # define VUINT8x2_CMPLT_DEFINED #endif -#ifndef VUINT8x2_CMPEQ_DEFINED -VEC_GENERIC_CMPEQ(u, 8, 2) +#if !defined(VUINT8x2_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpeq(vuint8x2 vec1, vuint8x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 
0xFF : 0, 1); + return vec1; +} # define VUINT8x2_CMPEQ_DEFINED #endif -#ifndef VUINT8x2_CMPGT_DEFINED -VEC_GENERIC_CMPGT(u, 8, 2) +#if !defined(VUINT8x2_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpgt(vuint8x2 vec1, vuint8x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1); + return vec1; +} # define VUINT8x2_CMPGT_DEFINED #endif -#ifndef VUINT8x2_CMPLE_DEFINED -VEC_GENERIC_CMPLE(u, 8, 2) +#if !defined(VUINT8x2_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmple(vuint8x2 vec1, vuint8x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1); + return vec1; +} # define VUINT8x2_CMPLE_DEFINED #endif -#ifndef VUINT8x2_CMPGE_DEFINED -VEC_GENERIC_CMPGE(u, 8, 2) +#if !defined(VUINT8x2_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpge(vuint8x2 vec1, vuint8x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1); + return vec1; +} # define VUINT8x2_CMPGE_DEFINED #endif -#ifndef VUINT8x2_MIN_DEFINED -VEC_GENERIC_MIN(u, 8, 2) +#if !defined(VUINT8x2_MIN_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_min(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VUINT8x2_MIN_DEFINED #endif -#ifndef VUINT8x2_MAX_DEFINED -VEC_GENERIC_MAX(u, 8, 2) +#if !defined(VUINT8x2_MAX_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_max(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? 
(vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VUINT8x2_MAX_DEFINED #endif -#ifndef VUINT8x2_RSHIFT_DEFINED -VEC_GENERIC_RSHIFT(u, 8, 2) +#if !defined(VUINT8x2_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_rshift(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + return vec1; +} # define VUINT8x2_RSHIFT_DEFINED #endif -#ifndef VUINT8x2_LRSHIFT_DEFINED -VEC_GENERIC_LRSHIFT(u, 8, 2) +#if !defined(VUINT8x2_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_lrshift(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + return vec1; +} # define VUINT8x2_LRSHIFT_DEFINED #endif -#ifndef VUINT8x2_LSHIFT_DEFINED -VEC_GENERIC_LSHIFT(u, 8, 2) +#if !defined(VUINT8x2_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x2 vuint8x2_lshift(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + return vec1; +} # define VUINT8x2_LSHIFT_DEFINED #endif - - -/* vuint16x2 */ - -#ifndef VINT16x2_SPLAT_DEFINED -VEC_GENERIC_SPLAT(/* nothing */, 16, 2) +#if !defined(VINT8x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_splat(vec_int8 x) +{ + vint8x4 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + return vec; +} +# define VINT8x4_SPLAT_DEFINED +#endif +#if !defined(VINT8x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_load_aligned(const vec_int8 x[4]) +{ + vint8x4 vec; + memcpy(vec.generic, x, 4); + return vec; +} +# define VINT8x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT8x4_LOAD_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_load(const vec_int8 x[4]) +{ + vint8x4 vec; + memcpy(vec.generic, x, 4); + return vec; +} +# define VINT8x4_LOAD_DEFINED +#endif +#if !defined(VINT8x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x4_store_aligned(vint8x4 vec, vec_int8 x[4]) +{ + memcpy(x, vec.generic, 4); +} +# define VINT8x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT8x4_STORE_DEFINED) +VEC_FUNC_IMPL void vint8x4_store(vint8x4 vec, vec_int8 x[4]) +{ + memcpy(x, vec.generic, 4); +} +# define VINT8x4_STORE_DEFINED +#endif +#if !defined(VINT8x4_ADD_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_add(vint8x4 vec1, vint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + return vec1; +} +# define VINT8x4_ADD_DEFINED +#endif +#if !defined(VINT8x4_SUB_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_sub(vint8x4 vec1, vint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + return vec1; +} +# define VINT8x4_SUB_DEFINED +#endif +#if !defined(VINT8x4_MUL_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_mul(vint8x4 vec1, vint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + return vec1; +} +# define VINT8x4_MUL_DEFINED +#endif +#if !defined(VINT8x4_DIV_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_div(vint8x4 vec1, vint8x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? 
(vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + return vec1; +} +# define VINT8x4_DIV_DEFINED +#endif +#if !defined(VINT8x4_MOD_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_mod(vint8x4 vec1, vint8x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + return vec1; +} +# define VINT8x4_MOD_DEFINED +#endif +#if !defined(VINT8x4_AVG_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_avg(vint8x4 vec1, vint8x4 vec2) +{ + vec_int8 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT8x4_AVG_DEFINED +#endif +#if !defined(VINT8x4_AND_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_and(vint8x4 vec1, vint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + return vec1; +} +# define VINT8x4_AND_DEFINED +#endif +#if !defined(VINT8x4_OR_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_or(vint8x4 vec1, vint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + return vec1; +} +# define VINT8x4_OR_DEFINED +#endif +#if !defined(VINT8x4_XOR_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_xor(vint8x4 vec1, vint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + return vec1; +} +# define VINT8x4_XOR_DEFINED +#endif +#if !defined(VINT8x4_NOT_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_not(vint8x4 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + return vec; +} +# define VINT8x4_NOT_DEFINED +#endif +#if !defined(VINT8x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_cmplt(vint8x4 vec1, vint8x4 vec2) +{ + memset(&vec1.generic[0], 
(vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x4_CMPLT_DEFINED +#endif +#if !defined(VINT8x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_cmpeq(vint8x4 vec1, vint8x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x4_CMPEQ_DEFINED +#endif +#if !defined(VINT8x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_cmpgt(vint8x4 vec1, vint8x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x4_CMPGT_DEFINED +#endif +#if !defined(VINT8x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_cmple(vint8x4 vec1, vint8x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x4_CMPLE_DEFINED +#endif +#if !defined(VINT8x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_cmpge(vint8x4 vec1, vint8x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x4_CMPGE_DEFINED +#endif +#if !defined(VINT8x4_MIN_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_min(vint8x4 vec1, vint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VINT8x4_MIN_DEFINED +#endif +#if !defined(VINT8x4_MAX_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_max(vint8x4 vec1, vint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? 
(vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VINT8x4_MAX_DEFINED +#endif +#if !defined(VINT8x4_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_rshift(vint8x4 vec1, vuint8x4 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); + return vec1; +} +# define VINT8x4_RSHIFT_DEFINED +#endif +#if !defined(VINT8x4_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_lrshift(vint8x4 vec1, vuint8x4 vec2) +{ + union { vec_uint8 u; vec_int8 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + return vec1; +} +# define VINT8x4_LRSHIFT_DEFINED +#endif +#if !defined(VINT8x4_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x4 vint8x4_lshift(vint8x4 vec1, vuint8x4 vec2) +{ + union { vec_uint8 u; vec_int8 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + return vec1; +} +# define VINT8x4_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_splat(vec_uint8 x) +{ + vuint8x4 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + return vec; +} +# define VUINT8x4_SPLAT_DEFINED +#endif +#if !defined(VUINT8x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_load_aligned(const vec_uint8 x[4]) +{ + vuint8x4 vec; + memcpy(vec.generic, x, 4); + return vec; +} +# define VUINT8x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT8x4_LOAD_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_load(const vec_uint8 x[4]) +{ + vuint8x4 vec; + memcpy(vec.generic, x, 4); + return vec; +} +# define VUINT8x4_LOAD_DEFINED +#endif +#if !defined(VUINT8x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x4_store_aligned(vuint8x4 vec, vec_uint8 x[4]) +{ + memcpy(x, vec.generic, 4); +} +# define VUINT8x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT8x4_STORE_DEFINED) +VEC_FUNC_IMPL void vuint8x4_store(vuint8x4 vec, vec_uint8 x[4]) +{ + memcpy(x, vec.generic, 4); +} +# define VUINT8x4_STORE_DEFINED +#endif +#if !defined(VUINT8x4_ADD_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_add(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + return vec1; +} +# define VUINT8x4_ADD_DEFINED +#endif +#if !defined(VUINT8x4_SUB_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_sub(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + return vec1; +} +# define VUINT8x4_SUB_DEFINED +#endif +#if !defined(VUINT8x4_MUL_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_mul(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + 
vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + return vec1; +} +# define VUINT8x4_MUL_DEFINED +#endif +#if !defined(VUINT8x4_DIV_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_div(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + return vec1; +} +# define VUINT8x4_DIV_DEFINED +#endif +#if !defined(VUINT8x4_MOD_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_mod(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + return vec1; +} +# define VUINT8x4_MOD_DEFINED +#endif +#if !defined(VUINT8x4_AVG_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_avg(vuint8x4 vec1, vuint8x4 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); + return vec1; +} +# define VUINT8x4_AVG_DEFINED +#endif +#if !defined(VUINT8x4_AND_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_and(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + return vec1; +} +# define VUINT8x4_AND_DEFINED +#endif +#if !defined(VUINT8x4_OR_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_or(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + return vec1; +} +# define VUINT8x4_OR_DEFINED +#endif +#if !defined(VUINT8x4_XOR_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_xor(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + return vec1; +} +# define VUINT8x4_XOR_DEFINED +#endif +#if !defined(VUINT8x4_NOT_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_not(vuint8x4 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + return vec; +} +# define VUINT8x4_NOT_DEFINED +#endif +#if !defined(VUINT8x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmplt(vuint8x4 vec1, vuint8x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 
0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x4_CMPLT_DEFINED +#endif +#if !defined(VUINT8x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpeq(vuint8x4 vec1, vuint8x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x4_CMPEQ_DEFINED +#endif +#if !defined(VUINT8x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpgt(vuint8x4 vec1, vuint8x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x4_CMPGT_DEFINED +#endif +#if !defined(VUINT8x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmple(vuint8x4 vec1, vuint8x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x4_CMPLE_DEFINED +#endif +#if !defined(VUINT8x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpge(vuint8x4 vec1, vuint8x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x4_CMPGE_DEFINED +#endif +#if !defined(VUINT8x4_MIN_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_min(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VUINT8x4_MIN_DEFINED +#endif +#if !defined(VUINT8x4_MAX_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_max(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? 
(vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VUINT8x4_MAX_DEFINED +#endif +#if !defined(VUINT8x4_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_rshift(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + return vec1; +} +# define VUINT8x4_RSHIFT_DEFINED +#endif +#if !defined(VUINT8x4_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_lrshift(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + return vec1; +} +# define VUINT8x4_LRSHIFT_DEFINED +#endif +#if !defined(VUINT8x4_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x4 vuint8x4_lshift(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + vec1.generic[2] <<= vec2.generic[0]; + vec1.generic[3] <<= vec2.generic[0]; + return vec1; +} +# define VUINT8x4_LSHIFT_DEFINED +#endif +#if !defined(VINT8x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_splat(vec_int8 x) +{ + vint8x8 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + return vec; +} +# define VINT8x8_SPLAT_DEFINED +#endif +#if !defined(VINT8x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_load_aligned(const vec_int8 x[8]) +{ + vint8x8 vec; + memcpy(vec.generic, x, 8); + return vec; +} +# define VINT8x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT8x8_LOAD_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_load(const vec_int8 x[8]) +{ + vint8x8 vec; + memcpy(vec.generic, x, 8); + return vec; +} +# define VINT8x8_LOAD_DEFINED +#endif +#if !defined(VINT8x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x8_store_aligned(vint8x8 vec, vec_int8 x[8]) +{ + memcpy(x, vec.generic, 8); +} +# define VINT8x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT8x8_STORE_DEFINED) +VEC_FUNC_IMPL void vint8x8_store(vint8x8 vec, vec_int8 x[8]) +{ + memcpy(x, vec.generic, 8); +} +# define VINT8x8_STORE_DEFINED +#endif +#if !defined(VINT8x8_ADD_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_add(vint8x8 vec1, vint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + return vec1; +} +# define VINT8x8_ADD_DEFINED +#endif +#if !defined(VINT8x8_SUB_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_sub(vint8x8 vec1, vint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + return vec1; +} +# define VINT8x8_SUB_DEFINED +#endif +#if !defined(VINT8x8_MUL_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_mul(vint8x8 vec1, vint8x8 vec2) +{ + vec1.generic[0] = 
(vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + return vec1; +} +# define VINT8x8_MUL_DEFINED +#endif +#if !defined(VINT8x8_DIV_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_div(vint8x8 vec1, vint8x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + return vec1; +} +# define VINT8x8_DIV_DEFINED +#endif +#if !defined(VINT8x8_MOD_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_mod(vint8x8 vec1, vint8x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? 
(vec1.generic[7] % vec2.generic[7]) : 0); + return vec1; +} +# define VINT8x8_MOD_DEFINED +#endif +#if !defined(VINT8x8_AVG_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_avg(vint8x8 vec1, vint8x8 vec2) +{ + vec_int8 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[4] % 2); + y_d_rem = (vec2.generic[4] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[5] % 2); + y_d_rem = (vec2.generic[5] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[6] % 2); + y_d_rem = (vec2.generic[6] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[7] % 2); + y_d_rem = (vec2.generic[7] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT8x8_AVG_DEFINED +#endif +#if !defined(VINT8x8_AND_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_and(vint8x8 vec1, vint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + return vec1; +} +# define VINT8x8_AND_DEFINED +#endif +#if !defined(VINT8x8_OR_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_or(vint8x8 vec1, vint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + return vec1; 
+} +# define VINT8x8_OR_DEFINED +#endif +#if !defined(VINT8x8_XOR_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_xor(vint8x8 vec1, vint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + return vec1; +} +# define VINT8x8_XOR_DEFINED +#endif +#if !defined(VINT8x8_NOT_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_not(vint8x8 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + return vec; +} +# define VINT8x8_NOT_DEFINED +#endif +#if !defined(VINT8x8_CMPLT_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_cmplt(vint8x8 vec1, vint8x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x8_CMPLT_DEFINED +#endif +#if !defined(VINT8x8_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_cmpeq(vint8x8 vec1, vint8x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x8_CMPEQ_DEFINED +#endif +#if !defined(VINT8x8_CMPGT_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_cmpgt(vint8x8 vec1, vint8x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 
0xFF : 0, 1); + return vec1; +} +# define VINT8x8_CMPGT_DEFINED +#endif +#if !defined(VINT8x8_CMPLE_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_cmple(vint8x8 vec1, vint8x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x8_CMPLE_DEFINED +#endif +#if !defined(VINT8x8_CMPGE_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_cmpge(vint8x8 vec1, vint8x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x8_CMPGE_DEFINED +#endif +#if !defined(VINT8x8_MIN_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_min(vint8x8 vec1, vint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VINT8x8_MIN_DEFINED +#endif +#if !defined(VINT8x8_MAX_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_max(vint8x8 vec1, vint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? 
(vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VINT8x8_MAX_DEFINED +#endif +#if !defined(VINT8x8_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_rshift(vint8x8 vec1, vuint8x8 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); +vec1.generic[4] = ((~vec1.generic[4]) >> vec2.generic[4]); +vec1.generic[5] = ((~vec1.generic[5]) >> vec2.generic[5]); +vec1.generic[6] = ((~vec1.generic[6]) >> vec2.generic[6]); +vec1.generic[7] = ((~vec1.generic[7]) >> vec2.generic[7]); + return vec1; +} +# define VINT8x8_RSHIFT_DEFINED +#endif +#if !defined(VINT8x8_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_lrshift(vint8x8 vec1, vuint8x8 vec2) +{ + union { vec_uint8 u; vec_int8 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u >>= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u >>= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u >>= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u >>= vec2.generic[7]; + vec1.generic[7] = x.s; + return vec1; +} +# define VINT8x8_LRSHIFT_DEFINED +#endif +#if !defined(VINT8x8_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x8 vint8x8_lshift(vint8x8 vec1, vuint8x8 vec2) +{ + union { vec_uint8 u; vec_int8 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u <<= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u <<= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u <<= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u <<= vec2.generic[7]; + vec1.generic[7] = x.s; + return vec1; +} +# define VINT8x8_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_splat(vec_uint8 x) +{ + vuint8x8 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + return vec; +} +# define VUINT8x8_SPLAT_DEFINED +#endif +#if !defined(VUINT8x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_load_aligned(const vec_uint8 x[8]) +{ + vuint8x8 vec; + memcpy(vec.generic, x, 8); + return vec; +} +# define VUINT8x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT8x8_LOAD_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_load(const vec_uint8 x[8]) +{ + vuint8x8 vec; + memcpy(vec.generic, x, 8); + return vec; +} +# define VUINT8x8_LOAD_DEFINED +#endif +#if !defined(VUINT8x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x8_store_aligned(vuint8x8 vec, vec_uint8 x[8]) +{ + memcpy(x, vec.generic, 8); +} +# define VUINT8x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT8x8_STORE_DEFINED) +VEC_FUNC_IMPL void vuint8x8_store(vuint8x8 vec, vec_uint8 x[8]) +{ + memcpy(x, vec.generic, 8); +} +# define VUINT8x8_STORE_DEFINED +#endif +#if 
!defined(VUINT8x8_ADD_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_add(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + return vec1; +} +# define VUINT8x8_ADD_DEFINED +#endif +#if !defined(VUINT8x8_SUB_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_sub(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + return vec1; +} +# define VUINT8x8_SUB_DEFINED +#endif +#if !defined(VUINT8x8_MUL_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_mul(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + return vec1; +} +# define VUINT8x8_MUL_DEFINED +#endif +#if !defined(VUINT8x8_DIV_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_div(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + return vec1; +} +# define VUINT8x8_DIV_DEFINED +#endif +#if !defined(VUINT8x8_MOD_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_mod(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? 
(vec1.generic[7] % vec2.generic[7]) : 0); + return vec1; +} +# define VUINT8x8_MOD_DEFINED +#endif +#if !defined(VUINT8x8_AVG_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_avg(vuint8x8 vec1, vuint8x8 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); +vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1); +vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1); +vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1); +vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1); + return vec1; +} +# define VUINT8x8_AVG_DEFINED +#endif +#if !defined(VUINT8x8_AND_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_and(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + return vec1; +} +# define VUINT8x8_AND_DEFINED +#endif +#if !defined(VUINT8x8_OR_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_or(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + return vec1; +} +# define VUINT8x8_OR_DEFINED +#endif +#if !defined(VUINT8x8_XOR_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_xor(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + return vec1; +} +# define VUINT8x8_XOR_DEFINED +#endif +#if !defined(VUINT8x8_NOT_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_not(vuint8x8 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + return vec; +} +# define VUINT8x8_NOT_DEFINED +#endif +#if !defined(VUINT8x8_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmplt(vuint8x8 vec1, vuint8x8 vec2) 
+{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x8_CMPLT_DEFINED +#endif +#if !defined(VUINT8x8_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpeq(vuint8x8 vec1, vuint8x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x8_CMPEQ_DEFINED +#endif +#if !defined(VUINT8x8_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpgt(vuint8x8 vec1, vuint8x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x8_CMPGT_DEFINED +#endif +#if !defined(VUINT8x8_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmple(vuint8x8 vec1, vuint8x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x8_CMPLE_DEFINED +#endif +#if !defined(VUINT8x8_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpge(vuint8x8 vec1, vuint8x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 
0xFF : 0, 1);
+ memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1);
+ memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1);
+ memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1);
+ memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1);
+ return vec1;
+}
+# define VUINT8x8_CMPGE_DEFINED
+#endif
+#if !defined(VUINT8x8_MIN_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_min(vuint8x8 vec1, vuint8x8 vec2)
+{
+ vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+ vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+ vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+ vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+ vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+ vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+ vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+ vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+ return vec1;
+}
+# define VUINT8x8_MIN_DEFINED
+#endif
+#if !defined(VUINT8x8_MAX_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_max(vuint8x8 vec1, vuint8x8 vec2)
+{
+ vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+ vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+ vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+ vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+ vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+ vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+ vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+ vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+ return vec1;
+}
+# define VUINT8x8_MAX_DEFINED
+#endif
+#if !defined(VUINT8x8_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_rshift(vuint8x8 vec1, vuint8x8 vec2)
+{
+ vec1.generic[0] >>= vec2.generic[0];
+ vec1.generic[1] >>= vec2.generic[1];
+ vec1.generic[2] >>= vec2.generic[2];
+ vec1.generic[3] >>= vec2.generic[3];
+ vec1.generic[4] >>= vec2.generic[4];
+ vec1.generic[5] >>= vec2.generic[5];
+ vec1.generic[6] >>= vec2.generic[6];
+ vec1.generic[7] >>= vec2.generic[7];
+ return vec1;
+}
+# define VUINT8x8_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x8_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_lrshift(vuint8x8 vec1, vuint8x8 vec2)
+{
+ vec1.generic[0] >>= vec2.generic[0];
+ vec1.generic[1] >>= vec2.generic[1];
+ vec1.generic[2] >>= vec2.generic[2];
+ vec1.generic[3] >>= vec2.generic[3];
+ vec1.generic[4] >>= vec2.generic[4];
+ vec1.generic[5] >>= vec2.generic[5];
+ vec1.generic[6] >>= vec2.generic[6];
+ vec1.generic[7] >>= vec2.generic[7];
+ return vec1;
+}
+# define VUINT8x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_lshift(vuint8x8 vec1, vuint8x8 vec2)
+{
+ vec1.generic[0] <<= vec2.generic[0];
+ vec1.generic[1] <<= vec2.generic[1];
+ vec1.generic[2] <<= vec2.generic[2];
+ vec1.generic[3] <<= vec2.generic[3];
+ vec1.generic[4] <<= vec2.generic[4];
+ vec1.generic[5] <<= vec2.generic[5];
+ vec1.generic[6] <<= vec2.generic[6];
+ vec1.generic[7] <<= vec2.generic[7];
+ return vec1;
+}
+# define VUINT8x8_LSHIFT_DEFINED
+#endif
+#if !defined(VINT8x16_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_splat(vec_int8 x)
+{
+ vint8x16 vec;
+ vec.generic[0] = x;
+ vec.generic[1] = x;
+ vec.generic[2] = x;
+ vec.generic[3] = x;
+ vec.generic[4] = x;
+ vec.generic[5] = x;
+ vec.generic[6] = x;
+ vec.generic[7] = x;
+ vec.generic[8] = x;
+ vec.generic[9] = x;
+ vec.generic[10] = x;
+ vec.generic[11] = x;
+ vec.generic[12] = x;
+ vec.generic[13] = x;
+ vec.generic[14] = x;
+ vec.generic[15] = x;
+ return vec;
+}
+# define VINT8x16_SPLAT_DEFINED
+#endif
+#if !defined(VINT8x16_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_load_aligned(const vec_int8 x[16])
+{
+ vint8x16 vec;
+ memcpy(vec.generic, x, 16);
+ return vec;
+}
+# define VINT8x16_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x16_LOAD_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_load(const vec_int8 x[16])
+{
+ vint8x16 vec;
+ memcpy(vec.generic, x, 16);
+ return vec;
+}
+# define VINT8x16_LOAD_DEFINED
+#endif
+#if !defined(VINT8x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 x[16])
+{
+ memcpy(x, vec.generic, 16);
+}
+# define VINT8x16_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x16_store(vint8x16 vec, vec_int8 x[16])
+{
+ memcpy(x, vec.generic, 16);
+}
+# define VINT8x16_STORE_DEFINED
+#endif
+#if !defined(VINT8x16_ADD_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_add(vint8x16 vec1, vint8x16 vec2)
+{
+ vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+ vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+ vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+ vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+ vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+ vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+ vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+ vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+ vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+ vec1.generic[9] = (vec1.generic[9] +
vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + return vec1; +} +# define VINT8x16_ADD_DEFINED +#endif +#if !defined(VINT8x16_SUB_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_sub(vint8x16 vec1, vint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + return vec1; +} +# define VINT8x16_SUB_DEFINED +#endif +#if !defined(VINT8x16_MUL_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_mul(vint8x16 vec1, vint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + return vec1; +} +# define VINT8x16_MUL_DEFINED +#endif +#if !defined(VINT8x16_DIV_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_div(vint8x16 vec1, vint8x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? 
(vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0); + return vec1; +} +# define VINT8x16_DIV_DEFINED +#endif +#if !defined(VINT8x16_MOD_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_mod(vint8x16 vec1, vint8x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? 
(vec1.generic[15] % vec2.generic[15]) : 0); + return vec1; +} +# define VINT8x16_MOD_DEFINED +#endif +#if !defined(VINT8x16_AVG_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2) +{ + vec_int8 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[4] % 2); + y_d_rem = (vec2.generic[4] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[5] % 2); + y_d_rem = (vec2.generic[5] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[6] % 2); + y_d_rem = (vec2.generic[6] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[7] % 2); + y_d_rem = (vec2.generic[7] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[8] % 2); + y_d_rem = (vec2.generic[8] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[8] = ((vec1.generic[8] / 2) + (vec2.generic[8] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[9] % 2); + y_d_rem = (vec2.generic[9] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[9] = ((vec1.generic[9] / 2) + (vec2.generic[9] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[10] % 2); + y_d_rem = (vec2.generic[10] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[10] = ((vec1.generic[10] / 2) + (vec2.generic[10] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[11] % 2); + y_d_rem = (vec2.generic[11] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[11] = ((vec1.generic[11] / 2) + (vec2.generic[11] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[12] % 2); + y_d_rem = (vec2.generic[12] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[12] = 
((vec1.generic[12] / 2) + (vec2.generic[12] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[13] % 2); + y_d_rem = (vec2.generic[13] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[13] = ((vec1.generic[13] / 2) + (vec2.generic[13] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[14] % 2); + y_d_rem = (vec2.generic[14] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[14] = ((vec1.generic[14] / 2) + (vec2.generic[14] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[15] % 2); + y_d_rem = (vec2.generic[15] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[15] = ((vec1.generic[15] / 2) + (vec2.generic[15] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT8x16_AVG_DEFINED +#endif +#if !defined(VINT8x16_AND_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]); + return vec1; +} +# define VINT8x16_AND_DEFINED +#endif +#if !defined(VINT8x16_OR_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]); + return vec1; +} +# define VINT8x16_OR_DEFINED +#endif +#if !defined(VINT8x16_XOR_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ 
vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]); + return vec1; +} +# define VINT8x16_XOR_DEFINED +#endif +#if !defined(VINT8x16_NOT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_not(vint8x16 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + vec.generic[8] = ~vec.generic[8]; + vec.generic[9] = ~vec.generic[9]; + vec.generic[10] = ~vec.generic[10]; + vec.generic[11] = ~vec.generic[11]; + vec.generic[12] = ~vec.generic[12]; + vec.generic[13] = ~vec.generic[13]; + vec.generic[14] = ~vec.generic[14]; + vec.generic[15] = ~vec.generic[15]; + return vec; +} +# define VINT8x16_NOT_DEFINED +#endif +#if !defined(VINT8x16_CMPLT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x16_CMPLT_DEFINED +#endif +#if !defined(VINT8x16_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 
0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x16_CMPEQ_DEFINED +#endif +#if !defined(VINT8x16_CMPGT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x16_CMPGT_DEFINED +#endif +#if !defined(VINT8x16_CMPLE_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_cmple(vint8x16 vec1, vint8x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 
0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x16_CMPLE_DEFINED +#endif +#if !defined(VINT8x16_CMPGE_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_cmpge(vint8x16 vec1, vint8x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x16_CMPGE_DEFINED +#endif +#if !defined(VINT8x16_MIN_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? 
(vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VINT8x16_MIN_DEFINED +#endif +#if !defined(VINT8x16_MAX_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VINT8x16_MAX_DEFINED +#endif +#if !defined(VINT8x16_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_rshift(vint8x16 vec1, vuint8x16 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); +vec1.generic[4] = ((~vec1.generic[4]) >> vec2.generic[4]); +vec1.generic[5] = ((~vec1.generic[5]) >> vec2.generic[5]); +vec1.generic[6] = ((~vec1.generic[6]) >> vec2.generic[6]); +vec1.generic[7] = ((~vec1.generic[7]) >> vec2.generic[7]); +vec1.generic[8] = ((~vec1.generic[8]) >> vec2.generic[8]); +vec1.generic[9] = ((~vec1.generic[9]) >> vec2.generic[9]); +vec1.generic[10] = ((~vec1.generic[10]) >> vec2.generic[10]); +vec1.generic[11] = ((~vec1.generic[11]) >> vec2.generic[11]); +vec1.generic[12] = ((~vec1.generic[12]) >> vec2.generic[12]); +vec1.generic[13] = ((~vec1.generic[13]) >> vec2.generic[13]); +vec1.generic[14] = ((~vec1.generic[14]) >> vec2.generic[14]); +vec1.generic[15] = ((~vec1.generic[15]) >> vec2.generic[15]); + return vec1; +} +# define VINT8x16_RSHIFT_DEFINED +#endif +#if !defined(VINT8x16_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2) +{ + union { vec_uint8 u; vec_int8 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u >>= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = 
vec1.generic[5]; + x.u >>= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u >>= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u >>= vec2.generic[7]; + vec1.generic[7] = x.s; + x.s = vec1.generic[8]; + x.u >>= vec2.generic[8]; + vec1.generic[8] = x.s; + x.s = vec1.generic[9]; + x.u >>= vec2.generic[9]; + vec1.generic[9] = x.s; + x.s = vec1.generic[10]; + x.u >>= vec2.generic[10]; + vec1.generic[10] = x.s; + x.s = vec1.generic[11]; + x.u >>= vec2.generic[11]; + vec1.generic[11] = x.s; + x.s = vec1.generic[12]; + x.u >>= vec2.generic[12]; + vec1.generic[12] = x.s; + x.s = vec1.generic[13]; + x.u >>= vec2.generic[13]; + vec1.generic[13] = x.s; + x.s = vec1.generic[14]; + x.u >>= vec2.generic[14]; + vec1.generic[14] = x.s; + x.s = vec1.generic[15]; + x.u >>= vec2.generic[15]; + vec1.generic[15] = x.s; + return vec1; +} +# define VINT8x16_LRSHIFT_DEFINED +#endif +#if !defined(VINT8x16_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2) +{ + union { vec_uint8 u; vec_int8 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u <<= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u <<= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u <<= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u <<= vec2.generic[7]; + vec1.generic[7] = x.s; + x.s = vec1.generic[8]; + x.u <<= vec2.generic[8]; + vec1.generic[8] = x.s; + x.s = vec1.generic[9]; + x.u <<= vec2.generic[9]; + vec1.generic[9] = x.s; + x.s = vec1.generic[10]; + x.u <<= vec2.generic[10]; + vec1.generic[10] = x.s; + x.s = vec1.generic[11]; + x.u <<= vec2.generic[11]; + vec1.generic[11] = x.s; + x.s = vec1.generic[12]; + x.u <<= vec2.generic[12]; + vec1.generic[12] = x.s; + x.s = vec1.generic[13]; + x.u <<= vec2.generic[13]; + vec1.generic[13] = x.s; + x.s = vec1.generic[14]; + x.u <<= vec2.generic[14]; + vec1.generic[14] = x.s; + x.s = vec1.generic[15]; + x.u <<= vec2.generic[15]; + vec1.generic[15] = x.s; + return vec1; +} +# define VINT8x16_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x16_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_splat(vec_uint8 x) +{ + vuint8x16 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + vec.generic[8] = x; + vec.generic[9] = x; + vec.generic[10] = x; + vec.generic[11] = x; + vec.generic[12] = x; + vec.generic[13] = x; + vec.generic[14] = x; + vec.generic[15] = x; + return vec; +} +# define VUINT8x16_SPLAT_DEFINED +#endif +#if !defined(VUINT8x16_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_load_aligned(const vec_uint8 x[16]) +{ + vuint8x16 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VUINT8x16_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT8x16_LOAD_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 x[16]) +{ + vuint8x16 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VUINT8x16_LOAD_DEFINED +#endif +#if !defined(VUINT8x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 x[16]) +{ + memcpy(x, vec.generic, 16); +} +# define 
VUINT8x16_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT8x16_STORE_DEFINED) +VEC_FUNC_IMPL void vuint8x16_store(vuint8x16 vec, vec_uint8 x[16]) +{ + memcpy(x, vec.generic, 16); +} +# define VUINT8x16_STORE_DEFINED +#endif +#if !defined(VUINT8x16_ADD_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_add(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + return vec1; +} +# define VUINT8x16_ADD_DEFINED +#endif +#if !defined(VUINT8x16_SUB_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_sub(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + return vec1; +} +# define VUINT8x16_SUB_DEFINED +#endif +#if !defined(VUINT8x16_MUL_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_mul(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + return vec1; +} +# define VUINT8x16_MUL_DEFINED +#endif +#if 
!defined(VUINT8x16_DIV_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_div(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0); + return vec1; +} +# define VUINT8x16_DIV_DEFINED +#endif +#if !defined(VUINT8x16_MOD_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_mod(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? 
(vec1.generic[15] % vec2.generic[15]) : 0); + return vec1; +} +# define VUINT8x16_MOD_DEFINED +#endif +#if !defined(VUINT8x16_AVG_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); +vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1); +vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1); +vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1); +vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1); +vec1.generic[8] = (vec1.generic[8] >> 1) + (vec2.generic[8] >> 1) + ((vec1.generic[8] | vec2.generic[8]) & 1); +vec1.generic[9] = (vec1.generic[9] >> 1) + (vec2.generic[9] >> 1) + ((vec1.generic[9] | vec2.generic[9]) & 1); +vec1.generic[10] = (vec1.generic[10] >> 1) + (vec2.generic[10] >> 1) + ((vec1.generic[10] | vec2.generic[10]) & 1); +vec1.generic[11] = (vec1.generic[11] >> 1) + (vec2.generic[11] >> 1) + ((vec1.generic[11] | vec2.generic[11]) & 1); +vec1.generic[12] = (vec1.generic[12] >> 1) + (vec2.generic[12] >> 1) + ((vec1.generic[12] | vec2.generic[12]) & 1); +vec1.generic[13] = (vec1.generic[13] >> 1) + (vec2.generic[13] >> 1) + ((vec1.generic[13] | vec2.generic[13]) & 1); +vec1.generic[14] = (vec1.generic[14] >> 1) + (vec2.generic[14] >> 1) + ((vec1.generic[14] | vec2.generic[14]) & 1); +vec1.generic[15] = (vec1.generic[15] >> 1) + (vec2.generic[15] >> 1) + ((vec1.generic[15] | vec2.generic[15]) & 1); + return vec1; +} +# define VUINT8x16_AVG_DEFINED +#endif +#if !defined(VUINT8x16_AND_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_and(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]); + return vec1; +} +# define VUINT8x16_AND_DEFINED +#endif +#if !defined(VUINT8x16_OR_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_or(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = 
(vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]); + return vec1; +} +# define VUINT8x16_OR_DEFINED +#endif +#if !defined(VUINT8x16_XOR_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_xor(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]); + return vec1; +} +# define VUINT8x16_XOR_DEFINED +#endif +#if !defined(VUINT8x16_NOT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_not(vuint8x16 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + vec.generic[8] = ~vec.generic[8]; + vec.generic[9] = ~vec.generic[9]; + vec.generic[10] = ~vec.generic[10]; + vec.generic[11] = ~vec.generic[11]; + vec.generic[12] = ~vec.generic[12]; + vec.generic[13] = ~vec.generic[13]; + vec.generic[14] = ~vec.generic[14]; + vec.generic[15] = ~vec.generic[15]; + return vec; +} +# define VUINT8x16_NOT_DEFINED +#endif +#if !defined(VUINT8x16_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmplt(vuint8x16 vec1, vuint8x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 
0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x16_CMPLT_DEFINED +#endif +#if !defined(VUINT8x16_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpeq(vuint8x16 vec1, vuint8x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x16_CMPEQ_DEFINED +#endif +#if !defined(VUINT8x16_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpgt(vuint8x16 vec1, vuint8x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 
0xFF : 0, 1); + return vec1; +} +# define VUINT8x16_CMPGT_DEFINED +#endif +#if !defined(VUINT8x16_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmple(vuint8x16 vec1, vuint8x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x16_CMPLE_DEFINED +#endif +#if !defined(VUINT8x16_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpge(vuint8x16 vec1, vuint8x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x16_CMPGE_DEFINED +#endif +#if !defined(VUINT8x16_MIN_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_min(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? 
(vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VUINT8x16_MIN_DEFINED +#endif +#if !defined(VUINT8x16_MAX_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_max(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? 
(vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VUINT8x16_MAX_DEFINED +#endif +#if !defined(VUINT8x16_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[1]; + vec1.generic[2] >>= vec2.generic[2]; + vec1.generic[3] >>= vec2.generic[3]; + vec1.generic[4] >>= vec2.generic[4]; + vec1.generic[5] >>= vec2.generic[5]; + vec1.generic[6] >>= vec2.generic[6]; + vec1.generic[7] >>= vec2.generic[7]; + vec1.generic[8] >>= vec2.generic[8]; + vec1.generic[9] >>= vec2.generic[9]; + vec1.generic[10] >>= vec2.generic[10]; + vec1.generic[11] >>= vec2.generic[11]; + vec1.generic[12] >>= vec2.generic[12]; + vec1.generic[13] >>= vec2.generic[13]; + vec1.generic[14] >>= vec2.generic[14]; + vec1.generic[15] >>= vec2.generic[15]; + return vec1; +} +# define VUINT8x16_RSHIFT_DEFINED +#endif +#if !defined(VUINT8x16_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[1]; + vec1.generic[2] >>= vec2.generic[2]; + vec1.generic[3] >>= vec2.generic[3]; + vec1.generic[4] >>= vec2.generic[4]; + vec1.generic[5] >>= vec2.generic[5]; + vec1.generic[6] >>= vec2.generic[6]; + vec1.generic[7] >>= vec2.generic[7]; + vec1.generic[8] >>= vec2.generic[8]; + vec1.generic[9] >>= vec2.generic[9]; + vec1.generic[10] >>= vec2.generic[10]; + vec1.generic[11] >>= vec2.generic[11]; + vec1.generic[12] >>= vec2.generic[12]; + vec1.generic[13] >>= vec2.generic[13]; + vec1.generic[14] >>= vec2.generic[14]; + vec1.generic[15] >>= vec2.generic[15]; + return vec1; +} +# define VUINT8x16_LRSHIFT_DEFINED +#endif +#if !defined(VUINT8x16_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[1]; + vec1.generic[2] <<= vec2.generic[2]; + vec1.generic[3] <<= vec2.generic[3]; + vec1.generic[4] <<= vec2.generic[4]; + vec1.generic[5] <<= vec2.generic[5]; + vec1.generic[6] <<= vec2.generic[6]; + vec1.generic[7] <<= vec2.generic[7]; + vec1.generic[8] <<= vec2.generic[8]; + vec1.generic[9] <<= vec2.generic[9]; + vec1.generic[10] <<= vec2.generic[10]; + vec1.generic[11] <<= vec2.generic[11]; + vec1.generic[12] <<= vec2.generic[12]; + vec1.generic[13] <<= vec2.generic[13]; + vec1.generic[14] <<= vec2.generic[14]; + vec1.generic[15] <<= vec2.generic[15]; + return vec1; +} +# define VUINT8x16_LSHIFT_DEFINED +#endif +#if !defined(VINT8x32_SPLAT_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_splat(vec_int8 x) +{ + vint8x32 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + vec.generic[8] = x; + vec.generic[9] = x; + vec.generic[10] = x; + vec.generic[11] = x; + vec.generic[12] = x; + vec.generic[13] = x; + vec.generic[14] = x; + vec.generic[15] = x; + vec.generic[16] = x; + vec.generic[17] = x; + vec.generic[18] = x; + vec.generic[19] = x; + vec.generic[20] = x; + vec.generic[21] = x; + vec.generic[22] = x; + vec.generic[23] = x; + vec.generic[24] = x; + vec.generic[25] = x; + vec.generic[26] = x; + vec.generic[27] = x; + vec.generic[28] = x; + vec.generic[29] = x; + vec.generic[30] = x; + vec.generic[31] = x; + return vec; +} +# define VINT8x32_SPLAT_DEFINED +#endif +#if !defined(VINT8x32_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_load_aligned(const vec_int8 x[32]) +{ + vint8x32 vec; + 
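/* scalar fallback: copy all 32 lanes with memcpy; alignment makes no difference here, so this matches the unaligned load below */ + 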
memcpy(vec.generic, x, 32); + return vec; +} +# define VINT8x32_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT8x32_LOAD_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_load(const vec_int8 x[32]) +{ + vint8x32 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VINT8x32_LOAD_DEFINED +#endif +#if !defined(VINT8x32_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x32_store_aligned(vint8x32 vec, vec_int8 x[32]) +{ + memcpy(x, vec.generic, 32); +} +# define VINT8x32_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT8x32_STORE_DEFINED) +VEC_FUNC_IMPL void vint8x32_store(vint8x32 vec, vec_int8 x[32]) +{ + memcpy(x, vec.generic, 32); +} +# define VINT8x32_STORE_DEFINED +#endif +#if !defined(VINT8x32_ADD_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_add(vint8x32 vec1, vint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] + vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] + vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] + vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] + vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] + vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] + vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] + vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] + vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] + vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] + vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] + vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] + vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] + vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] + vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] + vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] + vec2.generic[31]); + return vec1; +} +# define VINT8x32_ADD_DEFINED +#endif +#if !defined(VINT8x32_SUB_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_sub(vint8x32 vec1, vint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = 
(vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] - vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] - vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] - vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] - vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] - vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] - vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] - vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] - vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] - vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] - vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] - vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] - vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] - vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] - vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] - vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] - vec2.generic[31]); + return vec1; +} +# define VINT8x32_SUB_DEFINED +#endif +#if !defined(VINT8x32_MUL_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_mul(vint8x32 vec1, vint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] * vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] * vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] * vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] * vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] * vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] * vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] * vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] * vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] * vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] * vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] * vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] * vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] * vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] * vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] * vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] * vec2.generic[31]); + return vec1; +} +# define VINT8x32_MUL_DEFINED +#endif +#if !defined(VINT8x32_DIV_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_div(vint8x32 vec1, vint8x32 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? 
(vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0); + vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] / vec2.generic[16]) : 0); + vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] / vec2.generic[17]) : 0); + vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] / vec2.generic[18]) : 0); + vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] / vec2.generic[19]) : 0); + vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] / vec2.generic[20]) : 0); + vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] / vec2.generic[21]) : 0); + vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] / vec2.generic[22]) : 0); + vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] / vec2.generic[23]) : 0); + vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] / vec2.generic[24]) : 0); + vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] / vec2.generic[25]) : 0); + vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] / vec2.generic[26]) : 0); + vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] / vec2.generic[27]) : 0); + vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] / vec2.generic[28]) : 0); + vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] / vec2.generic[29]) : 0); + vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] / vec2.generic[30]) : 0); + vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] / vec2.generic[31]) : 0); + return vec1; +} +# define VINT8x32_DIV_DEFINED +#endif +#if !defined(VINT8x32_MOD_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_mod(vint8x32 vec1, vint8x32 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? 
(vec1.generic[9] % vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0); + vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] % vec2.generic[16]) : 0); + vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] % vec2.generic[17]) : 0); + vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] % vec2.generic[18]) : 0); + vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] % vec2.generic[19]) : 0); + vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] % vec2.generic[20]) : 0); + vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] % vec2.generic[21]) : 0); + vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] % vec2.generic[22]) : 0); + vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] % vec2.generic[23]) : 0); + vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] % vec2.generic[24]) : 0); + vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] % vec2.generic[25]) : 0); + vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] % vec2.generic[26]) : 0); + vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] % vec2.generic[27]) : 0); + vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] % vec2.generic[28]) : 0); + vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] % vec2.generic[29]) : 0); + vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] % vec2.generic[30]) : 0); + vec1.generic[31] = (vec2.generic[31] ? 
(vec1.generic[31] % vec2.generic[31]) : 0); + return vec1; +} +# define VINT8x32_MOD_DEFINED +#endif +#if !defined(VINT8x32_AVG_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_avg(vint8x32 vec1, vint8x32 vec2) +{ + vec_int8 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[4] % 2); + y_d_rem = (vec2.generic[4] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[5] % 2); + y_d_rem = (vec2.generic[5] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[6] % 2); + y_d_rem = (vec2.generic[6] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[7] % 2); + y_d_rem = (vec2.generic[7] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[8] % 2); + y_d_rem = (vec2.generic[8] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[8] = ((vec1.generic[8] / 2) + (vec2.generic[8] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[9] % 2); + y_d_rem = (vec2.generic[9] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[9] = ((vec1.generic[9] / 2) + (vec2.generic[9] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[10] % 2); + y_d_rem = (vec2.generic[10] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[10] = ((vec1.generic[10] / 2) + (vec2.generic[10] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[11] % 2); + y_d_rem = (vec2.generic[11] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[11] = ((vec1.generic[11] / 2) + (vec2.generic[11] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[12] % 2); + y_d_rem = (vec2.generic[12] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[12] = 
((vec1.generic[12] / 2) + (vec2.generic[12] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[13] % 2); + y_d_rem = (vec2.generic[13] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[13] = ((vec1.generic[13] / 2) + (vec2.generic[13] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[14] % 2); + y_d_rem = (vec2.generic[14] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[14] = ((vec1.generic[14] / 2) + (vec2.generic[14] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[15] % 2); + y_d_rem = (vec2.generic[15] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[15] = ((vec1.generic[15] / 2) + (vec2.generic[15] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[16] % 2); + y_d_rem = (vec2.generic[16] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[16] = ((vec1.generic[16] / 2) + (vec2.generic[16] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[17] % 2); + y_d_rem = (vec2.generic[17] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[17] = ((vec1.generic[17] / 2) + (vec2.generic[17] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[18] % 2); + y_d_rem = (vec2.generic[18] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[18] = ((vec1.generic[18] / 2) + (vec2.generic[18] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[19] % 2); + y_d_rem = (vec2.generic[19] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[19] = ((vec1.generic[19] / 2) + (vec2.generic[19] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[20] % 2); + y_d_rem = (vec2.generic[20] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[20] = ((vec1.generic[20] / 2) + (vec2.generic[20] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[21] % 2); + y_d_rem = (vec2.generic[21] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[21] = ((vec1.generic[21] / 2) + (vec2.generic[21] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[22] % 2); + y_d_rem = (vec2.generic[22] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[22] = ((vec1.generic[22] / 2) + (vec2.generic[22] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[23] % 2); + y_d_rem = (vec2.generic[23] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[23] = ((vec1.generic[23] / 2) + (vec2.generic[23] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[24] % 2); + y_d_rem = (vec2.generic[24] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[24] = ((vec1.generic[24] / 2) + (vec2.generic[24] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[25] % 2); + y_d_rem = (vec2.generic[25] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[25] = ((vec1.generic[25] / 2) + (vec2.generic[25] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[26] % 2); + y_d_rem = 
(vec2.generic[26] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[26] = ((vec1.generic[26] / 2) + (vec2.generic[26] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[27] % 2); + y_d_rem = (vec2.generic[27] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[27] = ((vec1.generic[27] / 2) + (vec2.generic[27] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[28] % 2); + y_d_rem = (vec2.generic[28] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[28] = ((vec1.generic[28] / 2) + (vec2.generic[28] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[29] % 2); + y_d_rem = (vec2.generic[29] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[29] = ((vec1.generic[29] / 2) + (vec2.generic[29] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[30] % 2); + y_d_rem = (vec2.generic[30] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[30] = ((vec1.generic[30] / 2) + (vec2.generic[30] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[31] % 2); + y_d_rem = (vec2.generic[31] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[31] = ((vec1.generic[31] / 2) + (vec2.generic[31] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT8x32_AVG_DEFINED +#endif +#if !defined(VINT8x32_AND_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_and(vint8x32 vec1, vint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] & vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] & vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] & vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] & vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] & vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] & vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] & vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] & vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] & vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] & vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] & vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] & vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] & vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] & vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] & vec2.generic[30]); + vec1.generic[31] = 
(vec1.generic[31] & vec2.generic[31]); + return vec1; +} +# define VINT8x32_AND_DEFINED +#endif +#if !defined(VINT8x32_OR_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_or(vint8x32 vec1, vint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] | vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] | vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] | vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] | vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] | vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] | vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] | vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] | vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] | vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] | vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] | vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] | vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] | vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] | vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] | vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] | vec2.generic[31]); + return vec1; +} +# define VINT8x32_OR_DEFINED +#endif +#if !defined(VINT8x32_XOR_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_xor(vint8x32 vec1, vint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] ^ vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] ^ vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] ^ vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] ^ vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] ^ vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] ^ vec2.generic[21]); + 
vec1.generic[22] = (vec1.generic[22] ^ vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] ^ vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] ^ vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] ^ vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] ^ vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] ^ vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] ^ vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] ^ vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] ^ vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] ^ vec2.generic[31]); + return vec1; +} +# define VINT8x32_XOR_DEFINED +#endif +#if !defined(VINT8x32_NOT_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_not(vint8x32 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + vec.generic[8] = ~vec.generic[8]; + vec.generic[9] = ~vec.generic[9]; + vec.generic[10] = ~vec.generic[10]; + vec.generic[11] = ~vec.generic[11]; + vec.generic[12] = ~vec.generic[12]; + vec.generic[13] = ~vec.generic[13]; + vec.generic[14] = ~vec.generic[14]; + vec.generic[15] = ~vec.generic[15]; + vec.generic[16] = ~vec.generic[16]; + vec.generic[17] = ~vec.generic[17]; + vec.generic[18] = ~vec.generic[18]; + vec.generic[19] = ~vec.generic[19]; + vec.generic[20] = ~vec.generic[20]; + vec.generic[21] = ~vec.generic[21]; + vec.generic[22] = ~vec.generic[22]; + vec.generic[23] = ~vec.generic[23]; + vec.generic[24] = ~vec.generic[24]; + vec.generic[25] = ~vec.generic[25]; + vec.generic[26] = ~vec.generic[26]; + vec.generic[27] = ~vec.generic[27]; + vec.generic[28] = ~vec.generic[28]; + vec.generic[29] = ~vec.generic[29]; + vec.generic[30] = ~vec.generic[30]; + vec.generic[31] = ~vec.generic[31]; + return vec; +} +# define VINT8x32_NOT_DEFINED +#endif +#if !defined(VINT8x32_CMPLT_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_cmplt(vint8x32 vec1, vint8x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] < vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] < vec2.generic[17]) ? 
0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] < vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] < vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] < vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] < vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] < vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] < vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] < vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] < vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] < vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] < vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] < vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] < vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] < vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] < vec2.generic[31]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x32_CMPLT_DEFINED +#endif +#if !defined(VINT8x32_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_cmpeq(vint8x32 vec1, vint8x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] == vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] == vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] == vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] == vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] == vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] == vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] == vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] == vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] == vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] == vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] == vec2.generic[26]) ? 
0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] == vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] == vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] == vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] == vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] == vec2.generic[31]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x32_CMPEQ_DEFINED +#endif +#if !defined(VINT8x32_CMPGT_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_cmpgt(vint8x32 vec1, vint8x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] > vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] > vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] > vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] > vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] > vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] > vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] > vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] > vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] > vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] > vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] > vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] > vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] > vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] > vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] > vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] > vec2.generic[31]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x32_CMPGT_DEFINED +#endif +#if !defined(VINT8x32_CMPLE_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_cmple(vint8x32 vec1, vint8x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 
0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] <= vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] <= vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] <= vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] <= vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] <= vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] <= vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] <= vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] <= vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] <= vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] <= vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] <= vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] <= vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] <= vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] <= vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] <= vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] <= vec2.generic[31]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x32_CMPLE_DEFINED +#endif +#if !defined(VINT8x32_CMPGE_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_cmpge(vint8x32 vec1, vint8x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 
0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] >= vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] >= vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] >= vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] >= vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] >= vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] >= vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] >= vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] >= vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] >= vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] >= vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] >= vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] >= vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] >= vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] >= vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] >= vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] >= vec2.generic[31]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x32_CMPGE_DEFINED +#endif +#if !defined(VINT8x32_MIN_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_min(vint8x32 vec1, vint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? 
(vec1.generic[15]) : (vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] < vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] < vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] < vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] < vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] < vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] < vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] < vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] < vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] < vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] < vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] < vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] < vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] < vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] < vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] < vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] < vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]); + return vec1; +} +# define VINT8x32_MIN_DEFINED +#endif +#if !defined(VINT8x32_MAX_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_max(vint8x32 vec1, vint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] > vec2.generic[16]) ? 
(vec1.generic[16]) : (vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] > vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] > vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] > vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] > vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] > vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] > vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] > vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] > vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] > vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] > vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] > vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] > vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] > vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] > vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] > vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]); + return vec1; +} +# define VINT8x32_MAX_DEFINED +#endif +#if !defined(VINT8x32_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_rshift(vint8x32 vec1, vuint8x32 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); +vec1.generic[4] = ((~vec1.generic[4]) >> vec2.generic[4]); +vec1.generic[5] = ((~vec1.generic[5]) >> vec2.generic[5]); +vec1.generic[6] = ((~vec1.generic[6]) >> vec2.generic[6]); +vec1.generic[7] = ((~vec1.generic[7]) >> vec2.generic[7]); +vec1.generic[8] = ((~vec1.generic[8]) >> vec2.generic[8]); +vec1.generic[9] = ((~vec1.generic[9]) >> vec2.generic[9]); +vec1.generic[10] = ((~vec1.generic[10]) >> vec2.generic[10]); +vec1.generic[11] = ((~vec1.generic[11]) >> vec2.generic[11]); +vec1.generic[12] = ((~vec1.generic[12]) >> vec2.generic[12]); +vec1.generic[13] = ((~vec1.generic[13]) >> vec2.generic[13]); +vec1.generic[14] = ((~vec1.generic[14]) >> vec2.generic[14]); +vec1.generic[15] = ((~vec1.generic[15]) >> vec2.generic[15]); +vec1.generic[16] = ((~vec1.generic[16]) >> vec2.generic[16]); +vec1.generic[17] = ((~vec1.generic[17]) >> vec2.generic[17]); +vec1.generic[18] = ((~vec1.generic[18]) >> vec2.generic[18]); +vec1.generic[19] = ((~vec1.generic[19]) >> vec2.generic[19]); +vec1.generic[20] = ((~vec1.generic[20]) >> vec2.generic[20]); +vec1.generic[21] = ((~vec1.generic[21]) >> vec2.generic[21]); +vec1.generic[22] = ((~vec1.generic[22]) >> vec2.generic[22]); +vec1.generic[23] = ((~vec1.generic[23]) >> vec2.generic[23]); +vec1.generic[24] = ((~vec1.generic[24]) >> vec2.generic[24]); +vec1.generic[25] = ((~vec1.generic[25]) >> vec2.generic[25]); +vec1.generic[26] = ((~vec1.generic[26]) >> vec2.generic[26]); +vec1.generic[27] = ((~vec1.generic[27]) >> vec2.generic[27]); +vec1.generic[28] = ((~vec1.generic[28]) >> vec2.generic[28]); 
+vec1.generic[29] = ((~vec1.generic[29]) >> vec2.generic[29]); +vec1.generic[30] = ((~vec1.generic[30]) >> vec2.generic[30]); +vec1.generic[31] = ((~vec1.generic[31]) >> vec2.generic[31]); + return vec1; +} +# define VINT8x32_RSHIFT_DEFINED +#endif +#if !defined(VINT8x32_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_lrshift(vint8x32 vec1, vuint8x32 vec2) +{ + union { vec_uint8 u; vec_int8 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u >>= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u >>= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u >>= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u >>= vec2.generic[7]; + vec1.generic[7] = x.s; + x.s = vec1.generic[8]; + x.u >>= vec2.generic[8]; + vec1.generic[8] = x.s; + x.s = vec1.generic[9]; + x.u >>= vec2.generic[9]; + vec1.generic[9] = x.s; + x.s = vec1.generic[10]; + x.u >>= vec2.generic[10]; + vec1.generic[10] = x.s; + x.s = vec1.generic[11]; + x.u >>= vec2.generic[11]; + vec1.generic[11] = x.s; + x.s = vec1.generic[12]; + x.u >>= vec2.generic[12]; + vec1.generic[12] = x.s; + x.s = vec1.generic[13]; + x.u >>= vec2.generic[13]; + vec1.generic[13] = x.s; + x.s = vec1.generic[14]; + x.u >>= vec2.generic[14]; + vec1.generic[14] = x.s; + x.s = vec1.generic[15]; + x.u >>= vec2.generic[15]; + vec1.generic[15] = x.s; + x.s = vec1.generic[16]; + x.u >>= vec2.generic[16]; + vec1.generic[16] = x.s; + x.s = vec1.generic[17]; + x.u >>= vec2.generic[17]; + vec1.generic[17] = x.s; + x.s = vec1.generic[18]; + x.u >>= vec2.generic[18]; + vec1.generic[18] = x.s; + x.s = vec1.generic[19]; + x.u >>= vec2.generic[19]; + vec1.generic[19] = x.s; + x.s = vec1.generic[20]; + x.u >>= vec2.generic[20]; + vec1.generic[20] = x.s; + x.s = vec1.generic[21]; + x.u >>= vec2.generic[21]; + vec1.generic[21] = x.s; + x.s = vec1.generic[22]; + x.u >>= vec2.generic[22]; + vec1.generic[22] = x.s; + x.s = vec1.generic[23]; + x.u >>= vec2.generic[23]; + vec1.generic[23] = x.s; + x.s = vec1.generic[24]; + x.u >>= vec2.generic[24]; + vec1.generic[24] = x.s; + x.s = vec1.generic[25]; + x.u >>= vec2.generic[25]; + vec1.generic[25] = x.s; + x.s = vec1.generic[26]; + x.u >>= vec2.generic[26]; + vec1.generic[26] = x.s; + x.s = vec1.generic[27]; + x.u >>= vec2.generic[27]; + vec1.generic[27] = x.s; + x.s = vec1.generic[28]; + x.u >>= vec2.generic[28]; + vec1.generic[28] = x.s; + x.s = vec1.generic[29]; + x.u >>= vec2.generic[29]; + vec1.generic[29] = x.s; + x.s = vec1.generic[30]; + x.u >>= vec2.generic[30]; + vec1.generic[30] = x.s; + x.s = vec1.generic[31]; + x.u >>= vec2.generic[31]; + vec1.generic[31] = x.s; + return vec1; +} +# define VINT8x32_LRSHIFT_DEFINED +#endif +#if !defined(VINT8x32_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x32 vint8x32_lshift(vint8x32 vec1, vuint8x32 vec2) +{ + union { vec_uint8 u; vec_int8 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u <<= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = 
vec1.generic[5]; + x.u <<= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u <<= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u <<= vec2.generic[7]; + vec1.generic[7] = x.s; + x.s = vec1.generic[8]; + x.u <<= vec2.generic[8]; + vec1.generic[8] = x.s; + x.s = vec1.generic[9]; + x.u <<= vec2.generic[9]; + vec1.generic[9] = x.s; + x.s = vec1.generic[10]; + x.u <<= vec2.generic[10]; + vec1.generic[10] = x.s; + x.s = vec1.generic[11]; + x.u <<= vec2.generic[11]; + vec1.generic[11] = x.s; + x.s = vec1.generic[12]; + x.u <<= vec2.generic[12]; + vec1.generic[12] = x.s; + x.s = vec1.generic[13]; + x.u <<= vec2.generic[13]; + vec1.generic[13] = x.s; + x.s = vec1.generic[14]; + x.u <<= vec2.generic[14]; + vec1.generic[14] = x.s; + x.s = vec1.generic[15]; + x.u <<= vec2.generic[15]; + vec1.generic[15] = x.s; + x.s = vec1.generic[16]; + x.u <<= vec2.generic[16]; + vec1.generic[16] = x.s; + x.s = vec1.generic[17]; + x.u <<= vec2.generic[17]; + vec1.generic[17] = x.s; + x.s = vec1.generic[18]; + x.u <<= vec2.generic[18]; + vec1.generic[18] = x.s; + x.s = vec1.generic[19]; + x.u <<= vec2.generic[19]; + vec1.generic[19] = x.s; + x.s = vec1.generic[20]; + x.u <<= vec2.generic[20]; + vec1.generic[20] = x.s; + x.s = vec1.generic[21]; + x.u <<= vec2.generic[21]; + vec1.generic[21] = x.s; + x.s = vec1.generic[22]; + x.u <<= vec2.generic[22]; + vec1.generic[22] = x.s; + x.s = vec1.generic[23]; + x.u <<= vec2.generic[23]; + vec1.generic[23] = x.s; + x.s = vec1.generic[24]; + x.u <<= vec2.generic[24]; + vec1.generic[24] = x.s; + x.s = vec1.generic[25]; + x.u <<= vec2.generic[25]; + vec1.generic[25] = x.s; + x.s = vec1.generic[26]; + x.u <<= vec2.generic[26]; + vec1.generic[26] = x.s; + x.s = vec1.generic[27]; + x.u <<= vec2.generic[27]; + vec1.generic[27] = x.s; + x.s = vec1.generic[28]; + x.u <<= vec2.generic[28]; + vec1.generic[28] = x.s; + x.s = vec1.generic[29]; + x.u <<= vec2.generic[29]; + vec1.generic[29] = x.s; + x.s = vec1.generic[30]; + x.u <<= vec2.generic[30]; + vec1.generic[30] = x.s; + x.s = vec1.generic[31]; + x.u <<= vec2.generic[31]; + vec1.generic[31] = x.s; + return vec1; +} +# define VINT8x32_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x32_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_splat(vec_uint8 x) +{ + vuint8x32 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + vec.generic[8] = x; + vec.generic[9] = x; + vec.generic[10] = x; + vec.generic[11] = x; + vec.generic[12] = x; + vec.generic[13] = x; + vec.generic[14] = x; + vec.generic[15] = x; + vec.generic[16] = x; + vec.generic[17] = x; + vec.generic[18] = x; + vec.generic[19] = x; + vec.generic[20] = x; + vec.generic[21] = x; + vec.generic[22] = x; + vec.generic[23] = x; + vec.generic[24] = x; + vec.generic[25] = x; + vec.generic[26] = x; + vec.generic[27] = x; + vec.generic[28] = x; + vec.generic[29] = x; + vec.generic[30] = x; + vec.generic[31] = x; + return vec; +} +# define VUINT8x32_SPLAT_DEFINED +#endif +#if !defined(VUINT8x32_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_load_aligned(const vec_uint8 x[32]) +{ + vuint8x32 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VUINT8x32_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT8x32_LOAD_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_load(const vec_uint8 x[32]) +{ + vuint8x32 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VUINT8x32_LOAD_DEFINED +#endif +#if 
!defined(VUINT8x32_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x32_store_aligned(vuint8x32 vec, vec_uint8 x[32]) +{ + memcpy(x, vec.generic, 32); +} +# define VUINT8x32_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT8x32_STORE_DEFINED) +VEC_FUNC_IMPL void vuint8x32_store(vuint8x32 vec, vec_uint8 x[32]) +{ + memcpy(x, vec.generic, 32); +} +# define VUINT8x32_STORE_DEFINED +#endif +#if !defined(VUINT8x32_ADD_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_add(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] + vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] + vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] + vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] + vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] + vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] + vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] + vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] + vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] + vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] + vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] + vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] + vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] + vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] + vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] + vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] + vec2.generic[31]); + return vec1; +} +# define VUINT8x32_ADD_DEFINED +#endif +#if !defined(VUINT8x32_SUB_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_sub(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] - vec2.generic[16]); + 
vec1.generic[17] = (vec1.generic[17] - vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] - vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] - vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] - vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] - vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] - vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] - vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] - vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] - vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] - vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] - vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] - vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] - vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] - vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] - vec2.generic[31]); + return vec1; +} +# define VUINT8x32_SUB_DEFINED +#endif +#if !defined(VUINT8x32_MUL_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_mul(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] * vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] * vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] * vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] * vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] * vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] * vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] * vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] * vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] * vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] * vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] * vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] * vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] * vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] * vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] * vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] * vec2.generic[31]); + return vec1; +} +# define VUINT8x32_MUL_DEFINED +#endif +#if !defined(VUINT8x32_DIV_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_div(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? 
(vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0); + vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] / vec2.generic[16]) : 0); + vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] / vec2.generic[17]) : 0); + vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] / vec2.generic[18]) : 0); + vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] / vec2.generic[19]) : 0); + vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] / vec2.generic[20]) : 0); + vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] / vec2.generic[21]) : 0); + vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] / vec2.generic[22]) : 0); + vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] / vec2.generic[23]) : 0); + vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] / vec2.generic[24]) : 0); + vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] / vec2.generic[25]) : 0); + vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] / vec2.generic[26]) : 0); + vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] / vec2.generic[27]) : 0); + vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] / vec2.generic[28]) : 0); + vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] / vec2.generic[29]) : 0); + vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] / vec2.generic[30]) : 0); + vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] / vec2.generic[31]) : 0); + return vec1; +} +# define VUINT8x32_DIV_DEFINED +#endif +#if !defined(VUINT8x32_MOD_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_mod(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? 
(vec1.generic[12] % vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0); + vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] % vec2.generic[16]) : 0); + vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] % vec2.generic[17]) : 0); + vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] % vec2.generic[18]) : 0); + vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] % vec2.generic[19]) : 0); + vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] % vec2.generic[20]) : 0); + vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] % vec2.generic[21]) : 0); + vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] % vec2.generic[22]) : 0); + vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] % vec2.generic[23]) : 0); + vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] % vec2.generic[24]) : 0); + vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] % vec2.generic[25]) : 0); + vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] % vec2.generic[26]) : 0); + vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] % vec2.generic[27]) : 0); + vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] % vec2.generic[28]) : 0); + vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] % vec2.generic[29]) : 0); + vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] % vec2.generic[30]) : 0); + vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] % vec2.generic[31]) : 0); + return vec1; +} +# define VUINT8x32_MOD_DEFINED +#endif +#if !defined(VUINT8x32_AVG_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_avg(vuint8x32 vec1, vuint8x32 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); +vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1); +vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1); +vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1); +vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1); +vec1.generic[8] = (vec1.generic[8] >> 1) + (vec2.generic[8] >> 1) + ((vec1.generic[8] | vec2.generic[8]) & 1); +vec1.generic[9] = (vec1.generic[9] >> 1) + (vec2.generic[9] >> 1) + ((vec1.generic[9] | vec2.generic[9]) & 1); +vec1.generic[10] = (vec1.generic[10] >> 1) + (vec2.generic[10] >> 1) + ((vec1.generic[10] | vec2.generic[10]) & 1); +vec1.generic[11] = (vec1.generic[11] >> 1) + (vec2.generic[11] >> 1) + ((vec1.generic[11] | vec2.generic[11]) & 1); +vec1.generic[12] = (vec1.generic[12] >> 1) + (vec2.generic[12] >> 1) + ((vec1.generic[12] | vec2.generic[12]) & 1); +vec1.generic[13] = (vec1.generic[13] >> 1) + (vec2.generic[13] >> 1) + ((vec1.generic[13] | vec2.generic[13]) & 1); +vec1.generic[14] = (vec1.generic[14] >> 1) + (vec2.generic[14] >> 1) + ((vec1.generic[14] | vec2.generic[14]) & 1); +vec1.generic[15] = 
(vec1.generic[15] >> 1) + (vec2.generic[15] >> 1) + ((vec1.generic[15] | vec2.generic[15]) & 1); +vec1.generic[16] = (vec1.generic[16] >> 1) + (vec2.generic[16] >> 1) + ((vec1.generic[16] | vec2.generic[16]) & 1); +vec1.generic[17] = (vec1.generic[17] >> 1) + (vec2.generic[17] >> 1) + ((vec1.generic[17] | vec2.generic[17]) & 1); +vec1.generic[18] = (vec1.generic[18] >> 1) + (vec2.generic[18] >> 1) + ((vec1.generic[18] | vec2.generic[18]) & 1); +vec1.generic[19] = (vec1.generic[19] >> 1) + (vec2.generic[19] >> 1) + ((vec1.generic[19] | vec2.generic[19]) & 1); +vec1.generic[20] = (vec1.generic[20] >> 1) + (vec2.generic[20] >> 1) + ((vec1.generic[20] | vec2.generic[20]) & 1); +vec1.generic[21] = (vec1.generic[21] >> 1) + (vec2.generic[21] >> 1) + ((vec1.generic[21] | vec2.generic[21]) & 1); +vec1.generic[22] = (vec1.generic[22] >> 1) + (vec2.generic[22] >> 1) + ((vec1.generic[22] | vec2.generic[22]) & 1); +vec1.generic[23] = (vec1.generic[23] >> 1) + (vec2.generic[23] >> 1) + ((vec1.generic[23] | vec2.generic[23]) & 1); +vec1.generic[24] = (vec1.generic[24] >> 1) + (vec2.generic[24] >> 1) + ((vec1.generic[24] | vec2.generic[24]) & 1); +vec1.generic[25] = (vec1.generic[25] >> 1) + (vec2.generic[25] >> 1) + ((vec1.generic[25] | vec2.generic[25]) & 1); +vec1.generic[26] = (vec1.generic[26] >> 1) + (vec2.generic[26] >> 1) + ((vec1.generic[26] | vec2.generic[26]) & 1); +vec1.generic[27] = (vec1.generic[27] >> 1) + (vec2.generic[27] >> 1) + ((vec1.generic[27] | vec2.generic[27]) & 1); +vec1.generic[28] = (vec1.generic[28] >> 1) + (vec2.generic[28] >> 1) + ((vec1.generic[28] | vec2.generic[28]) & 1); +vec1.generic[29] = (vec1.generic[29] >> 1) + (vec2.generic[29] >> 1) + ((vec1.generic[29] | vec2.generic[29]) & 1); +vec1.generic[30] = (vec1.generic[30] >> 1) + (vec2.generic[30] >> 1) + ((vec1.generic[30] | vec2.generic[30]) & 1); +vec1.generic[31] = (vec1.generic[31] >> 1) + (vec2.generic[31] >> 1) + ((vec1.generic[31] | vec2.generic[31]) & 1); + return vec1; +} +# define VUINT8x32_AVG_DEFINED +#endif +#if !defined(VUINT8x32_AND_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_and(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] & vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] & vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] & vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] & vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] & vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] & vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] & vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] & vec2.generic[23]); + 
vec1.generic[24] = (vec1.generic[24] & vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] & vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] & vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] & vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] & vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] & vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] & vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] & vec2.generic[31]); + return vec1; +} +# define VUINT8x32_AND_DEFINED +#endif +#if !defined(VUINT8x32_OR_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_or(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] | vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] | vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] | vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] | vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] | vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] | vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] | vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] | vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] | vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] | vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] | vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] | vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] | vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] | vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] | vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] | vec2.generic[31]); + return vec1; +} +# define VUINT8x32_OR_DEFINED +#endif +#if !defined(VUINT8x32_XOR_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_xor(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] ^ 
vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] ^ vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] ^ vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] ^ vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] ^ vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] ^ vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] ^ vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] ^ vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] ^ vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] ^ vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] ^ vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] ^ vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] ^ vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] ^ vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] ^ vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] ^ vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] ^ vec2.generic[31]); + return vec1; +} +# define VUINT8x32_XOR_DEFINED +#endif +#if !defined(VUINT8x32_NOT_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_not(vuint8x32 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + vec.generic[8] = ~vec.generic[8]; + vec.generic[9] = ~vec.generic[9]; + vec.generic[10] = ~vec.generic[10]; + vec.generic[11] = ~vec.generic[11]; + vec.generic[12] = ~vec.generic[12]; + vec.generic[13] = ~vec.generic[13]; + vec.generic[14] = ~vec.generic[14]; + vec.generic[15] = ~vec.generic[15]; + vec.generic[16] = ~vec.generic[16]; + vec.generic[17] = ~vec.generic[17]; + vec.generic[18] = ~vec.generic[18]; + vec.generic[19] = ~vec.generic[19]; + vec.generic[20] = ~vec.generic[20]; + vec.generic[21] = ~vec.generic[21]; + vec.generic[22] = ~vec.generic[22]; + vec.generic[23] = ~vec.generic[23]; + vec.generic[24] = ~vec.generic[24]; + vec.generic[25] = ~vec.generic[25]; + vec.generic[26] = ~vec.generic[26]; + vec.generic[27] = ~vec.generic[27]; + vec.generic[28] = ~vec.generic[28]; + vec.generic[29] = ~vec.generic[29]; + vec.generic[30] = ~vec.generic[30]; + vec.generic[31] = ~vec.generic[31]; + return vec; +} +# define VUINT8x32_NOT_DEFINED +#endif +#if !defined(VUINT8x32_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmplt(vuint8x32 vec1, vuint8x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 
0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] < vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] < vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] < vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] < vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] < vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] < vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] < vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] < vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] < vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] < vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] < vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] < vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] < vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] < vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] < vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] < vec2.generic[31]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x32_CMPLT_DEFINED +#endif +#if !defined(VUINT8x32_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpeq(vuint8x32 vec1, vuint8x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] == vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] == vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] == vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] == vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] == vec2.generic[20]) ? 
0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] == vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] == vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] == vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] == vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] == vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] == vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] == vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] == vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] == vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] == vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] == vec2.generic[31]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x32_CMPEQ_DEFINED +#endif +#if !defined(VUINT8x32_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpgt(vuint8x32 vec1, vuint8x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] > vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] > vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] > vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] > vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] > vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] > vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] > vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] > vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] > vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] > vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] > vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] > vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] > vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] > vec2.generic[29]) ? 
0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] > vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] > vec2.generic[31]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x32_CMPGT_DEFINED +#endif +#if !defined(VUINT8x32_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmple(vuint8x32 vec1, vuint8x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] <= vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] <= vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] <= vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] <= vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] <= vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] <= vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] <= vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] <= vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] <= vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] <= vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] <= vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] <= vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] <= vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] <= vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] <= vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] <= vec2.generic[31]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x32_CMPLE_DEFINED +#endif +#if !defined(VUINT8x32_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpge(vuint8x32 vec1, vuint8x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 
0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] >= vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] >= vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] >= vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] >= vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] >= vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] >= vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] >= vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] >= vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] >= vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] >= vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] >= vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] >= vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] >= vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] >= vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] >= vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] >= vec2.generic[31]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x32_CMPGE_DEFINED +#endif +#if !defined(VUINT8x32_MIN_DEFINED) +VEC_FUNC_IMPL vuint8x32 vuint8x32_min(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? 
(vec1.generic[11]) : (vec2.generic[11]);
+ vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+ vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+ vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+ vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+ vec1.generic[16] = (vec1.generic[16] < vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+ vec1.generic[17] = (vec1.generic[17] < vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+ vec1.generic[18] = (vec1.generic[18] < vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+ vec1.generic[19] = (vec1.generic[19] < vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+ vec1.generic[20] = (vec1.generic[20] < vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+ vec1.generic[21] = (vec1.generic[21] < vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+ vec1.generic[22] = (vec1.generic[22] < vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+ vec1.generic[23] = (vec1.generic[23] < vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+ vec1.generic[24] = (vec1.generic[24] < vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+ vec1.generic[25] = (vec1.generic[25] < vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+ vec1.generic[26] = (vec1.generic[26] < vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+ vec1.generic[27] = (vec1.generic[27] < vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+ vec1.generic[28] = (vec1.generic[28] < vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+ vec1.generic[29] = (vec1.generic[29] < vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+ vec1.generic[30] = (vec1.generic[30] < vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+ vec1.generic[31] = (vec1.generic[31] < vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+ return vec1;
+}
+# define VUINT8x32_MIN_DEFINED
+#endif
+#if !defined(VUINT8x32_MAX_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_max(vuint8x32 vec1, vuint8x32 vec2)
+{
+ vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+ vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+ vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+ vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+ vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+ vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+ vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+ vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+ vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+ vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+ vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+ vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+ vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+ vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+ vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+ vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+ vec1.generic[16] = (vec1.generic[16] > vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+ vec1.generic[17] = (vec1.generic[17] > vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+ vec1.generic[18] = (vec1.generic[18] > vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+ vec1.generic[19] = (vec1.generic[19] > vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+ vec1.generic[20] = (vec1.generic[20] > vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+ vec1.generic[21] = (vec1.generic[21] > vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+ vec1.generic[22] = (vec1.generic[22] > vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+ vec1.generic[23] = (vec1.generic[23] > vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+ vec1.generic[24] = (vec1.generic[24] > vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+ vec1.generic[25] = (vec1.generic[25] > vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+ vec1.generic[26] = (vec1.generic[26] > vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+ vec1.generic[27] = (vec1.generic[27] > vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+ vec1.generic[28] = (vec1.generic[28] > vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+ vec1.generic[29] = (vec1.generic[29] > vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+ vec1.generic[30] = (vec1.generic[30] > vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+ vec1.generic[31] = (vec1.generic[31] > vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+ return vec1;
+}
+# define VUINT8x32_MAX_DEFINED
+#endif
+#if !defined(VUINT8x32_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_rshift(vuint8x32 vec1, vuint8x32 vec2)
+{
+ vec1.generic[0] >>= vec2.generic[0];
+ vec1.generic[1] >>= vec2.generic[0];
+ vec1.generic[2] >>= vec2.generic[0];
+ vec1.generic[3] >>= vec2.generic[0];
+ vec1.generic[4] >>= vec2.generic[0];
+ vec1.generic[5] >>= vec2.generic[0];
+ vec1.generic[6] >>= vec2.generic[0];
+ vec1.generic[7] >>= vec2.generic[0];
+ vec1.generic[8] >>= vec2.generic[0];
+ vec1.generic[9] >>= vec2.generic[0];
+ vec1.generic[10] >>= vec2.generic[0];
+ vec1.generic[11] >>= vec2.generic[0];
+ vec1.generic[12] >>= vec2.generic[0];
+ vec1.generic[13] >>= vec2.generic[0];
+ vec1.generic[14] >>= vec2.generic[0];
+ vec1.generic[15] >>= vec2.generic[0];
+ vec1.generic[16] >>= vec2.generic[0];
+ vec1.generic[17] >>= vec2.generic[0];
+ vec1.generic[18] >>= vec2.generic[0];
+ vec1.generic[19] >>= vec2.generic[0];
+ vec1.generic[20] >>= vec2.generic[0];
+ vec1.generic[21] >>= vec2.generic[0];
+ vec1.generic[22] >>= vec2.generic[0];
+ vec1.generic[23] >>= vec2.generic[0];
+ vec1.generic[24] >>= vec2.generic[0];
+ vec1.generic[25] >>= vec2.generic[0];
+ vec1.generic[26] >>= vec2.generic[0];
+ vec1.generic[27] >>= vec2.generic[0];
+ vec1.generic[28] >>= vec2.generic[0];
+ vec1.generic[29] >>= vec2.generic[0];
+ vec1.generic[30] >>= vec2.generic[0];
+ vec1.generic[31] >>= vec2.generic[0];
+ return vec1;
+}
+# define VUINT8x32_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x32_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_lrshift(vuint8x32 vec1, vuint8x32 vec2)
+{
+ vec1.generic[0] >>= vec2.generic[0];
+ vec1.generic[1] >>= vec2.generic[0];
+ vec1.generic[2] >>= vec2.generic[0];
+ vec1.generic[3] >>= vec2.generic[0];
+ vec1.generic[4] >>= vec2.generic[0];
+ vec1.generic[5] >>= vec2.generic[0];
+ vec1.generic[6] >>= vec2.generic[0];
+ vec1.generic[7] >>= vec2.generic[0];
+ vec1.generic[8] >>= vec2.generic[0];
+ vec1.generic[9] >>= vec2.generic[0];
+ vec1.generic[10] >>= vec2.generic[0];
+ vec1.generic[11] >>= vec2.generic[0];
+ vec1.generic[12] >>= vec2.generic[0];
+ vec1.generic[13] >>= vec2.generic[0];
+ vec1.generic[14] >>= vec2.generic[0];
+ vec1.generic[15] >>= vec2.generic[0];
+ vec1.generic[16] >>= vec2.generic[0];
+ vec1.generic[17] >>= vec2.generic[0];
+ vec1.generic[18] >>= vec2.generic[0];
+ vec1.generic[19] >>= vec2.generic[0];
+ vec1.generic[20] >>= vec2.generic[0];
+ vec1.generic[21] >>= vec2.generic[0];
+ vec1.generic[22] >>= vec2.generic[0];
+ vec1.generic[23] >>= vec2.generic[0];
+ vec1.generic[24] >>= vec2.generic[0];
+ vec1.generic[25] >>= vec2.generic[0];
+ vec1.generic[26] >>= vec2.generic[0];
+ vec1.generic[27] >>= vec2.generic[0];
+ vec1.generic[28] >>= vec2.generic[0];
+ vec1.generic[29] >>= vec2.generic[0];
+ vec1.generic[30] >>= vec2.generic[0];
+ vec1.generic[31] >>= vec2.generic[0];
+ return vec1;
+}
+# define VUINT8x32_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x32_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_lshift(vuint8x32 vec1, vuint8x32 vec2)
+{
+ vec1.generic[0] <<= vec2.generic[0];
+ vec1.generic[1] <<= vec2.generic[0];
+ vec1.generic[2] <<= vec2.generic[0];
+ vec1.generic[3] <<= vec2.generic[0];
+ vec1.generic[4] <<= vec2.generic[0];
+ vec1.generic[5] <<= vec2.generic[0];
+ vec1.generic[6] <<= vec2.generic[0];
+ vec1.generic[7] <<= vec2.generic[0];
+ vec1.generic[8] <<= vec2.generic[0];
+ vec1.generic[9] <<= vec2.generic[0];
+ vec1.generic[10] <<= vec2.generic[0];
+
vec1.generic[11] <<= vec2.generic[0]; + vec1.generic[12] <<= vec2.generic[0]; + vec1.generic[13] <<= vec2.generic[0]; + vec1.generic[14] <<= vec2.generic[0]; + vec1.generic[15] <<= vec2.generic[0]; + vec1.generic[16] <<= vec2.generic[0]; + vec1.generic[17] <<= vec2.generic[0]; + vec1.generic[18] <<= vec2.generic[0]; + vec1.generic[19] <<= vec2.generic[0]; + vec1.generic[20] <<= vec2.generic[0]; + vec1.generic[21] <<= vec2.generic[0]; + vec1.generic[22] <<= vec2.generic[0]; + vec1.generic[23] <<= vec2.generic[0]; + vec1.generic[24] <<= vec2.generic[0]; + vec1.generic[25] <<= vec2.generic[0]; + vec1.generic[26] <<= vec2.generic[0]; + vec1.generic[27] <<= vec2.generic[0]; + vec1.generic[28] <<= vec2.generic[0]; + vec1.generic[29] <<= vec2.generic[0]; + vec1.generic[30] <<= vec2.generic[0]; + vec1.generic[31] <<= vec2.generic[0]; + return vec1; +} +# define VUINT8x32_LSHIFT_DEFINED +#endif +#if !defined(VINT8x64_SPLAT_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_splat(vec_int8 x) +{ + vint8x64 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + vec.generic[8] = x; + vec.generic[9] = x; + vec.generic[10] = x; + vec.generic[11] = x; + vec.generic[12] = x; + vec.generic[13] = x; + vec.generic[14] = x; + vec.generic[15] = x; + vec.generic[16] = x; + vec.generic[17] = x; + vec.generic[18] = x; + vec.generic[19] = x; + vec.generic[20] = x; + vec.generic[21] = x; + vec.generic[22] = x; + vec.generic[23] = x; + vec.generic[24] = x; + vec.generic[25] = x; + vec.generic[26] = x; + vec.generic[27] = x; + vec.generic[28] = x; + vec.generic[29] = x; + vec.generic[30] = x; + vec.generic[31] = x; + vec.generic[32] = x; + vec.generic[33] = x; + vec.generic[34] = x; + vec.generic[35] = x; + vec.generic[36] = x; + vec.generic[37] = x; + vec.generic[38] = x; + vec.generic[39] = x; + vec.generic[40] = x; + vec.generic[41] = x; + vec.generic[42] = x; + vec.generic[43] = x; + vec.generic[44] = x; + vec.generic[45] = x; + vec.generic[46] = x; + vec.generic[47] = x; + vec.generic[48] = x; + vec.generic[49] = x; + vec.generic[50] = x; + vec.generic[51] = x; + vec.generic[52] = x; + vec.generic[53] = x; + vec.generic[54] = x; + vec.generic[55] = x; + vec.generic[56] = x; + vec.generic[57] = x; + vec.generic[58] = x; + vec.generic[59] = x; + vec.generic[60] = x; + vec.generic[61] = x; + vec.generic[62] = x; + vec.generic[63] = x; + return vec; +} +# define VINT8x64_SPLAT_DEFINED +#endif +#if !defined(VINT8x64_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_load_aligned(const vec_int8 x[64]) +{ + vint8x64 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VINT8x64_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT8x64_LOAD_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_load(const vec_int8 x[64]) +{ + vint8x64 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VINT8x64_LOAD_DEFINED +#endif +#if !defined(VINT8x64_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x64_store_aligned(vint8x64 vec, vec_int8 x[64]) +{ + memcpy(x, vec.generic, 64); +} +# define VINT8x64_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT8x64_STORE_DEFINED) +VEC_FUNC_IMPL void vint8x64_store(vint8x64 vec, vec_int8 x[64]) +{ + memcpy(x, vec.generic, 64); +} +# define VINT8x64_STORE_DEFINED +#endif +#if !defined(VINT8x64_ADD_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_add(vint8x64 vec1, vint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + 
vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] + vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] + vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] + vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] + vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] + vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] + vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] + vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] + vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] + vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] + vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] + vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] + vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] + vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] + vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] + vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] + vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] + vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] + vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] + vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] + vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] + vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] + vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] + vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] + vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] + vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] + vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] + vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] + vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] + vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] + vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] + vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] + vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] + vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] + vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] + vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] + vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] + vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] + vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] + vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] + vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] + vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] + vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] + vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] + vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] + vec2.generic[60]); + vec1.generic[61] = 
(vec1.generic[61] + vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] + vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] + vec2.generic[63]); + return vec1; +} +# define VINT8x64_ADD_DEFINED +#endif +#if !defined(VINT8x64_SUB_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_sub(vint8x64 vec1, vint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] - vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] - vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] - vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] - vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] - vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] - vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] - vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] - vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] - vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] - vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] - vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] - vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] - vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] - vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] - vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] - vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] - vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] - vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] - vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] - vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] - vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] - vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] - vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] - vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] - vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] - vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] - vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] - vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] - vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] - vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] - vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] - vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] - vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] - vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] - vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] - vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] - vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] - vec2.generic[53]); + vec1.generic[54] = 
(vec1.generic[54] - vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] - vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] - vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] - vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] - vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] - vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] - vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] - vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] - vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] - vec2.generic[63]); + return vec1; +} +# define VINT8x64_SUB_DEFINED +#endif +#if !defined(VINT8x64_MUL_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_mul(vint8x64 vec1, vint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] * vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] * vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] * vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] * vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] * vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] * vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] * vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] * vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] * vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] * vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] * vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] * vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] * vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] * vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] * vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] * vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] * vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] * vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] * vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] * vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] * vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] * vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] * vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] * vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] * vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] * vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] * vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] * vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] * vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] * vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] * vec2.generic[46]); + vec1.generic[47] = 
(vec1.generic[47] * vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] * vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] * vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] * vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] * vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] * vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] * vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] * vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] * vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] * vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] * vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] * vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] * vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] * vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] * vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] * vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] * vec2.generic[63]); + return vec1; +} +# define VINT8x64_MUL_DEFINED +#endif +#if !defined(VINT8x64_DIV_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_div(vint8x64 vec1, vint8x64 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0); + vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] / vec2.generic[16]) : 0); + vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] / vec2.generic[17]) : 0); + vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] / vec2.generic[18]) : 0); + vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] / vec2.generic[19]) : 0); + vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] / vec2.generic[20]) : 0); + vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] / vec2.generic[21]) : 0); + vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] / vec2.generic[22]) : 0); + vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] / vec2.generic[23]) : 0); + vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] / vec2.generic[24]) : 0); + vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] / vec2.generic[25]) : 0); + vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] / vec2.generic[26]) : 0); + vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] / vec2.generic[27]) : 0); + vec1.generic[28] = (vec2.generic[28] ? 
(vec1.generic[28] / vec2.generic[28]) : 0); + vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] / vec2.generic[29]) : 0); + vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] / vec2.generic[30]) : 0); + vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] / vec2.generic[31]) : 0); + vec1.generic[32] = (vec2.generic[32] ? (vec1.generic[32] / vec2.generic[32]) : 0); + vec1.generic[33] = (vec2.generic[33] ? (vec1.generic[33] / vec2.generic[33]) : 0); + vec1.generic[34] = (vec2.generic[34] ? (vec1.generic[34] / vec2.generic[34]) : 0); + vec1.generic[35] = (vec2.generic[35] ? (vec1.generic[35] / vec2.generic[35]) : 0); + vec1.generic[36] = (vec2.generic[36] ? (vec1.generic[36] / vec2.generic[36]) : 0); + vec1.generic[37] = (vec2.generic[37] ? (vec1.generic[37] / vec2.generic[37]) : 0); + vec1.generic[38] = (vec2.generic[38] ? (vec1.generic[38] / vec2.generic[38]) : 0); + vec1.generic[39] = (vec2.generic[39] ? (vec1.generic[39] / vec2.generic[39]) : 0); + vec1.generic[40] = (vec2.generic[40] ? (vec1.generic[40] / vec2.generic[40]) : 0); + vec1.generic[41] = (vec2.generic[41] ? (vec1.generic[41] / vec2.generic[41]) : 0); + vec1.generic[42] = (vec2.generic[42] ? (vec1.generic[42] / vec2.generic[42]) : 0); + vec1.generic[43] = (vec2.generic[43] ? (vec1.generic[43] / vec2.generic[43]) : 0); + vec1.generic[44] = (vec2.generic[44] ? (vec1.generic[44] / vec2.generic[44]) : 0); + vec1.generic[45] = (vec2.generic[45] ? (vec1.generic[45] / vec2.generic[45]) : 0); + vec1.generic[46] = (vec2.generic[46] ? (vec1.generic[46] / vec2.generic[46]) : 0); + vec1.generic[47] = (vec2.generic[47] ? (vec1.generic[47] / vec2.generic[47]) : 0); + vec1.generic[48] = (vec2.generic[48] ? (vec1.generic[48] / vec2.generic[48]) : 0); + vec1.generic[49] = (vec2.generic[49] ? (vec1.generic[49] / vec2.generic[49]) : 0); + vec1.generic[50] = (vec2.generic[50] ? (vec1.generic[50] / vec2.generic[50]) : 0); + vec1.generic[51] = (vec2.generic[51] ? (vec1.generic[51] / vec2.generic[51]) : 0); + vec1.generic[52] = (vec2.generic[52] ? (vec1.generic[52] / vec2.generic[52]) : 0); + vec1.generic[53] = (vec2.generic[53] ? (vec1.generic[53] / vec2.generic[53]) : 0); + vec1.generic[54] = (vec2.generic[54] ? (vec1.generic[54] / vec2.generic[54]) : 0); + vec1.generic[55] = (vec2.generic[55] ? (vec1.generic[55] / vec2.generic[55]) : 0); + vec1.generic[56] = (vec2.generic[56] ? (vec1.generic[56] / vec2.generic[56]) : 0); + vec1.generic[57] = (vec2.generic[57] ? (vec1.generic[57] / vec2.generic[57]) : 0); + vec1.generic[58] = (vec2.generic[58] ? (vec1.generic[58] / vec2.generic[58]) : 0); + vec1.generic[59] = (vec2.generic[59] ? (vec1.generic[59] / vec2.generic[59]) : 0); + vec1.generic[60] = (vec2.generic[60] ? (vec1.generic[60] / vec2.generic[60]) : 0); + vec1.generic[61] = (vec2.generic[61] ? (vec1.generic[61] / vec2.generic[61]) : 0); + vec1.generic[62] = (vec2.generic[62] ? (vec1.generic[62] / vec2.generic[62]) : 0); + vec1.generic[63] = (vec2.generic[63] ? (vec1.generic[63] / vec2.generic[63]) : 0); + return vec1; +} +# define VINT8x64_DIV_DEFINED +#endif +#if !defined(VINT8x64_MOD_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_mod(vint8x64 vec1, vint8x64 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? 
(vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0); + vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] % vec2.generic[16]) : 0); + vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] % vec2.generic[17]) : 0); + vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] % vec2.generic[18]) : 0); + vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] % vec2.generic[19]) : 0); + vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] % vec2.generic[20]) : 0); + vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] % vec2.generic[21]) : 0); + vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] % vec2.generic[22]) : 0); + vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] % vec2.generic[23]) : 0); + vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] % vec2.generic[24]) : 0); + vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] % vec2.generic[25]) : 0); + vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] % vec2.generic[26]) : 0); + vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] % vec2.generic[27]) : 0); + vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] % vec2.generic[28]) : 0); + vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] % vec2.generic[29]) : 0); + vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] % vec2.generic[30]) : 0); + vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] % vec2.generic[31]) : 0); + vec1.generic[32] = (vec2.generic[32] ? (vec1.generic[32] % vec2.generic[32]) : 0); + vec1.generic[33] = (vec2.generic[33] ? (vec1.generic[33] % vec2.generic[33]) : 0); + vec1.generic[34] = (vec2.generic[34] ? (vec1.generic[34] % vec2.generic[34]) : 0); + vec1.generic[35] = (vec2.generic[35] ? (vec1.generic[35] % vec2.generic[35]) : 0); + vec1.generic[36] = (vec2.generic[36] ? (vec1.generic[36] % vec2.generic[36]) : 0); + vec1.generic[37] = (vec2.generic[37] ? (vec1.generic[37] % vec2.generic[37]) : 0); + vec1.generic[38] = (vec2.generic[38] ? (vec1.generic[38] % vec2.generic[38]) : 0); + vec1.generic[39] = (vec2.generic[39] ? (vec1.generic[39] % vec2.generic[39]) : 0); + vec1.generic[40] = (vec2.generic[40] ? (vec1.generic[40] % vec2.generic[40]) : 0); + vec1.generic[41] = (vec2.generic[41] ? (vec1.generic[41] % vec2.generic[41]) : 0); + vec1.generic[42] = (vec2.generic[42] ? (vec1.generic[42] % vec2.generic[42]) : 0); + vec1.generic[43] = (vec2.generic[43] ? (vec1.generic[43] % vec2.generic[43]) : 0); + vec1.generic[44] = (vec2.generic[44] ? (vec1.generic[44] % vec2.generic[44]) : 0); + vec1.generic[45] = (vec2.generic[45] ? (vec1.generic[45] % vec2.generic[45]) : 0); + vec1.generic[46] = (vec2.generic[46] ? 
(vec1.generic[46] % vec2.generic[46]) : 0); + vec1.generic[47] = (vec2.generic[47] ? (vec1.generic[47] % vec2.generic[47]) : 0); + vec1.generic[48] = (vec2.generic[48] ? (vec1.generic[48] % vec2.generic[48]) : 0); + vec1.generic[49] = (vec2.generic[49] ? (vec1.generic[49] % vec2.generic[49]) : 0); + vec1.generic[50] = (vec2.generic[50] ? (vec1.generic[50] % vec2.generic[50]) : 0); + vec1.generic[51] = (vec2.generic[51] ? (vec1.generic[51] % vec2.generic[51]) : 0); + vec1.generic[52] = (vec2.generic[52] ? (vec1.generic[52] % vec2.generic[52]) : 0); + vec1.generic[53] = (vec2.generic[53] ? (vec1.generic[53] % vec2.generic[53]) : 0); + vec1.generic[54] = (vec2.generic[54] ? (vec1.generic[54] % vec2.generic[54]) : 0); + vec1.generic[55] = (vec2.generic[55] ? (vec1.generic[55] % vec2.generic[55]) : 0); + vec1.generic[56] = (vec2.generic[56] ? (vec1.generic[56] % vec2.generic[56]) : 0); + vec1.generic[57] = (vec2.generic[57] ? (vec1.generic[57] % vec2.generic[57]) : 0); + vec1.generic[58] = (vec2.generic[58] ? (vec1.generic[58] % vec2.generic[58]) : 0); + vec1.generic[59] = (vec2.generic[59] ? (vec1.generic[59] % vec2.generic[59]) : 0); + vec1.generic[60] = (vec2.generic[60] ? (vec1.generic[60] % vec2.generic[60]) : 0); + vec1.generic[61] = (vec2.generic[61] ? (vec1.generic[61] % vec2.generic[61]) : 0); + vec1.generic[62] = (vec2.generic[62] ? (vec1.generic[62] % vec2.generic[62]) : 0); + vec1.generic[63] = (vec2.generic[63] ? (vec1.generic[63] % vec2.generic[63]) : 0); + return vec1; +} +# define VINT8x64_MOD_DEFINED +#endif +#if !defined(VINT8x64_AVG_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_avg(vint8x64 vec1, vint8x64 vec2) +{ + vec_int8 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[4] % 2); + y_d_rem = (vec2.generic[4] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[5] % 2); + y_d_rem = (vec2.generic[5] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[6] % 2); + y_d_rem = (vec2.generic[6] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[7] % 2); + y_d_rem 
= (vec2.generic[7] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[8] % 2); + y_d_rem = (vec2.generic[8] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[8] = ((vec1.generic[8] / 2) + (vec2.generic[8] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[9] % 2); + y_d_rem = (vec2.generic[9] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[9] = ((vec1.generic[9] / 2) + (vec2.generic[9] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[10] % 2); + y_d_rem = (vec2.generic[10] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[10] = ((vec1.generic[10] / 2) + (vec2.generic[10] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[11] % 2); + y_d_rem = (vec2.generic[11] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[11] = ((vec1.generic[11] / 2) + (vec2.generic[11] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[12] % 2); + y_d_rem = (vec2.generic[12] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[12] = ((vec1.generic[12] / 2) + (vec2.generic[12] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[13] % 2); + y_d_rem = (vec2.generic[13] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[13] = ((vec1.generic[13] / 2) + (vec2.generic[13] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[14] % 2); + y_d_rem = (vec2.generic[14] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[14] = ((vec1.generic[14] / 2) + (vec2.generic[14] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[15] % 2); + y_d_rem = (vec2.generic[15] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[15] = ((vec1.generic[15] / 2) + (vec2.generic[15] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[16] % 2); + y_d_rem = (vec2.generic[16] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[16] = ((vec1.generic[16] / 2) + (vec2.generic[16] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[17] % 2); + y_d_rem = (vec2.generic[17] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[17] = ((vec1.generic[17] / 2) + (vec2.generic[17] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[18] % 2); + y_d_rem = (vec2.generic[18] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[18] = ((vec1.generic[18] / 2) + (vec2.generic[18] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[19] % 2); + y_d_rem = (vec2.generic[19] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[19] = ((vec1.generic[19] / 2) + (vec2.generic[19] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[20] % 2); + y_d_rem = (vec2.generic[20] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[20] = 
((vec1.generic[20] / 2) + (vec2.generic[20] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[21] % 2); + y_d_rem = (vec2.generic[21] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[21] = ((vec1.generic[21] / 2) + (vec2.generic[21] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[22] % 2); + y_d_rem = (vec2.generic[22] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[22] = ((vec1.generic[22] / 2) + (vec2.generic[22] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[23] % 2); + y_d_rem = (vec2.generic[23] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[23] = ((vec1.generic[23] / 2) + (vec2.generic[23] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[24] % 2); + y_d_rem = (vec2.generic[24] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[24] = ((vec1.generic[24] / 2) + (vec2.generic[24] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[25] % 2); + y_d_rem = (vec2.generic[25] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[25] = ((vec1.generic[25] / 2) + (vec2.generic[25] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[26] % 2); + y_d_rem = (vec2.generic[26] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[26] = ((vec1.generic[26] / 2) + (vec2.generic[26] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[27] % 2); + y_d_rem = (vec2.generic[27] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[27] = ((vec1.generic[27] / 2) + (vec2.generic[27] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[28] % 2); + y_d_rem = (vec2.generic[28] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[28] = ((vec1.generic[28] / 2) + (vec2.generic[28] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[29] % 2); + y_d_rem = (vec2.generic[29] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[29] = ((vec1.generic[29] / 2) + (vec2.generic[29] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[30] % 2); + y_d_rem = (vec2.generic[30] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[30] = ((vec1.generic[30] / 2) + (vec2.generic[30] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[31] % 2); + y_d_rem = (vec2.generic[31] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[31] = ((vec1.generic[31] / 2) + (vec2.generic[31] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[32] % 2); + y_d_rem = (vec2.generic[32] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[32] = ((vec1.generic[32] / 2) + (vec2.generic[32] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[33] % 2); + y_d_rem = (vec2.generic[33] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[33] = ((vec1.generic[33] / 2) + (vec2.generic[33] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[34] % 2); + y_d_rem = 
(vec2.generic[34] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[34] = ((vec1.generic[34] / 2) + (vec2.generic[34] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[35] % 2); + y_d_rem = (vec2.generic[35] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[35] = ((vec1.generic[35] / 2) + (vec2.generic[35] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[36] % 2); + y_d_rem = (vec2.generic[36] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[36] = ((vec1.generic[36] / 2) + (vec2.generic[36] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[37] % 2); + y_d_rem = (vec2.generic[37] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[37] = ((vec1.generic[37] / 2) + (vec2.generic[37] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[38] % 2); + y_d_rem = (vec2.generic[38] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[38] = ((vec1.generic[38] / 2) + (vec2.generic[38] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[39] % 2); + y_d_rem = (vec2.generic[39] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[39] = ((vec1.generic[39] / 2) + (vec2.generic[39] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[40] % 2); + y_d_rem = (vec2.generic[40] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[40] = ((vec1.generic[40] / 2) + (vec2.generic[40] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[41] % 2); + y_d_rem = (vec2.generic[41] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[41] = ((vec1.generic[41] / 2) + (vec2.generic[41] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[42] % 2); + y_d_rem = (vec2.generic[42] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[42] = ((vec1.generic[42] / 2) + (vec2.generic[42] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[43] % 2); + y_d_rem = (vec2.generic[43] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[43] = ((vec1.generic[43] / 2) + (vec2.generic[43] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[44] % 2); + y_d_rem = (vec2.generic[44] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[44] = ((vec1.generic[44] / 2) + (vec2.generic[44] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[45] % 2); + y_d_rem = (vec2.generic[45] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[45] = ((vec1.generic[45] / 2) + (vec2.generic[45] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[46] % 2); + y_d_rem = (vec2.generic[46] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[46] = ((vec1.generic[46] / 2) + (vec2.generic[46] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[47] % 2); + y_d_rem = (vec2.generic[47] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[47] = 
((vec1.generic[47] / 2) + (vec2.generic[47] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[48] % 2); + y_d_rem = (vec2.generic[48] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[48] = ((vec1.generic[48] / 2) + (vec2.generic[48] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[49] % 2); + y_d_rem = (vec2.generic[49] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[49] = ((vec1.generic[49] / 2) + (vec2.generic[49] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[50] % 2); + y_d_rem = (vec2.generic[50] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[50] = ((vec1.generic[50] / 2) + (vec2.generic[50] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[51] % 2); + y_d_rem = (vec2.generic[51] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[51] = ((vec1.generic[51] / 2) + (vec2.generic[51] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[52] % 2); + y_d_rem = (vec2.generic[52] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[52] = ((vec1.generic[52] / 2) + (vec2.generic[52] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[53] % 2); + y_d_rem = (vec2.generic[53] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[53] = ((vec1.generic[53] / 2) + (vec2.generic[53] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[54] % 2); + y_d_rem = (vec2.generic[54] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[54] = ((vec1.generic[54] / 2) + (vec2.generic[54] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[55] % 2); + y_d_rem = (vec2.generic[55] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[55] = ((vec1.generic[55] / 2) + (vec2.generic[55] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[56] % 2); + y_d_rem = (vec2.generic[56] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[56] = ((vec1.generic[56] / 2) + (vec2.generic[56] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[57] % 2); + y_d_rem = (vec2.generic[57] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[57] = ((vec1.generic[57] / 2) + (vec2.generic[57] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[58] % 2); + y_d_rem = (vec2.generic[58] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[58] = ((vec1.generic[58] / 2) + (vec2.generic[58] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[59] % 2); + y_d_rem = (vec2.generic[59] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[59] = ((vec1.generic[59] / 2) + (vec2.generic[59] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[60] % 2); + y_d_rem = (vec2.generic[60] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[60] = ((vec1.generic[60] / 2) + (vec2.generic[60] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[61] % 2); + y_d_rem = 
(vec2.generic[61] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[61] = ((vec1.generic[61] / 2) + (vec2.generic[61] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[62] % 2); + y_d_rem = (vec2.generic[62] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[62] = ((vec1.generic[62] / 2) + (vec2.generic[62] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[63] % 2); + y_d_rem = (vec2.generic[63] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[63] = ((vec1.generic[63] / 2) + (vec2.generic[63] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT8x64_AVG_DEFINED +#endif +#if !defined(VINT8x64_AND_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_and(vint8x64 vec1, vint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] & vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] & vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] & vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] & vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] & vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] & vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] & vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] & vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] & vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] & vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] & vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] & vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] & vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] & vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] & vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] & vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] & vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] & vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] & vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] & vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] & vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] & vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] & vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] & vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] & vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] & vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] & vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] & vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] & 
vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] & vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] & vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] & vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] & vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] & vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] & vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] & vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] & vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] & vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] & vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] & vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] & vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] & vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] & vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] & vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] & vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] & vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] & vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] & vec2.generic[63]); + return vec1; +} +# define VINT8x64_AND_DEFINED +#endif +#if !defined(VINT8x64_OR_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_or(vint8x64 vec1, vint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] | vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] | vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] | vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] | vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] | vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] | vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] | vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] | vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] | vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] | vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] | vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] | vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] | vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] | vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] | vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] | vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] | vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] | vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] | vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] | vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] | vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] | 
vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] | vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] | vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] | vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] | vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] | vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] | vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] | vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] | vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] | vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] | vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] | vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] | vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] | vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] | vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] | vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] | vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] | vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] | vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] | vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] | vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] | vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] | vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] | vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] | vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] | vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] | vec2.generic[63]); + return vec1; +} +# define VINT8x64_OR_DEFINED +#endif +#if !defined(VINT8x64_XOR_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_xor(vint8x64 vec1, vint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] ^ vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] ^ vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] ^ vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] ^ vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] ^ vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] ^ vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] ^ vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] ^ vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] ^ vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] ^ vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] ^ vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] ^ vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] ^ vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] ^ vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] ^ 
vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] ^ vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] ^ vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] ^ vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] ^ vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] ^ vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] ^ vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] ^ vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] ^ vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] ^ vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] ^ vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] ^ vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] ^ vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] ^ vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] ^ vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] ^ vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] ^ vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] ^ vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] ^ vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] ^ vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] ^ vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] ^ vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] ^ vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] ^ vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] ^ vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] ^ vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] ^ vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] ^ vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] ^ vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] ^ vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] ^ vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] ^ vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] ^ vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] ^ vec2.generic[63]); + return vec1; +} +# define VINT8x64_XOR_DEFINED +#endif +#if !defined(VINT8x64_NOT_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_not(vint8x64 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + vec.generic[8] = ~vec.generic[8]; + vec.generic[9] = ~vec.generic[9]; + vec.generic[10] = ~vec.generic[10]; + vec.generic[11] = ~vec.generic[11]; + vec.generic[12] = ~vec.generic[12]; + vec.generic[13] = ~vec.generic[13]; + vec.generic[14] = ~vec.generic[14]; + vec.generic[15] = ~vec.generic[15]; + vec.generic[16] = ~vec.generic[16]; + vec.generic[17] = ~vec.generic[17]; + vec.generic[18] = ~vec.generic[18]; + vec.generic[19] = ~vec.generic[19]; + vec.generic[20] = ~vec.generic[20]; + vec.generic[21] = ~vec.generic[21]; + vec.generic[22] = ~vec.generic[22]; + vec.generic[23] = ~vec.generic[23]; + vec.generic[24] = ~vec.generic[24]; + vec.generic[25] = ~vec.generic[25]; + vec.generic[26] = ~vec.generic[26]; + vec.generic[27] = ~vec.generic[27]; + vec.generic[28] = ~vec.generic[28]; + vec.generic[29] = ~vec.generic[29]; + vec.generic[30] = ~vec.generic[30]; + vec.generic[31] = ~vec.generic[31]; + vec.generic[32] = ~vec.generic[32]; + vec.generic[33] = ~vec.generic[33]; + vec.generic[34] = ~vec.generic[34]; + vec.generic[35] = ~vec.generic[35]; + vec.generic[36] = ~vec.generic[36]; + vec.generic[37] = 
~vec.generic[37]; + vec.generic[38] = ~vec.generic[38]; + vec.generic[39] = ~vec.generic[39]; + vec.generic[40] = ~vec.generic[40]; + vec.generic[41] = ~vec.generic[41]; + vec.generic[42] = ~vec.generic[42]; + vec.generic[43] = ~vec.generic[43]; + vec.generic[44] = ~vec.generic[44]; + vec.generic[45] = ~vec.generic[45]; + vec.generic[46] = ~vec.generic[46]; + vec.generic[47] = ~vec.generic[47]; + vec.generic[48] = ~vec.generic[48]; + vec.generic[49] = ~vec.generic[49]; + vec.generic[50] = ~vec.generic[50]; + vec.generic[51] = ~vec.generic[51]; + vec.generic[52] = ~vec.generic[52]; + vec.generic[53] = ~vec.generic[53]; + vec.generic[54] = ~vec.generic[54]; + vec.generic[55] = ~vec.generic[55]; + vec.generic[56] = ~vec.generic[56]; + vec.generic[57] = ~vec.generic[57]; + vec.generic[58] = ~vec.generic[58]; + vec.generic[59] = ~vec.generic[59]; + vec.generic[60] = ~vec.generic[60]; + vec.generic[61] = ~vec.generic[61]; + vec.generic[62] = ~vec.generic[62]; + vec.generic[63] = ~vec.generic[63]; + return vec; +} +# define VINT8x64_NOT_DEFINED +#endif +#if !defined(VINT8x64_CMPLT_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_cmplt(vint8x64 vec1, vint8x64 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] < vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] < vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] < vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] < vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] < vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] < vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] < vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] < vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] < vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] < vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] < vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] < vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] < vec2.generic[28]) ? 
0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] < vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] < vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] < vec2.generic[31]) ? 0xFF : 0, 1); + memset(&vec1.generic[32], (vec1.generic[32] < vec2.generic[32]) ? 0xFF : 0, 1); + memset(&vec1.generic[33], (vec1.generic[33] < vec2.generic[33]) ? 0xFF : 0, 1); + memset(&vec1.generic[34], (vec1.generic[34] < vec2.generic[34]) ? 0xFF : 0, 1); + memset(&vec1.generic[35], (vec1.generic[35] < vec2.generic[35]) ? 0xFF : 0, 1); + memset(&vec1.generic[36], (vec1.generic[36] < vec2.generic[36]) ? 0xFF : 0, 1); + memset(&vec1.generic[37], (vec1.generic[37] < vec2.generic[37]) ? 0xFF : 0, 1); + memset(&vec1.generic[38], (vec1.generic[38] < vec2.generic[38]) ? 0xFF : 0, 1); + memset(&vec1.generic[39], (vec1.generic[39] < vec2.generic[39]) ? 0xFF : 0, 1); + memset(&vec1.generic[40], (vec1.generic[40] < vec2.generic[40]) ? 0xFF : 0, 1); + memset(&vec1.generic[41], (vec1.generic[41] < vec2.generic[41]) ? 0xFF : 0, 1); + memset(&vec1.generic[42], (vec1.generic[42] < vec2.generic[42]) ? 0xFF : 0, 1); + memset(&vec1.generic[43], (vec1.generic[43] < vec2.generic[43]) ? 0xFF : 0, 1); + memset(&vec1.generic[44], (vec1.generic[44] < vec2.generic[44]) ? 0xFF : 0, 1); + memset(&vec1.generic[45], (vec1.generic[45] < vec2.generic[45]) ? 0xFF : 0, 1); + memset(&vec1.generic[46], (vec1.generic[46] < vec2.generic[46]) ? 0xFF : 0, 1); + memset(&vec1.generic[47], (vec1.generic[47] < vec2.generic[47]) ? 0xFF : 0, 1); + memset(&vec1.generic[48], (vec1.generic[48] < vec2.generic[48]) ? 0xFF : 0, 1); + memset(&vec1.generic[49], (vec1.generic[49] < vec2.generic[49]) ? 0xFF : 0, 1); + memset(&vec1.generic[50], (vec1.generic[50] < vec2.generic[50]) ? 0xFF : 0, 1); + memset(&vec1.generic[51], (vec1.generic[51] < vec2.generic[51]) ? 0xFF : 0, 1); + memset(&vec1.generic[52], (vec1.generic[52] < vec2.generic[52]) ? 0xFF : 0, 1); + memset(&vec1.generic[53], (vec1.generic[53] < vec2.generic[53]) ? 0xFF : 0, 1); + memset(&vec1.generic[54], (vec1.generic[54] < vec2.generic[54]) ? 0xFF : 0, 1); + memset(&vec1.generic[55], (vec1.generic[55] < vec2.generic[55]) ? 0xFF : 0, 1); + memset(&vec1.generic[56], (vec1.generic[56] < vec2.generic[56]) ? 0xFF : 0, 1); + memset(&vec1.generic[57], (vec1.generic[57] < vec2.generic[57]) ? 0xFF : 0, 1); + memset(&vec1.generic[58], (vec1.generic[58] < vec2.generic[58]) ? 0xFF : 0, 1); + memset(&vec1.generic[59], (vec1.generic[59] < vec2.generic[59]) ? 0xFF : 0, 1); + memset(&vec1.generic[60], (vec1.generic[60] < vec2.generic[60]) ? 0xFF : 0, 1); + memset(&vec1.generic[61], (vec1.generic[61] < vec2.generic[61]) ? 0xFF : 0, 1); + memset(&vec1.generic[62], (vec1.generic[62] < vec2.generic[62]) ? 0xFF : 0, 1); + memset(&vec1.generic[63], (vec1.generic[63] < vec2.generic[63]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x64_CMPLT_DEFINED +#endif +#if !defined(VINT8x64_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_cmpeq(vint8x64 vec1, vint8x64 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 
0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] == vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] == vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] == vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] == vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] == vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] == vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] == vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] == vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] == vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] == vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] == vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] == vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] == vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] == vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] == vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] == vec2.generic[31]) ? 0xFF : 0, 1); + memset(&vec1.generic[32], (vec1.generic[32] == vec2.generic[32]) ? 0xFF : 0, 1); + memset(&vec1.generic[33], (vec1.generic[33] == vec2.generic[33]) ? 0xFF : 0, 1); + memset(&vec1.generic[34], (vec1.generic[34] == vec2.generic[34]) ? 0xFF : 0, 1); + memset(&vec1.generic[35], (vec1.generic[35] == vec2.generic[35]) ? 0xFF : 0, 1); + memset(&vec1.generic[36], (vec1.generic[36] == vec2.generic[36]) ? 0xFF : 0, 1); + memset(&vec1.generic[37], (vec1.generic[37] == vec2.generic[37]) ? 0xFF : 0, 1); + memset(&vec1.generic[38], (vec1.generic[38] == vec2.generic[38]) ? 0xFF : 0, 1); + memset(&vec1.generic[39], (vec1.generic[39] == vec2.generic[39]) ? 0xFF : 0, 1); + memset(&vec1.generic[40], (vec1.generic[40] == vec2.generic[40]) ? 0xFF : 0, 1); + memset(&vec1.generic[41], (vec1.generic[41] == vec2.generic[41]) ? 0xFF : 0, 1); + memset(&vec1.generic[42], (vec1.generic[42] == vec2.generic[42]) ? 0xFF : 0, 1); + memset(&vec1.generic[43], (vec1.generic[43] == vec2.generic[43]) ? 0xFF : 0, 1); + memset(&vec1.generic[44], (vec1.generic[44] == vec2.generic[44]) ? 0xFF : 0, 1); + memset(&vec1.generic[45], (vec1.generic[45] == vec2.generic[45]) ? 0xFF : 0, 1); + memset(&vec1.generic[46], (vec1.generic[46] == vec2.generic[46]) ? 0xFF : 0, 1); + memset(&vec1.generic[47], (vec1.generic[47] == vec2.generic[47]) ? 
0xFF : 0, 1); + memset(&vec1.generic[48], (vec1.generic[48] == vec2.generic[48]) ? 0xFF : 0, 1); + memset(&vec1.generic[49], (vec1.generic[49] == vec2.generic[49]) ? 0xFF : 0, 1); + memset(&vec1.generic[50], (vec1.generic[50] == vec2.generic[50]) ? 0xFF : 0, 1); + memset(&vec1.generic[51], (vec1.generic[51] == vec2.generic[51]) ? 0xFF : 0, 1); + memset(&vec1.generic[52], (vec1.generic[52] == vec2.generic[52]) ? 0xFF : 0, 1); + memset(&vec1.generic[53], (vec1.generic[53] == vec2.generic[53]) ? 0xFF : 0, 1); + memset(&vec1.generic[54], (vec1.generic[54] == vec2.generic[54]) ? 0xFF : 0, 1); + memset(&vec1.generic[55], (vec1.generic[55] == vec2.generic[55]) ? 0xFF : 0, 1); + memset(&vec1.generic[56], (vec1.generic[56] == vec2.generic[56]) ? 0xFF : 0, 1); + memset(&vec1.generic[57], (vec1.generic[57] == vec2.generic[57]) ? 0xFF : 0, 1); + memset(&vec1.generic[58], (vec1.generic[58] == vec2.generic[58]) ? 0xFF : 0, 1); + memset(&vec1.generic[59], (vec1.generic[59] == vec2.generic[59]) ? 0xFF : 0, 1); + memset(&vec1.generic[60], (vec1.generic[60] == vec2.generic[60]) ? 0xFF : 0, 1); + memset(&vec1.generic[61], (vec1.generic[61] == vec2.generic[61]) ? 0xFF : 0, 1); + memset(&vec1.generic[62], (vec1.generic[62] == vec2.generic[62]) ? 0xFF : 0, 1); + memset(&vec1.generic[63], (vec1.generic[63] == vec2.generic[63]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x64_CMPEQ_DEFINED +#endif +#if !defined(VINT8x64_CMPGT_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_cmpgt(vint8x64 vec1, vint8x64 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] > vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] > vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] > vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] > vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] > vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] > vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] > vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] > vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] > vec2.generic[24]) ? 
0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] > vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] > vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] > vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] > vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] > vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] > vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] > vec2.generic[31]) ? 0xFF : 0, 1); + memset(&vec1.generic[32], (vec1.generic[32] > vec2.generic[32]) ? 0xFF : 0, 1); + memset(&vec1.generic[33], (vec1.generic[33] > vec2.generic[33]) ? 0xFF : 0, 1); + memset(&vec1.generic[34], (vec1.generic[34] > vec2.generic[34]) ? 0xFF : 0, 1); + memset(&vec1.generic[35], (vec1.generic[35] > vec2.generic[35]) ? 0xFF : 0, 1); + memset(&vec1.generic[36], (vec1.generic[36] > vec2.generic[36]) ? 0xFF : 0, 1); + memset(&vec1.generic[37], (vec1.generic[37] > vec2.generic[37]) ? 0xFF : 0, 1); + memset(&vec1.generic[38], (vec1.generic[38] > vec2.generic[38]) ? 0xFF : 0, 1); + memset(&vec1.generic[39], (vec1.generic[39] > vec2.generic[39]) ? 0xFF : 0, 1); + memset(&vec1.generic[40], (vec1.generic[40] > vec2.generic[40]) ? 0xFF : 0, 1); + memset(&vec1.generic[41], (vec1.generic[41] > vec2.generic[41]) ? 0xFF : 0, 1); + memset(&vec1.generic[42], (vec1.generic[42] > vec2.generic[42]) ? 0xFF : 0, 1); + memset(&vec1.generic[43], (vec1.generic[43] > vec2.generic[43]) ? 0xFF : 0, 1); + memset(&vec1.generic[44], (vec1.generic[44] > vec2.generic[44]) ? 0xFF : 0, 1); + memset(&vec1.generic[45], (vec1.generic[45] > vec2.generic[45]) ? 0xFF : 0, 1); + memset(&vec1.generic[46], (vec1.generic[46] > vec2.generic[46]) ? 0xFF : 0, 1); + memset(&vec1.generic[47], (vec1.generic[47] > vec2.generic[47]) ? 0xFF : 0, 1); + memset(&vec1.generic[48], (vec1.generic[48] > vec2.generic[48]) ? 0xFF : 0, 1); + memset(&vec1.generic[49], (vec1.generic[49] > vec2.generic[49]) ? 0xFF : 0, 1); + memset(&vec1.generic[50], (vec1.generic[50] > vec2.generic[50]) ? 0xFF : 0, 1); + memset(&vec1.generic[51], (vec1.generic[51] > vec2.generic[51]) ? 0xFF : 0, 1); + memset(&vec1.generic[52], (vec1.generic[52] > vec2.generic[52]) ? 0xFF : 0, 1); + memset(&vec1.generic[53], (vec1.generic[53] > vec2.generic[53]) ? 0xFF : 0, 1); + memset(&vec1.generic[54], (vec1.generic[54] > vec2.generic[54]) ? 0xFF : 0, 1); + memset(&vec1.generic[55], (vec1.generic[55] > vec2.generic[55]) ? 0xFF : 0, 1); + memset(&vec1.generic[56], (vec1.generic[56] > vec2.generic[56]) ? 0xFF : 0, 1); + memset(&vec1.generic[57], (vec1.generic[57] > vec2.generic[57]) ? 0xFF : 0, 1); + memset(&vec1.generic[58], (vec1.generic[58] > vec2.generic[58]) ? 0xFF : 0, 1); + memset(&vec1.generic[59], (vec1.generic[59] > vec2.generic[59]) ? 0xFF : 0, 1); + memset(&vec1.generic[60], (vec1.generic[60] > vec2.generic[60]) ? 0xFF : 0, 1); + memset(&vec1.generic[61], (vec1.generic[61] > vec2.generic[61]) ? 0xFF : 0, 1); + memset(&vec1.generic[62], (vec1.generic[62] > vec2.generic[62]) ? 0xFF : 0, 1); + memset(&vec1.generic[63], (vec1.generic[63] > vec2.generic[63]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x64_CMPGT_DEFINED +#endif +#if !defined(VINT8x64_CMPLE_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_cmple(vint8x64 vec1, vint8x64 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 
0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] <= vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] <= vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] <= vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] <= vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] <= vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] <= vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] <= vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] <= vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] <= vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] <= vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] <= vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] <= vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] <= vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] <= vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] <= vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] <= vec2.generic[31]) ? 0xFF : 0, 1); + memset(&vec1.generic[32], (vec1.generic[32] <= vec2.generic[32]) ? 0xFF : 0, 1); + memset(&vec1.generic[33], (vec1.generic[33] <= vec2.generic[33]) ? 0xFF : 0, 1); + memset(&vec1.generic[34], (vec1.generic[34] <= vec2.generic[34]) ? 0xFF : 0, 1); + memset(&vec1.generic[35], (vec1.generic[35] <= vec2.generic[35]) ? 0xFF : 0, 1); + memset(&vec1.generic[36], (vec1.generic[36] <= vec2.generic[36]) ? 0xFF : 0, 1); + memset(&vec1.generic[37], (vec1.generic[37] <= vec2.generic[37]) ? 0xFF : 0, 1); + memset(&vec1.generic[38], (vec1.generic[38] <= vec2.generic[38]) ? 0xFF : 0, 1); + memset(&vec1.generic[39], (vec1.generic[39] <= vec2.generic[39]) ? 0xFF : 0, 1); + memset(&vec1.generic[40], (vec1.generic[40] <= vec2.generic[40]) ? 0xFF : 0, 1); + memset(&vec1.generic[41], (vec1.generic[41] <= vec2.generic[41]) ? 0xFF : 0, 1); + memset(&vec1.generic[42], (vec1.generic[42] <= vec2.generic[42]) ? 0xFF : 0, 1); + memset(&vec1.generic[43], (vec1.generic[43] <= vec2.generic[43]) ? 0xFF : 0, 1); + memset(&vec1.generic[44], (vec1.generic[44] <= vec2.generic[44]) ? 
0xFF : 0, 1); + memset(&vec1.generic[45], (vec1.generic[45] <= vec2.generic[45]) ? 0xFF : 0, 1); + memset(&vec1.generic[46], (vec1.generic[46] <= vec2.generic[46]) ? 0xFF : 0, 1); + memset(&vec1.generic[47], (vec1.generic[47] <= vec2.generic[47]) ? 0xFF : 0, 1); + memset(&vec1.generic[48], (vec1.generic[48] <= vec2.generic[48]) ? 0xFF : 0, 1); + memset(&vec1.generic[49], (vec1.generic[49] <= vec2.generic[49]) ? 0xFF : 0, 1); + memset(&vec1.generic[50], (vec1.generic[50] <= vec2.generic[50]) ? 0xFF : 0, 1); + memset(&vec1.generic[51], (vec1.generic[51] <= vec2.generic[51]) ? 0xFF : 0, 1); + memset(&vec1.generic[52], (vec1.generic[52] <= vec2.generic[52]) ? 0xFF : 0, 1); + memset(&vec1.generic[53], (vec1.generic[53] <= vec2.generic[53]) ? 0xFF : 0, 1); + memset(&vec1.generic[54], (vec1.generic[54] <= vec2.generic[54]) ? 0xFF : 0, 1); + memset(&vec1.generic[55], (vec1.generic[55] <= vec2.generic[55]) ? 0xFF : 0, 1); + memset(&vec1.generic[56], (vec1.generic[56] <= vec2.generic[56]) ? 0xFF : 0, 1); + memset(&vec1.generic[57], (vec1.generic[57] <= vec2.generic[57]) ? 0xFF : 0, 1); + memset(&vec1.generic[58], (vec1.generic[58] <= vec2.generic[58]) ? 0xFF : 0, 1); + memset(&vec1.generic[59], (vec1.generic[59] <= vec2.generic[59]) ? 0xFF : 0, 1); + memset(&vec1.generic[60], (vec1.generic[60] <= vec2.generic[60]) ? 0xFF : 0, 1); + memset(&vec1.generic[61], (vec1.generic[61] <= vec2.generic[61]) ? 0xFF : 0, 1); + memset(&vec1.generic[62], (vec1.generic[62] <= vec2.generic[62]) ? 0xFF : 0, 1); + memset(&vec1.generic[63], (vec1.generic[63] <= vec2.generic[63]) ? 0xFF : 0, 1); + return vec1; +} +# define VINT8x64_CMPLE_DEFINED +#endif +#if !defined(VINT8x64_CMPGE_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_cmpge(vint8x64 vec1, vint8x64 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] >= vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] >= vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] >= vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] >= vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] >= vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] >= vec2.generic[21]) ? 
0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] >= vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] >= vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] >= vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] >= vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] >= vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] >= vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] >= vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] >= vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] >= vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] >= vec2.generic[31]) ? 0xFF : 0, 1); + memset(&vec1.generic[32], (vec1.generic[32] >= vec2.generic[32]) ? 0xFF : 0, 1); + memset(&vec1.generic[33], (vec1.generic[33] >= vec2.generic[33]) ? 0xFF : 0, 1); + memset(&vec1.generic[34], (vec1.generic[34] >= vec2.generic[34]) ? 0xFF : 0, 1); + memset(&vec1.generic[35], (vec1.generic[35] >= vec2.generic[35]) ? 0xFF : 0, 1); + memset(&vec1.generic[36], (vec1.generic[36] >= vec2.generic[36]) ? 0xFF : 0, 1); + memset(&vec1.generic[37], (vec1.generic[37] >= vec2.generic[37]) ? 0xFF : 0, 1); + memset(&vec1.generic[38], (vec1.generic[38] >= vec2.generic[38]) ? 0xFF : 0, 1); + memset(&vec1.generic[39], (vec1.generic[39] >= vec2.generic[39]) ? 0xFF : 0, 1); + memset(&vec1.generic[40], (vec1.generic[40] >= vec2.generic[40]) ? 0xFF : 0, 1); + memset(&vec1.generic[41], (vec1.generic[41] >= vec2.generic[41]) ? 0xFF : 0, 1); + memset(&vec1.generic[42], (vec1.generic[42] >= vec2.generic[42]) ? 0xFF : 0, 1); + memset(&vec1.generic[43], (vec1.generic[43] >= vec2.generic[43]) ? 0xFF : 0, 1); + memset(&vec1.generic[44], (vec1.generic[44] >= vec2.generic[44]) ? 0xFF : 0, 1); + memset(&vec1.generic[45], (vec1.generic[45] >= vec2.generic[45]) ? 0xFF : 0, 1); + memset(&vec1.generic[46], (vec1.generic[46] >= vec2.generic[46]) ? 0xFF : 0, 1); + memset(&vec1.generic[47], (vec1.generic[47] >= vec2.generic[47]) ? 0xFF : 0, 1); + memset(&vec1.generic[48], (vec1.generic[48] >= vec2.generic[48]) ? 0xFF : 0, 1); + memset(&vec1.generic[49], (vec1.generic[49] >= vec2.generic[49]) ? 0xFF : 0, 1); + memset(&vec1.generic[50], (vec1.generic[50] >= vec2.generic[50]) ? 0xFF : 0, 1); + memset(&vec1.generic[51], (vec1.generic[51] >= vec2.generic[51]) ? 0xFF : 0, 1); + memset(&vec1.generic[52], (vec1.generic[52] >= vec2.generic[52]) ? 0xFF : 0, 1); + memset(&vec1.generic[53], (vec1.generic[53] >= vec2.generic[53]) ? 0xFF : 0, 1); + memset(&vec1.generic[54], (vec1.generic[54] >= vec2.generic[54]) ? 0xFF : 0, 1); + memset(&vec1.generic[55], (vec1.generic[55] >= vec2.generic[55]) ? 0xFF : 0, 1); + memset(&vec1.generic[56], (vec1.generic[56] >= vec2.generic[56]) ? 0xFF : 0, 1); + memset(&vec1.generic[57], (vec1.generic[57] >= vec2.generic[57]) ? 0xFF : 0, 1); + memset(&vec1.generic[58], (vec1.generic[58] >= vec2.generic[58]) ? 0xFF : 0, 1); + memset(&vec1.generic[59], (vec1.generic[59] >= vec2.generic[59]) ? 0xFF : 0, 1); + memset(&vec1.generic[60], (vec1.generic[60] >= vec2.generic[60]) ? 0xFF : 0, 1); + memset(&vec1.generic[61], (vec1.generic[61] >= vec2.generic[61]) ? 0xFF : 0, 1); + memset(&vec1.generic[62], (vec1.generic[62] >= vec2.generic[62]) ? 0xFF : 0, 1); + memset(&vec1.generic[63], (vec1.generic[63] >= vec2.generic[63]) ? 
0xFF : 0, 1); + return vec1; +} +# define VINT8x64_CMPGE_DEFINED +#endif +#if !defined(VINT8x64_MIN_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_min(vint8x64 vec1, vint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] < vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] < vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] < vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] < vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] < vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] < vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] < vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] < vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] < vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] < vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] < vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] < vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] < vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] < vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] < vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] < vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] < vec2.generic[32]) ? 
(vec1.generic[32]) : (vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] < vec2.generic[33]) ? (vec1.generic[33]) : (vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] < vec2.generic[34]) ? (vec1.generic[34]) : (vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] < vec2.generic[35]) ? (vec1.generic[35]) : (vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] < vec2.generic[36]) ? (vec1.generic[36]) : (vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] < vec2.generic[37]) ? (vec1.generic[37]) : (vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] < vec2.generic[38]) ? (vec1.generic[38]) : (vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] < vec2.generic[39]) ? (vec1.generic[39]) : (vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] < vec2.generic[40]) ? (vec1.generic[40]) : (vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] < vec2.generic[41]) ? (vec1.generic[41]) : (vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] < vec2.generic[42]) ? (vec1.generic[42]) : (vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] < vec2.generic[43]) ? (vec1.generic[43]) : (vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] < vec2.generic[44]) ? (vec1.generic[44]) : (vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] < vec2.generic[45]) ? (vec1.generic[45]) : (vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] < vec2.generic[46]) ? (vec1.generic[46]) : (vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] < vec2.generic[47]) ? (vec1.generic[47]) : (vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] < vec2.generic[48]) ? (vec1.generic[48]) : (vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] < vec2.generic[49]) ? (vec1.generic[49]) : (vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] < vec2.generic[50]) ? (vec1.generic[50]) : (vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] < vec2.generic[51]) ? (vec1.generic[51]) : (vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] < vec2.generic[52]) ? (vec1.generic[52]) : (vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] < vec2.generic[53]) ? (vec1.generic[53]) : (vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] < vec2.generic[54]) ? (vec1.generic[54]) : (vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] < vec2.generic[55]) ? (vec1.generic[55]) : (vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] < vec2.generic[56]) ? (vec1.generic[56]) : (vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] < vec2.generic[57]) ? (vec1.generic[57]) : (vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] < vec2.generic[58]) ? (vec1.generic[58]) : (vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] < vec2.generic[59]) ? (vec1.generic[59]) : (vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] < vec2.generic[60]) ? (vec1.generic[60]) : (vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] < vec2.generic[61]) ? (vec1.generic[61]) : (vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] < vec2.generic[62]) ? (vec1.generic[62]) : (vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] < vec2.generic[63]) ? (vec1.generic[63]) : (vec2.generic[63]); + return vec1; +} +# define VINT8x64_MIN_DEFINED +#endif +#if !defined(VINT8x64_MAX_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_max(vint8x64 vec1, vint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? 
(vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] > vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] > vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] > vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] > vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] > vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] > vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] > vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] > vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] > vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] > vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] > vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] > vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] > vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] > vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] > vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] > vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] > vec2.generic[32]) ? (vec1.generic[32]) : (vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] > vec2.generic[33]) ? (vec1.generic[33]) : (vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] > vec2.generic[34]) ? (vec1.generic[34]) : (vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] > vec2.generic[35]) ? (vec1.generic[35]) : (vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] > vec2.generic[36]) ? 
(vec1.generic[36]) : (vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] > vec2.generic[37]) ? (vec1.generic[37]) : (vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] > vec2.generic[38]) ? (vec1.generic[38]) : (vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] > vec2.generic[39]) ? (vec1.generic[39]) : (vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] > vec2.generic[40]) ? (vec1.generic[40]) : (vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] > vec2.generic[41]) ? (vec1.generic[41]) : (vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] > vec2.generic[42]) ? (vec1.generic[42]) : (vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] > vec2.generic[43]) ? (vec1.generic[43]) : (vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] > vec2.generic[44]) ? (vec1.generic[44]) : (vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] > vec2.generic[45]) ? (vec1.generic[45]) : (vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] > vec2.generic[46]) ? (vec1.generic[46]) : (vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] > vec2.generic[47]) ? (vec1.generic[47]) : (vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] > vec2.generic[48]) ? (vec1.generic[48]) : (vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] > vec2.generic[49]) ? (vec1.generic[49]) : (vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] > vec2.generic[50]) ? (vec1.generic[50]) : (vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] > vec2.generic[51]) ? (vec1.generic[51]) : (vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] > vec2.generic[52]) ? (vec1.generic[52]) : (vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] > vec2.generic[53]) ? (vec1.generic[53]) : (vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] > vec2.generic[54]) ? (vec1.generic[54]) : (vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] > vec2.generic[55]) ? (vec1.generic[55]) : (vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] > vec2.generic[56]) ? (vec1.generic[56]) : (vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] > vec2.generic[57]) ? (vec1.generic[57]) : (vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] > vec2.generic[58]) ? (vec1.generic[58]) : (vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] > vec2.generic[59]) ? (vec1.generic[59]) : (vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] > vec2.generic[60]) ? (vec1.generic[60]) : (vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] > vec2.generic[61]) ? (vec1.generic[61]) : (vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] > vec2.generic[62]) ? (vec1.generic[62]) : (vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] > vec2.generic[63]) ? 
(vec1.generic[63]) : (vec2.generic[63]); + return vec1; +} +# define VINT8x64_MAX_DEFINED +#endif +#if !defined(VINT8x64_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_rshift(vint8x64 vec1, vuint8x64 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); +vec1.generic[4] = ((~vec1.generic[4]) >> vec2.generic[4]); +vec1.generic[5] = ((~vec1.generic[5]) >> vec2.generic[5]); +vec1.generic[6] = ((~vec1.generic[6]) >> vec2.generic[6]); +vec1.generic[7] = ((~vec1.generic[7]) >> vec2.generic[7]); +vec1.generic[8] = ((~vec1.generic[8]) >> vec2.generic[8]); +vec1.generic[9] = ((~vec1.generic[9]) >> vec2.generic[9]); +vec1.generic[10] = ((~vec1.generic[10]) >> vec2.generic[10]); +vec1.generic[11] = ((~vec1.generic[11]) >> vec2.generic[11]); +vec1.generic[12] = ((~vec1.generic[12]) >> vec2.generic[12]); +vec1.generic[13] = ((~vec1.generic[13]) >> vec2.generic[13]); +vec1.generic[14] = ((~vec1.generic[14]) >> vec2.generic[14]); +vec1.generic[15] = ((~vec1.generic[15]) >> vec2.generic[15]); +vec1.generic[16] = ((~vec1.generic[16]) >> vec2.generic[16]); +vec1.generic[17] = ((~vec1.generic[17]) >> vec2.generic[17]); +vec1.generic[18] = ((~vec1.generic[18]) >> vec2.generic[18]); +vec1.generic[19] = ((~vec1.generic[19]) >> vec2.generic[19]); +vec1.generic[20] = ((~vec1.generic[20]) >> vec2.generic[20]); +vec1.generic[21] = ((~vec1.generic[21]) >> vec2.generic[21]); +vec1.generic[22] = ((~vec1.generic[22]) >> vec2.generic[22]); +vec1.generic[23] = ((~vec1.generic[23]) >> vec2.generic[23]); +vec1.generic[24] = ((~vec1.generic[24]) >> vec2.generic[24]); +vec1.generic[25] = ((~vec1.generic[25]) >> vec2.generic[25]); +vec1.generic[26] = ((~vec1.generic[26]) >> vec2.generic[26]); +vec1.generic[27] = ((~vec1.generic[27]) >> vec2.generic[27]); +vec1.generic[28] = ((~vec1.generic[28]) >> vec2.generic[28]); +vec1.generic[29] = ((~vec1.generic[29]) >> vec2.generic[29]); +vec1.generic[30] = ((~vec1.generic[30]) >> vec2.generic[30]); +vec1.generic[31] = ((~vec1.generic[31]) >> vec2.generic[31]); +vec1.generic[32] = ((~vec1.generic[32]) >> vec2.generic[32]); +vec1.generic[33] = ((~vec1.generic[33]) >> vec2.generic[33]); +vec1.generic[34] = ((~vec1.generic[34]) >> vec2.generic[34]); +vec1.generic[35] = ((~vec1.generic[35]) >> vec2.generic[35]); +vec1.generic[36] = ((~vec1.generic[36]) >> vec2.generic[36]); +vec1.generic[37] = ((~vec1.generic[37]) >> vec2.generic[37]); +vec1.generic[38] = ((~vec1.generic[38]) >> vec2.generic[38]); +vec1.generic[39] = ((~vec1.generic[39]) >> vec2.generic[39]); +vec1.generic[40] = ((~vec1.generic[40]) >> vec2.generic[40]); +vec1.generic[41] = ((~vec1.generic[41]) >> vec2.generic[41]); +vec1.generic[42] = ((~vec1.generic[42]) >> vec2.generic[42]); +vec1.generic[43] = ((~vec1.generic[43]) >> vec2.generic[43]); +vec1.generic[44] = ((~vec1.generic[44]) >> vec2.generic[44]); +vec1.generic[45] = ((~vec1.generic[45]) >> vec2.generic[45]); +vec1.generic[46] = ((~vec1.generic[46]) >> vec2.generic[46]); +vec1.generic[47] = ((~vec1.generic[47]) >> vec2.generic[47]); +vec1.generic[48] = ((~vec1.generic[48]) >> vec2.generic[48]); +vec1.generic[49] = ((~vec1.generic[49]) >> vec2.generic[49]); +vec1.generic[50] = ((~vec1.generic[50]) >> vec2.generic[50]); +vec1.generic[51] = ((~vec1.generic[51]) >> vec2.generic[51]); +vec1.generic[52] = ((~vec1.generic[52]) >> vec2.generic[52]); +vec1.generic[53] = 
((~vec1.generic[53]) >> vec2.generic[53]); +vec1.generic[54] = ((~vec1.generic[54]) >> vec2.generic[54]); +vec1.generic[55] = ((~vec1.generic[55]) >> vec2.generic[55]); +vec1.generic[56] = ((~vec1.generic[56]) >> vec2.generic[56]); +vec1.generic[57] = ((~vec1.generic[57]) >> vec2.generic[57]); +vec1.generic[58] = ((~vec1.generic[58]) >> vec2.generic[58]); +vec1.generic[59] = ((~vec1.generic[59]) >> vec2.generic[59]); +vec1.generic[60] = ((~vec1.generic[60]) >> vec2.generic[60]); +vec1.generic[61] = ((~vec1.generic[61]) >> vec2.generic[61]); +vec1.generic[62] = ((~vec1.generic[62]) >> vec2.generic[62]); +vec1.generic[63] = ((~vec1.generic[63]) >> vec2.generic[63]); + return vec1; +} +# define VINT8x64_RSHIFT_DEFINED +#endif +#if !defined(VINT8x64_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_lrshift(vint8x64 vec1, vuint8x64 vec2) +{ + union { vec_uint8 u; vec_int8 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u >>= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u >>= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u >>= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u >>= vec2.generic[7]; + vec1.generic[7] = x.s; + x.s = vec1.generic[8]; + x.u >>= vec2.generic[8]; + vec1.generic[8] = x.s; + x.s = vec1.generic[9]; + x.u >>= vec2.generic[9]; + vec1.generic[9] = x.s; + x.s = vec1.generic[10]; + x.u >>= vec2.generic[10]; + vec1.generic[10] = x.s; + x.s = vec1.generic[11]; + x.u >>= vec2.generic[11]; + vec1.generic[11] = x.s; + x.s = vec1.generic[12]; + x.u >>= vec2.generic[12]; + vec1.generic[12] = x.s; + x.s = vec1.generic[13]; + x.u >>= vec2.generic[13]; + vec1.generic[13] = x.s; + x.s = vec1.generic[14]; + x.u >>= vec2.generic[14]; + vec1.generic[14] = x.s; + x.s = vec1.generic[15]; + x.u >>= vec2.generic[15]; + vec1.generic[15] = x.s; + x.s = vec1.generic[16]; + x.u >>= vec2.generic[16]; + vec1.generic[16] = x.s; + x.s = vec1.generic[17]; + x.u >>= vec2.generic[17]; + vec1.generic[17] = x.s; + x.s = vec1.generic[18]; + x.u >>= vec2.generic[18]; + vec1.generic[18] = x.s; + x.s = vec1.generic[19]; + x.u >>= vec2.generic[19]; + vec1.generic[19] = x.s; + x.s = vec1.generic[20]; + x.u >>= vec2.generic[20]; + vec1.generic[20] = x.s; + x.s = vec1.generic[21]; + x.u >>= vec2.generic[21]; + vec1.generic[21] = x.s; + x.s = vec1.generic[22]; + x.u >>= vec2.generic[22]; + vec1.generic[22] = x.s; + x.s = vec1.generic[23]; + x.u >>= vec2.generic[23]; + vec1.generic[23] = x.s; + x.s = vec1.generic[24]; + x.u >>= vec2.generic[24]; + vec1.generic[24] = x.s; + x.s = vec1.generic[25]; + x.u >>= vec2.generic[25]; + vec1.generic[25] = x.s; + x.s = vec1.generic[26]; + x.u >>= vec2.generic[26]; + vec1.generic[26] = x.s; + x.s = vec1.generic[27]; + x.u >>= vec2.generic[27]; + vec1.generic[27] = x.s; + x.s = vec1.generic[28]; + x.u >>= vec2.generic[28]; + vec1.generic[28] = x.s; + x.s = vec1.generic[29]; + x.u >>= vec2.generic[29]; + vec1.generic[29] = x.s; + x.s = vec1.generic[30]; + x.u >>= vec2.generic[30]; + vec1.generic[30] = x.s; + x.s = vec1.generic[31]; + x.u >>= vec2.generic[31]; + vec1.generic[31] = x.s; + x.s = vec1.generic[32]; + x.u >>= vec2.generic[32]; + vec1.generic[32] = x.s; + x.s = vec1.generic[33]; + x.u >>= vec2.generic[33]; 
+ vec1.generic[33] = x.s; + x.s = vec1.generic[34]; + x.u >>= vec2.generic[34]; + vec1.generic[34] = x.s; + x.s = vec1.generic[35]; + x.u >>= vec2.generic[35]; + vec1.generic[35] = x.s; + x.s = vec1.generic[36]; + x.u >>= vec2.generic[36]; + vec1.generic[36] = x.s; + x.s = vec1.generic[37]; + x.u >>= vec2.generic[37]; + vec1.generic[37] = x.s; + x.s = vec1.generic[38]; + x.u >>= vec2.generic[38]; + vec1.generic[38] = x.s; + x.s = vec1.generic[39]; + x.u >>= vec2.generic[39]; + vec1.generic[39] = x.s; + x.s = vec1.generic[40]; + x.u >>= vec2.generic[40]; + vec1.generic[40] = x.s; + x.s = vec1.generic[41]; + x.u >>= vec2.generic[41]; + vec1.generic[41] = x.s; + x.s = vec1.generic[42]; + x.u >>= vec2.generic[42]; + vec1.generic[42] = x.s; + x.s = vec1.generic[43]; + x.u >>= vec2.generic[43]; + vec1.generic[43] = x.s; + x.s = vec1.generic[44]; + x.u >>= vec2.generic[44]; + vec1.generic[44] = x.s; + x.s = vec1.generic[45]; + x.u >>= vec2.generic[45]; + vec1.generic[45] = x.s; + x.s = vec1.generic[46]; + x.u >>= vec2.generic[46]; + vec1.generic[46] = x.s; + x.s = vec1.generic[47]; + x.u >>= vec2.generic[47]; + vec1.generic[47] = x.s; + x.s = vec1.generic[48]; + x.u >>= vec2.generic[48]; + vec1.generic[48] = x.s; + x.s = vec1.generic[49]; + x.u >>= vec2.generic[49]; + vec1.generic[49] = x.s; + x.s = vec1.generic[50]; + x.u >>= vec2.generic[50]; + vec1.generic[50] = x.s; + x.s = vec1.generic[51]; + x.u >>= vec2.generic[51]; + vec1.generic[51] = x.s; + x.s = vec1.generic[52]; + x.u >>= vec2.generic[52]; + vec1.generic[52] = x.s; + x.s = vec1.generic[53]; + x.u >>= vec2.generic[53]; + vec1.generic[53] = x.s; + x.s = vec1.generic[54]; + x.u >>= vec2.generic[54]; + vec1.generic[54] = x.s; + x.s = vec1.generic[55]; + x.u >>= vec2.generic[55]; + vec1.generic[55] = x.s; + x.s = vec1.generic[56]; + x.u >>= vec2.generic[56]; + vec1.generic[56] = x.s; + x.s = vec1.generic[57]; + x.u >>= vec2.generic[57]; + vec1.generic[57] = x.s; + x.s = vec1.generic[58]; + x.u >>= vec2.generic[58]; + vec1.generic[58] = x.s; + x.s = vec1.generic[59]; + x.u >>= vec2.generic[59]; + vec1.generic[59] = x.s; + x.s = vec1.generic[60]; + x.u >>= vec2.generic[60]; + vec1.generic[60] = x.s; + x.s = vec1.generic[61]; + x.u >>= vec2.generic[61]; + vec1.generic[61] = x.s; + x.s = vec1.generic[62]; + x.u >>= vec2.generic[62]; + vec1.generic[62] = x.s; + x.s = vec1.generic[63]; + x.u >>= vec2.generic[63]; + vec1.generic[63] = x.s; + return vec1; +} +# define VINT8x64_LRSHIFT_DEFINED +#endif +#if !defined(VINT8x64_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x64 vint8x64_lshift(vint8x64 vec1, vuint8x64 vec2) +{ + union { vec_uint8 u; vec_int8 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u <<= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u <<= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u <<= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u <<= vec2.generic[7]; + vec1.generic[7] = x.s; + x.s = vec1.generic[8]; + x.u <<= vec2.generic[8]; + vec1.generic[8] = x.s; + x.s = vec1.generic[9]; + x.u <<= vec2.generic[9]; + vec1.generic[9] = x.s; + x.s = vec1.generic[10]; + x.u <<= vec2.generic[10]; + vec1.generic[10] = x.s; + x.s = vec1.generic[11]; + x.u <<= vec2.generic[11]; + 
vec1.generic[11] = x.s; + x.s = vec1.generic[12]; + x.u <<= vec2.generic[12]; + vec1.generic[12] = x.s; + x.s = vec1.generic[13]; + x.u <<= vec2.generic[13]; + vec1.generic[13] = x.s; + x.s = vec1.generic[14]; + x.u <<= vec2.generic[14]; + vec1.generic[14] = x.s; + x.s = vec1.generic[15]; + x.u <<= vec2.generic[15]; + vec1.generic[15] = x.s; + x.s = vec1.generic[16]; + x.u <<= vec2.generic[16]; + vec1.generic[16] = x.s; + x.s = vec1.generic[17]; + x.u <<= vec2.generic[17]; + vec1.generic[17] = x.s; + x.s = vec1.generic[18]; + x.u <<= vec2.generic[18]; + vec1.generic[18] = x.s; + x.s = vec1.generic[19]; + x.u <<= vec2.generic[19]; + vec1.generic[19] = x.s; + x.s = vec1.generic[20]; + x.u <<= vec2.generic[20]; + vec1.generic[20] = x.s; + x.s = vec1.generic[21]; + x.u <<= vec2.generic[21]; + vec1.generic[21] = x.s; + x.s = vec1.generic[22]; + x.u <<= vec2.generic[22]; + vec1.generic[22] = x.s; + x.s = vec1.generic[23]; + x.u <<= vec2.generic[23]; + vec1.generic[23] = x.s; + x.s = vec1.generic[24]; + x.u <<= vec2.generic[24]; + vec1.generic[24] = x.s; + x.s = vec1.generic[25]; + x.u <<= vec2.generic[25]; + vec1.generic[25] = x.s; + x.s = vec1.generic[26]; + x.u <<= vec2.generic[26]; + vec1.generic[26] = x.s; + x.s = vec1.generic[27]; + x.u <<= vec2.generic[27]; + vec1.generic[27] = x.s; + x.s = vec1.generic[28]; + x.u <<= vec2.generic[28]; + vec1.generic[28] = x.s; + x.s = vec1.generic[29]; + x.u <<= vec2.generic[29]; + vec1.generic[29] = x.s; + x.s = vec1.generic[30]; + x.u <<= vec2.generic[30]; + vec1.generic[30] = x.s; + x.s = vec1.generic[31]; + x.u <<= vec2.generic[31]; + vec1.generic[31] = x.s; + x.s = vec1.generic[32]; + x.u <<= vec2.generic[32]; + vec1.generic[32] = x.s; + x.s = vec1.generic[33]; + x.u <<= vec2.generic[33]; + vec1.generic[33] = x.s; + x.s = vec1.generic[34]; + x.u <<= vec2.generic[34]; + vec1.generic[34] = x.s; + x.s = vec1.generic[35]; + x.u <<= vec2.generic[35]; + vec1.generic[35] = x.s; + x.s = vec1.generic[36]; + x.u <<= vec2.generic[36]; + vec1.generic[36] = x.s; + x.s = vec1.generic[37]; + x.u <<= vec2.generic[37]; + vec1.generic[37] = x.s; + x.s = vec1.generic[38]; + x.u <<= vec2.generic[38]; + vec1.generic[38] = x.s; + x.s = vec1.generic[39]; + x.u <<= vec2.generic[39]; + vec1.generic[39] = x.s; + x.s = vec1.generic[40]; + x.u <<= vec2.generic[40]; + vec1.generic[40] = x.s; + x.s = vec1.generic[41]; + x.u <<= vec2.generic[41]; + vec1.generic[41] = x.s; + x.s = vec1.generic[42]; + x.u <<= vec2.generic[42]; + vec1.generic[42] = x.s; + x.s = vec1.generic[43]; + x.u <<= vec2.generic[43]; + vec1.generic[43] = x.s; + x.s = vec1.generic[44]; + x.u <<= vec2.generic[44]; + vec1.generic[44] = x.s; + x.s = vec1.generic[45]; + x.u <<= vec2.generic[45]; + vec1.generic[45] = x.s; + x.s = vec1.generic[46]; + x.u <<= vec2.generic[46]; + vec1.generic[46] = x.s; + x.s = vec1.generic[47]; + x.u <<= vec2.generic[47]; + vec1.generic[47] = x.s; + x.s = vec1.generic[48]; + x.u <<= vec2.generic[48]; + vec1.generic[48] = x.s; + x.s = vec1.generic[49]; + x.u <<= vec2.generic[49]; + vec1.generic[49] = x.s; + x.s = vec1.generic[50]; + x.u <<= vec2.generic[50]; + vec1.generic[50] = x.s; + x.s = vec1.generic[51]; + x.u <<= vec2.generic[51]; + vec1.generic[51] = x.s; + x.s = vec1.generic[52]; + x.u <<= vec2.generic[52]; + vec1.generic[52] = x.s; + x.s = vec1.generic[53]; + x.u <<= vec2.generic[53]; + vec1.generic[53] = x.s; + x.s = vec1.generic[54]; + x.u <<= vec2.generic[54]; + vec1.generic[54] = x.s; + x.s = vec1.generic[55]; + x.u <<= vec2.generic[55]; + vec1.generic[55] = x.s; + x.s = 
vec1.generic[56]; + x.u <<= vec2.generic[56]; + vec1.generic[56] = x.s; + x.s = vec1.generic[57]; + x.u <<= vec2.generic[57]; + vec1.generic[57] = x.s; + x.s = vec1.generic[58]; + x.u <<= vec2.generic[58]; + vec1.generic[58] = x.s; + x.s = vec1.generic[59]; + x.u <<= vec2.generic[59]; + vec1.generic[59] = x.s; + x.s = vec1.generic[60]; + x.u <<= vec2.generic[60]; + vec1.generic[60] = x.s; + x.s = vec1.generic[61]; + x.u <<= vec2.generic[61]; + vec1.generic[61] = x.s; + x.s = vec1.generic[62]; + x.u <<= vec2.generic[62]; + vec1.generic[62] = x.s; + x.s = vec1.generic[63]; + x.u <<= vec2.generic[63]; + vec1.generic[63] = x.s; + return vec1; +} +# define VINT8x64_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x64_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_splat(vec_uint8 x) +{ + vuint8x64 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + vec.generic[8] = x; + vec.generic[9] = x; + vec.generic[10] = x; + vec.generic[11] = x; + vec.generic[12] = x; + vec.generic[13] = x; + vec.generic[14] = x; + vec.generic[15] = x; + vec.generic[16] = x; + vec.generic[17] = x; + vec.generic[18] = x; + vec.generic[19] = x; + vec.generic[20] = x; + vec.generic[21] = x; + vec.generic[22] = x; + vec.generic[23] = x; + vec.generic[24] = x; + vec.generic[25] = x; + vec.generic[26] = x; + vec.generic[27] = x; + vec.generic[28] = x; + vec.generic[29] = x; + vec.generic[30] = x; + vec.generic[31] = x; + vec.generic[32] = x; + vec.generic[33] = x; + vec.generic[34] = x; + vec.generic[35] = x; + vec.generic[36] = x; + vec.generic[37] = x; + vec.generic[38] = x; + vec.generic[39] = x; + vec.generic[40] = x; + vec.generic[41] = x; + vec.generic[42] = x; + vec.generic[43] = x; + vec.generic[44] = x; + vec.generic[45] = x; + vec.generic[46] = x; + vec.generic[47] = x; + vec.generic[48] = x; + vec.generic[49] = x; + vec.generic[50] = x; + vec.generic[51] = x; + vec.generic[52] = x; + vec.generic[53] = x; + vec.generic[54] = x; + vec.generic[55] = x; + vec.generic[56] = x; + vec.generic[57] = x; + vec.generic[58] = x; + vec.generic[59] = x; + vec.generic[60] = x; + vec.generic[61] = x; + vec.generic[62] = x; + vec.generic[63] = x; + return vec; +} +# define VUINT8x64_SPLAT_DEFINED +#endif +#if !defined(VUINT8x64_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_load_aligned(const vec_uint8 x[64]) +{ + vuint8x64 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VUINT8x64_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT8x64_LOAD_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_load(const vec_uint8 x[64]) +{ + vuint8x64 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VUINT8x64_LOAD_DEFINED +#endif +#if !defined(VUINT8x64_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x64_store_aligned(vuint8x64 vec, vec_uint8 x[64]) +{ + memcpy(x, vec.generic, 64); +} +# define VUINT8x64_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT8x64_STORE_DEFINED) +VEC_FUNC_IMPL void vuint8x64_store(vuint8x64 vec, vec_uint8 x[64]) +{ + memcpy(x, vec.generic, 64); +} +# define VUINT8x64_STORE_DEFINED +#endif +#if !defined(VUINT8x64_ADD_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_add(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + 
vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] + vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] + vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] + vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] + vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] + vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] + vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] + vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] + vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] + vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] + vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] + vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] + vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] + vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] + vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] + vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] + vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] + vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] + vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] + vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] + vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] + vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] + vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] + vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] + vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] + vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] + vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] + vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] + vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] + vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] + vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] + vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] + vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] + vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] + vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] + vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] + vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] + vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] + vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] + vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] + vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] + vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] + vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] + vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] + vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] + vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] + vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] + vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] + vec2.generic[63]); + return 
vec1; +} +# define VUINT8x64_ADD_DEFINED +#endif +#if !defined(VUINT8x64_SUB_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_sub(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] - vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] - vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] - vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] - vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] - vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] - vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] - vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] - vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] - vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] - vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] - vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] - vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] - vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] - vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] - vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] - vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] - vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] - vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] - vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] - vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] - vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] - vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] - vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] - vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] - vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] - vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] - vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] - vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] - vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] - vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] - vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] - vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] - vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] - vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] - vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] - vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] - vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] - vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] - vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] - vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] - vec2.generic[56]); + 
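/* Descriptive note: add, sub and mul operate lane-wise and store their results back into
   the vec_uint8 elements (presumably an 8-bit type), so values that exceed the lane width
   are reduced modulo 256 on store. */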
vec1.generic[57] = (vec1.generic[57] - vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] - vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] - vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] - vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] - vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] - vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] - vec2.generic[63]); + return vec1; +} +# define VUINT8x64_SUB_DEFINED +#endif +#if !defined(VUINT8x64_MUL_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_mul(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] * vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] * vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] * vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] * vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] * vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] * vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] * vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] * vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] * vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] * vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] * vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] * vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] * vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] * vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] * vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] * vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] * vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] * vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] * vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] * vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] * vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] * vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] * vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] * vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] * vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] * vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] * vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] * vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] * vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] * vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] * vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] * vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] * vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] * 
vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] * vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] * vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] * vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] * vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] * vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] * vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] * vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] * vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] * vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] * vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] * vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] * vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] * vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] * vec2.generic[63]); + return vec1; +} +# define VUINT8x64_MUL_DEFINED +#endif +#if !defined(VUINT8x64_DIV_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_div(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0); + vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] / vec2.generic[16]) : 0); + vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] / vec2.generic[17]) : 0); + vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] / vec2.generic[18]) : 0); + vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] / vec2.generic[19]) : 0); + vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] / vec2.generic[20]) : 0); + vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] / vec2.generic[21]) : 0); + vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] / vec2.generic[22]) : 0); + vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] / vec2.generic[23]) : 0); + vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] / vec2.generic[24]) : 0); + vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] / vec2.generic[25]) : 0); + vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] / vec2.generic[26]) : 0); + vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] / vec2.generic[27]) : 0); + vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] / vec2.generic[28]) : 0); + vec1.generic[29] = (vec2.generic[29] ? 
(vec1.generic[29] / vec2.generic[29]) : 0); + vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] / vec2.generic[30]) : 0); + vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] / vec2.generic[31]) : 0); + vec1.generic[32] = (vec2.generic[32] ? (vec1.generic[32] / vec2.generic[32]) : 0); + vec1.generic[33] = (vec2.generic[33] ? (vec1.generic[33] / vec2.generic[33]) : 0); + vec1.generic[34] = (vec2.generic[34] ? (vec1.generic[34] / vec2.generic[34]) : 0); + vec1.generic[35] = (vec2.generic[35] ? (vec1.generic[35] / vec2.generic[35]) : 0); + vec1.generic[36] = (vec2.generic[36] ? (vec1.generic[36] / vec2.generic[36]) : 0); + vec1.generic[37] = (vec2.generic[37] ? (vec1.generic[37] / vec2.generic[37]) : 0); + vec1.generic[38] = (vec2.generic[38] ? (vec1.generic[38] / vec2.generic[38]) : 0); + vec1.generic[39] = (vec2.generic[39] ? (vec1.generic[39] / vec2.generic[39]) : 0); + vec1.generic[40] = (vec2.generic[40] ? (vec1.generic[40] / vec2.generic[40]) : 0); + vec1.generic[41] = (vec2.generic[41] ? (vec1.generic[41] / vec2.generic[41]) : 0); + vec1.generic[42] = (vec2.generic[42] ? (vec1.generic[42] / vec2.generic[42]) : 0); + vec1.generic[43] = (vec2.generic[43] ? (vec1.generic[43] / vec2.generic[43]) : 0); + vec1.generic[44] = (vec2.generic[44] ? (vec1.generic[44] / vec2.generic[44]) : 0); + vec1.generic[45] = (vec2.generic[45] ? (vec1.generic[45] / vec2.generic[45]) : 0); + vec1.generic[46] = (vec2.generic[46] ? (vec1.generic[46] / vec2.generic[46]) : 0); + vec1.generic[47] = (vec2.generic[47] ? (vec1.generic[47] / vec2.generic[47]) : 0); + vec1.generic[48] = (vec2.generic[48] ? (vec1.generic[48] / vec2.generic[48]) : 0); + vec1.generic[49] = (vec2.generic[49] ? (vec1.generic[49] / vec2.generic[49]) : 0); + vec1.generic[50] = (vec2.generic[50] ? (vec1.generic[50] / vec2.generic[50]) : 0); + vec1.generic[51] = (vec2.generic[51] ? (vec1.generic[51] / vec2.generic[51]) : 0); + vec1.generic[52] = (vec2.generic[52] ? (vec1.generic[52] / vec2.generic[52]) : 0); + vec1.generic[53] = (vec2.generic[53] ? (vec1.generic[53] / vec2.generic[53]) : 0); + vec1.generic[54] = (vec2.generic[54] ? (vec1.generic[54] / vec2.generic[54]) : 0); + vec1.generic[55] = (vec2.generic[55] ? (vec1.generic[55] / vec2.generic[55]) : 0); + vec1.generic[56] = (vec2.generic[56] ? (vec1.generic[56] / vec2.generic[56]) : 0); + vec1.generic[57] = (vec2.generic[57] ? (vec1.generic[57] / vec2.generic[57]) : 0); + vec1.generic[58] = (vec2.generic[58] ? (vec1.generic[58] / vec2.generic[58]) : 0); + vec1.generic[59] = (vec2.generic[59] ? (vec1.generic[59] / vec2.generic[59]) : 0); + vec1.generic[60] = (vec2.generic[60] ? (vec1.generic[60] / vec2.generic[60]) : 0); + vec1.generic[61] = (vec2.generic[61] ? (vec1.generic[61] / vec2.generic[61]) : 0); + vec1.generic[62] = (vec2.generic[62] ? (vec1.generic[62] / vec2.generic[62]) : 0); + vec1.generic[63] = (vec2.generic[63] ? (vec1.generic[63] / vec2.generic[63]) : 0); + return vec1; +} +# define VUINT8x64_DIV_DEFINED +#endif +#if !defined(VUINT8x64_MOD_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_mod(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? 
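/* Descriptive note: both the division above and the modulo below test each divisor lane
   and yield 0 for that lane when the divisor is zero, rather than performing the
   (undefined) division. */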
(vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0); + vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] % vec2.generic[16]) : 0); + vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] % vec2.generic[17]) : 0); + vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] % vec2.generic[18]) : 0); + vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] % vec2.generic[19]) : 0); + vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] % vec2.generic[20]) : 0); + vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] % vec2.generic[21]) : 0); + vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] % vec2.generic[22]) : 0); + vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] % vec2.generic[23]) : 0); + vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] % vec2.generic[24]) : 0); + vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] % vec2.generic[25]) : 0); + vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] % vec2.generic[26]) : 0); + vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] % vec2.generic[27]) : 0); + vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] % vec2.generic[28]) : 0); + vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] % vec2.generic[29]) : 0); + vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] % vec2.generic[30]) : 0); + vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] % vec2.generic[31]) : 0); + vec1.generic[32] = (vec2.generic[32] ? (vec1.generic[32] % vec2.generic[32]) : 0); + vec1.generic[33] = (vec2.generic[33] ? (vec1.generic[33] % vec2.generic[33]) : 0); + vec1.generic[34] = (vec2.generic[34] ? (vec1.generic[34] % vec2.generic[34]) : 0); + vec1.generic[35] = (vec2.generic[35] ? (vec1.generic[35] % vec2.generic[35]) : 0); + vec1.generic[36] = (vec2.generic[36] ? (vec1.generic[36] % vec2.generic[36]) : 0); + vec1.generic[37] = (vec2.generic[37] ? (vec1.generic[37] % vec2.generic[37]) : 0); + vec1.generic[38] = (vec2.generic[38] ? (vec1.generic[38] % vec2.generic[38]) : 0); + vec1.generic[39] = (vec2.generic[39] ? (vec1.generic[39] % vec2.generic[39]) : 0); + vec1.generic[40] = (vec2.generic[40] ? (vec1.generic[40] % vec2.generic[40]) : 0); + vec1.generic[41] = (vec2.generic[41] ? (vec1.generic[41] % vec2.generic[41]) : 0); + vec1.generic[42] = (vec2.generic[42] ? (vec1.generic[42] % vec2.generic[42]) : 0); + vec1.generic[43] = (vec2.generic[43] ? (vec1.generic[43] % vec2.generic[43]) : 0); + vec1.generic[44] = (vec2.generic[44] ? (vec1.generic[44] % vec2.generic[44]) : 0); + vec1.generic[45] = (vec2.generic[45] ? (vec1.generic[45] % vec2.generic[45]) : 0); + vec1.generic[46] = (vec2.generic[46] ? (vec1.generic[46] % vec2.generic[46]) : 0); + vec1.generic[47] = (vec2.generic[47] ? 
(vec1.generic[47] % vec2.generic[47]) : 0); + vec1.generic[48] = (vec2.generic[48] ? (vec1.generic[48] % vec2.generic[48]) : 0); + vec1.generic[49] = (vec2.generic[49] ? (vec1.generic[49] % vec2.generic[49]) : 0); + vec1.generic[50] = (vec2.generic[50] ? (vec1.generic[50] % vec2.generic[50]) : 0); + vec1.generic[51] = (vec2.generic[51] ? (vec1.generic[51] % vec2.generic[51]) : 0); + vec1.generic[52] = (vec2.generic[52] ? (vec1.generic[52] % vec2.generic[52]) : 0); + vec1.generic[53] = (vec2.generic[53] ? (vec1.generic[53] % vec2.generic[53]) : 0); + vec1.generic[54] = (vec2.generic[54] ? (vec1.generic[54] % vec2.generic[54]) : 0); + vec1.generic[55] = (vec2.generic[55] ? (vec1.generic[55] % vec2.generic[55]) : 0); + vec1.generic[56] = (vec2.generic[56] ? (vec1.generic[56] % vec2.generic[56]) : 0); + vec1.generic[57] = (vec2.generic[57] ? (vec1.generic[57] % vec2.generic[57]) : 0); + vec1.generic[58] = (vec2.generic[58] ? (vec1.generic[58] % vec2.generic[58]) : 0); + vec1.generic[59] = (vec2.generic[59] ? (vec1.generic[59] % vec2.generic[59]) : 0); + vec1.generic[60] = (vec2.generic[60] ? (vec1.generic[60] % vec2.generic[60]) : 0); + vec1.generic[61] = (vec2.generic[61] ? (vec1.generic[61] % vec2.generic[61]) : 0); + vec1.generic[62] = (vec2.generic[62] ? (vec1.generic[62] % vec2.generic[62]) : 0); + vec1.generic[63] = (vec2.generic[63] ? (vec1.generic[63] % vec2.generic[63]) : 0); + return vec1; +} +# define VUINT8x64_MOD_DEFINED +#endif +#if !defined(VUINT8x64_AVG_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_avg(vuint8x64 vec1, vuint8x64 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); +vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1); +vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1); +vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1); +vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1); +vec1.generic[8] = (vec1.generic[8] >> 1) + (vec2.generic[8] >> 1) + ((vec1.generic[8] | vec2.generic[8]) & 1); +vec1.generic[9] = (vec1.generic[9] >> 1) + (vec2.generic[9] >> 1) + ((vec1.generic[9] | vec2.generic[9]) & 1); +vec1.generic[10] = (vec1.generic[10] >> 1) + (vec2.generic[10] >> 1) + ((vec1.generic[10] | vec2.generic[10]) & 1); +vec1.generic[11] = (vec1.generic[11] >> 1) + (vec2.generic[11] >> 1) + ((vec1.generic[11] | vec2.generic[11]) & 1); +vec1.generic[12] = (vec1.generic[12] >> 1) + (vec2.generic[12] >> 1) + ((vec1.generic[12] | vec2.generic[12]) & 1); +vec1.generic[13] = (vec1.generic[13] >> 1) + (vec2.generic[13] >> 1) + ((vec1.generic[13] | vec2.generic[13]) & 1); +vec1.generic[14] = (vec1.generic[14] >> 1) + (vec2.generic[14] >> 1) + ((vec1.generic[14] | vec2.generic[14]) & 1); +vec1.generic[15] = (vec1.generic[15] >> 1) + (vec2.generic[15] >> 1) + ((vec1.generic[15] | vec2.generic[15]) & 1); +vec1.generic[16] = (vec1.generic[16] >> 1) + (vec2.generic[16] >> 1) + ((vec1.generic[16] | vec2.generic[16]) & 1); +vec1.generic[17] = (vec1.generic[17] >> 
1) + (vec2.generic[17] >> 1) + ((vec1.generic[17] | vec2.generic[17]) & 1); +vec1.generic[18] = (vec1.generic[18] >> 1) + (vec2.generic[18] >> 1) + ((vec1.generic[18] | vec2.generic[18]) & 1); +vec1.generic[19] = (vec1.generic[19] >> 1) + (vec2.generic[19] >> 1) + ((vec1.generic[19] | vec2.generic[19]) & 1); +vec1.generic[20] = (vec1.generic[20] >> 1) + (vec2.generic[20] >> 1) + ((vec1.generic[20] | vec2.generic[20]) & 1); +vec1.generic[21] = (vec1.generic[21] >> 1) + (vec2.generic[21] >> 1) + ((vec1.generic[21] | vec2.generic[21]) & 1); +vec1.generic[22] = (vec1.generic[22] >> 1) + (vec2.generic[22] >> 1) + ((vec1.generic[22] | vec2.generic[22]) & 1); +vec1.generic[23] = (vec1.generic[23] >> 1) + (vec2.generic[23] >> 1) + ((vec1.generic[23] | vec2.generic[23]) & 1); +vec1.generic[24] = (vec1.generic[24] >> 1) + (vec2.generic[24] >> 1) + ((vec1.generic[24] | vec2.generic[24]) & 1); +vec1.generic[25] = (vec1.generic[25] >> 1) + (vec2.generic[25] >> 1) + ((vec1.generic[25] | vec2.generic[25]) & 1); +vec1.generic[26] = (vec1.generic[26] >> 1) + (vec2.generic[26] >> 1) + ((vec1.generic[26] | vec2.generic[26]) & 1); +vec1.generic[27] = (vec1.generic[27] >> 1) + (vec2.generic[27] >> 1) + ((vec1.generic[27] | vec2.generic[27]) & 1); +vec1.generic[28] = (vec1.generic[28] >> 1) + (vec2.generic[28] >> 1) + ((vec1.generic[28] | vec2.generic[28]) & 1); +vec1.generic[29] = (vec1.generic[29] >> 1) + (vec2.generic[29] >> 1) + ((vec1.generic[29] | vec2.generic[29]) & 1); +vec1.generic[30] = (vec1.generic[30] >> 1) + (vec2.generic[30] >> 1) + ((vec1.generic[30] | vec2.generic[30]) & 1); +vec1.generic[31] = (vec1.generic[31] >> 1) + (vec2.generic[31] >> 1) + ((vec1.generic[31] | vec2.generic[31]) & 1); +vec1.generic[32] = (vec1.generic[32] >> 1) + (vec2.generic[32] >> 1) + ((vec1.generic[32] | vec2.generic[32]) & 1); +vec1.generic[33] = (vec1.generic[33] >> 1) + (vec2.generic[33] >> 1) + ((vec1.generic[33] | vec2.generic[33]) & 1); +vec1.generic[34] = (vec1.generic[34] >> 1) + (vec2.generic[34] >> 1) + ((vec1.generic[34] | vec2.generic[34]) & 1); +vec1.generic[35] = (vec1.generic[35] >> 1) + (vec2.generic[35] >> 1) + ((vec1.generic[35] | vec2.generic[35]) & 1); +vec1.generic[36] = (vec1.generic[36] >> 1) + (vec2.generic[36] >> 1) + ((vec1.generic[36] | vec2.generic[36]) & 1); +vec1.generic[37] = (vec1.generic[37] >> 1) + (vec2.generic[37] >> 1) + ((vec1.generic[37] | vec2.generic[37]) & 1); +vec1.generic[38] = (vec1.generic[38] >> 1) + (vec2.generic[38] >> 1) + ((vec1.generic[38] | vec2.generic[38]) & 1); +vec1.generic[39] = (vec1.generic[39] >> 1) + (vec2.generic[39] >> 1) + ((vec1.generic[39] | vec2.generic[39]) & 1); +vec1.generic[40] = (vec1.generic[40] >> 1) + (vec2.generic[40] >> 1) + ((vec1.generic[40] | vec2.generic[40]) & 1); +vec1.generic[41] = (vec1.generic[41] >> 1) + (vec2.generic[41] >> 1) + ((vec1.generic[41] | vec2.generic[41]) & 1); +vec1.generic[42] = (vec1.generic[42] >> 1) + (vec2.generic[42] >> 1) + ((vec1.generic[42] | vec2.generic[42]) & 1); +vec1.generic[43] = (vec1.generic[43] >> 1) + (vec2.generic[43] >> 1) + ((vec1.generic[43] | vec2.generic[43]) & 1); +vec1.generic[44] = (vec1.generic[44] >> 1) + (vec2.generic[44] >> 1) + ((vec1.generic[44] | vec2.generic[44]) & 1); +vec1.generic[45] = (vec1.generic[45] >> 1) + (vec2.generic[45] >> 1) + ((vec1.generic[45] | vec2.generic[45]) & 1); +vec1.generic[46] = (vec1.generic[46] >> 1) + (vec2.generic[46] >> 1) + ((vec1.generic[46] | vec2.generic[46]) & 1); +vec1.generic[47] = (vec1.generic[47] >> 1) + (vec2.generic[47] >> 1) + 
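/* Descriptive note: the per-lane average (a >> 1) + (b >> 1) + ((a | b) & 1) equals the
   sum divided by two, rounded up, while keeping every intermediate within the lane's
   range instead of computing a + b first. */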
((vec1.generic[47] | vec2.generic[47]) & 1); +vec1.generic[48] = (vec1.generic[48] >> 1) + (vec2.generic[48] >> 1) + ((vec1.generic[48] | vec2.generic[48]) & 1); +vec1.generic[49] = (vec1.generic[49] >> 1) + (vec2.generic[49] >> 1) + ((vec1.generic[49] | vec2.generic[49]) & 1); +vec1.generic[50] = (vec1.generic[50] >> 1) + (vec2.generic[50] >> 1) + ((vec1.generic[50] | vec2.generic[50]) & 1); +vec1.generic[51] = (vec1.generic[51] >> 1) + (vec2.generic[51] >> 1) + ((vec1.generic[51] | vec2.generic[51]) & 1); +vec1.generic[52] = (vec1.generic[52] >> 1) + (vec2.generic[52] >> 1) + ((vec1.generic[52] | vec2.generic[52]) & 1); +vec1.generic[53] = (vec1.generic[53] >> 1) + (vec2.generic[53] >> 1) + ((vec1.generic[53] | vec2.generic[53]) & 1); +vec1.generic[54] = (vec1.generic[54] >> 1) + (vec2.generic[54] >> 1) + ((vec1.generic[54] | vec2.generic[54]) & 1); +vec1.generic[55] = (vec1.generic[55] >> 1) + (vec2.generic[55] >> 1) + ((vec1.generic[55] | vec2.generic[55]) & 1); +vec1.generic[56] = (vec1.generic[56] >> 1) + (vec2.generic[56] >> 1) + ((vec1.generic[56] | vec2.generic[56]) & 1); +vec1.generic[57] = (vec1.generic[57] >> 1) + (vec2.generic[57] >> 1) + ((vec1.generic[57] | vec2.generic[57]) & 1); +vec1.generic[58] = (vec1.generic[58] >> 1) + (vec2.generic[58] >> 1) + ((vec1.generic[58] | vec2.generic[58]) & 1); +vec1.generic[59] = (vec1.generic[59] >> 1) + (vec2.generic[59] >> 1) + ((vec1.generic[59] | vec2.generic[59]) & 1); +vec1.generic[60] = (vec1.generic[60] >> 1) + (vec2.generic[60] >> 1) + ((vec1.generic[60] | vec2.generic[60]) & 1); +vec1.generic[61] = (vec1.generic[61] >> 1) + (vec2.generic[61] >> 1) + ((vec1.generic[61] | vec2.generic[61]) & 1); +vec1.generic[62] = (vec1.generic[62] >> 1) + (vec2.generic[62] >> 1) + ((vec1.generic[62] | vec2.generic[62]) & 1); +vec1.generic[63] = (vec1.generic[63] >> 1) + (vec2.generic[63] >> 1) + ((vec1.generic[63] | vec2.generic[63]) & 1); + return vec1; +} +# define VUINT8x64_AVG_DEFINED +#endif +#if !defined(VUINT8x64_AND_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_and(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] & vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] & vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] & vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] & vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] & vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] & vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] & vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] & vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] & 
vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] & vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] & vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] & vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] & vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] & vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] & vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] & vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] & vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] & vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] & vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] & vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] & vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] & vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] & vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] & vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] & vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] & vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] & vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] & vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] & vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] & vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] & vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] & vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] & vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] & vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] & vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] & vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] & vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] & vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] & vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] & vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] & vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] & vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] & vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] & vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] & vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] & vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] & vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] & vec2.generic[63]); + return vec1; +} +# define VUINT8x64_AND_DEFINED +#endif +#if !defined(VUINT8x64_OR_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_or(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] | vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] 
| vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] | vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] | vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] | vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] | vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] | vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] | vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] | vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] | vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] | vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] | vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] | vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] | vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] | vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] | vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] | vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] | vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] | vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] | vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] | vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] | vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] | vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] | vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] | vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] | vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] | vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] | vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] | vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] | vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] | vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] | vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] | vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] | vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] | vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] | vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] | vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] | vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] | vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] | vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] | vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] | vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] | vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] | vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] | vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] | vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] | vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] | vec2.generic[63]); + return vec1; +} +# define VUINT8x64_OR_DEFINED +#endif +#if !defined(VUINT8x64_XOR_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_xor(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]); + vec1.generic[10] = 
(vec1.generic[10] ^ vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] ^ vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] ^ vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] ^ vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] ^ vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] ^ vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] ^ vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] ^ vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] ^ vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] ^ vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] ^ vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] ^ vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] ^ vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] ^ vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] ^ vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] ^ vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] ^ vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] ^ vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] ^ vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] ^ vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] ^ vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] ^ vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] ^ vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] ^ vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] ^ vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] ^ vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] ^ vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] ^ vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] ^ vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] ^ vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] ^ vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] ^ vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] ^ vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] ^ vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] ^ vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] ^ vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] ^ vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] ^ vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] ^ vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] ^ vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] ^ vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] ^ vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] ^ vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] ^ vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] ^ vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] ^ vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] ^ vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] ^ vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] ^ vec2.generic[63]); + return vec1; +} +# define VUINT8x64_XOR_DEFINED +#endif +#if !defined(VUINT8x64_NOT_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_not(vuint8x64 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + 
vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + vec.generic[8] = ~vec.generic[8]; + vec.generic[9] = ~vec.generic[9]; + vec.generic[10] = ~vec.generic[10]; + vec.generic[11] = ~vec.generic[11]; + vec.generic[12] = ~vec.generic[12]; + vec.generic[13] = ~vec.generic[13]; + vec.generic[14] = ~vec.generic[14]; + vec.generic[15] = ~vec.generic[15]; + vec.generic[16] = ~vec.generic[16]; + vec.generic[17] = ~vec.generic[17]; + vec.generic[18] = ~vec.generic[18]; + vec.generic[19] = ~vec.generic[19]; + vec.generic[20] = ~vec.generic[20]; + vec.generic[21] = ~vec.generic[21]; + vec.generic[22] = ~vec.generic[22]; + vec.generic[23] = ~vec.generic[23]; + vec.generic[24] = ~vec.generic[24]; + vec.generic[25] = ~vec.generic[25]; + vec.generic[26] = ~vec.generic[26]; + vec.generic[27] = ~vec.generic[27]; + vec.generic[28] = ~vec.generic[28]; + vec.generic[29] = ~vec.generic[29]; + vec.generic[30] = ~vec.generic[30]; + vec.generic[31] = ~vec.generic[31]; + vec.generic[32] = ~vec.generic[32]; + vec.generic[33] = ~vec.generic[33]; + vec.generic[34] = ~vec.generic[34]; + vec.generic[35] = ~vec.generic[35]; + vec.generic[36] = ~vec.generic[36]; + vec.generic[37] = ~vec.generic[37]; + vec.generic[38] = ~vec.generic[38]; + vec.generic[39] = ~vec.generic[39]; + vec.generic[40] = ~vec.generic[40]; + vec.generic[41] = ~vec.generic[41]; + vec.generic[42] = ~vec.generic[42]; + vec.generic[43] = ~vec.generic[43]; + vec.generic[44] = ~vec.generic[44]; + vec.generic[45] = ~vec.generic[45]; + vec.generic[46] = ~vec.generic[46]; + vec.generic[47] = ~vec.generic[47]; + vec.generic[48] = ~vec.generic[48]; + vec.generic[49] = ~vec.generic[49]; + vec.generic[50] = ~vec.generic[50]; + vec.generic[51] = ~vec.generic[51]; + vec.generic[52] = ~vec.generic[52]; + vec.generic[53] = ~vec.generic[53]; + vec.generic[54] = ~vec.generic[54]; + vec.generic[55] = ~vec.generic[55]; + vec.generic[56] = ~vec.generic[56]; + vec.generic[57] = ~vec.generic[57]; + vec.generic[58] = ~vec.generic[58]; + vec.generic[59] = ~vec.generic[59]; + vec.generic[60] = ~vec.generic[60]; + vec.generic[61] = ~vec.generic[61]; + vec.generic[62] = ~vec.generic[62]; + vec.generic[63] = ~vec.generic[63]; + return vec; +} +# define VUINT8x64_NOT_DEFINED +#endif +#if !defined(VUINT8x64_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmplt(vuint8x64 vec1, vuint8x64 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 
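/* Descriptive note: this comparison and the ones that follow (cmpeq, cmpgt, cmple, cmpge)
   produce per-lane masks -- each result lane becomes all ones (0xFF) when the predicate
   holds and all zeros otherwise. The mask is written with memset, presumably so the same
   generated pattern extends to wider lane types where "all ones" spans several bytes. */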
0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] < vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] < vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] < vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] < vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] < vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] < vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] < vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] < vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] < vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] < vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] < vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] < vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] < vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] < vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] < vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] < vec2.generic[31]) ? 0xFF : 0, 1); + memset(&vec1.generic[32], (vec1.generic[32] < vec2.generic[32]) ? 0xFF : 0, 1); + memset(&vec1.generic[33], (vec1.generic[33] < vec2.generic[33]) ? 0xFF : 0, 1); + memset(&vec1.generic[34], (vec1.generic[34] < vec2.generic[34]) ? 0xFF : 0, 1); + memset(&vec1.generic[35], (vec1.generic[35] < vec2.generic[35]) ? 0xFF : 0, 1); + memset(&vec1.generic[36], (vec1.generic[36] < vec2.generic[36]) ? 0xFF : 0, 1); + memset(&vec1.generic[37], (vec1.generic[37] < vec2.generic[37]) ? 0xFF : 0, 1); + memset(&vec1.generic[38], (vec1.generic[38] < vec2.generic[38]) ? 0xFF : 0, 1); + memset(&vec1.generic[39], (vec1.generic[39] < vec2.generic[39]) ? 0xFF : 0, 1); + memset(&vec1.generic[40], (vec1.generic[40] < vec2.generic[40]) ? 0xFF : 0, 1); + memset(&vec1.generic[41], (vec1.generic[41] < vec2.generic[41]) ? 0xFF : 0, 1); + memset(&vec1.generic[42], (vec1.generic[42] < vec2.generic[42]) ? 0xFF : 0, 1); + memset(&vec1.generic[43], (vec1.generic[43] < vec2.generic[43]) ? 0xFF : 0, 1); + memset(&vec1.generic[44], (vec1.generic[44] < vec2.generic[44]) ? 0xFF : 0, 1); + memset(&vec1.generic[45], (vec1.generic[45] < vec2.generic[45]) ? 0xFF : 0, 1); + memset(&vec1.generic[46], (vec1.generic[46] < vec2.generic[46]) ? 0xFF : 0, 1); + memset(&vec1.generic[47], (vec1.generic[47] < vec2.generic[47]) ? 0xFF : 0, 1); + memset(&vec1.generic[48], (vec1.generic[48] < vec2.generic[48]) ? 0xFF : 0, 1); + memset(&vec1.generic[49], (vec1.generic[49] < vec2.generic[49]) ? 0xFF : 0, 1); + memset(&vec1.generic[50], (vec1.generic[50] < vec2.generic[50]) ? 0xFF : 0, 1); + memset(&vec1.generic[51], (vec1.generic[51] < vec2.generic[51]) ? 0xFF : 0, 1); + memset(&vec1.generic[52], (vec1.generic[52] < vec2.generic[52]) ? 0xFF : 0, 1); + memset(&vec1.generic[53], (vec1.generic[53] < vec2.generic[53]) ? 0xFF : 0, 1); + memset(&vec1.generic[54], (vec1.generic[54] < vec2.generic[54]) ? 0xFF : 0, 1); + memset(&vec1.generic[55], (vec1.generic[55] < vec2.generic[55]) ? 0xFF : 0, 1); + memset(&vec1.generic[56], (vec1.generic[56] < vec2.generic[56]) ? 
0xFF : 0, 1); + memset(&vec1.generic[57], (vec1.generic[57] < vec2.generic[57]) ? 0xFF : 0, 1); + memset(&vec1.generic[58], (vec1.generic[58] < vec2.generic[58]) ? 0xFF : 0, 1); + memset(&vec1.generic[59], (vec1.generic[59] < vec2.generic[59]) ? 0xFF : 0, 1); + memset(&vec1.generic[60], (vec1.generic[60] < vec2.generic[60]) ? 0xFF : 0, 1); + memset(&vec1.generic[61], (vec1.generic[61] < vec2.generic[61]) ? 0xFF : 0, 1); + memset(&vec1.generic[62], (vec1.generic[62] < vec2.generic[62]) ? 0xFF : 0, 1); + memset(&vec1.generic[63], (vec1.generic[63] < vec2.generic[63]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x64_CMPLT_DEFINED +#endif +#if !defined(VUINT8x64_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpeq(vuint8x64 vec1, vuint8x64 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] == vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] == vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] == vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] == vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] == vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] == vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] == vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] == vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] == vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] == vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] == vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] == vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] == vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] == vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] == vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] == vec2.generic[31]) ? 0xFF : 0, 1); + memset(&vec1.generic[32], (vec1.generic[32] == vec2.generic[32]) ? 0xFF : 0, 1); + memset(&vec1.generic[33], (vec1.generic[33] == vec2.generic[33]) ? 
0xFF : 0, 1); + memset(&vec1.generic[34], (vec1.generic[34] == vec2.generic[34]) ? 0xFF : 0, 1); + memset(&vec1.generic[35], (vec1.generic[35] == vec2.generic[35]) ? 0xFF : 0, 1); + memset(&vec1.generic[36], (vec1.generic[36] == vec2.generic[36]) ? 0xFF : 0, 1); + memset(&vec1.generic[37], (vec1.generic[37] == vec2.generic[37]) ? 0xFF : 0, 1); + memset(&vec1.generic[38], (vec1.generic[38] == vec2.generic[38]) ? 0xFF : 0, 1); + memset(&vec1.generic[39], (vec1.generic[39] == vec2.generic[39]) ? 0xFF : 0, 1); + memset(&vec1.generic[40], (vec1.generic[40] == vec2.generic[40]) ? 0xFF : 0, 1); + memset(&vec1.generic[41], (vec1.generic[41] == vec2.generic[41]) ? 0xFF : 0, 1); + memset(&vec1.generic[42], (vec1.generic[42] == vec2.generic[42]) ? 0xFF : 0, 1); + memset(&vec1.generic[43], (vec1.generic[43] == vec2.generic[43]) ? 0xFF : 0, 1); + memset(&vec1.generic[44], (vec1.generic[44] == vec2.generic[44]) ? 0xFF : 0, 1); + memset(&vec1.generic[45], (vec1.generic[45] == vec2.generic[45]) ? 0xFF : 0, 1); + memset(&vec1.generic[46], (vec1.generic[46] == vec2.generic[46]) ? 0xFF : 0, 1); + memset(&vec1.generic[47], (vec1.generic[47] == vec2.generic[47]) ? 0xFF : 0, 1); + memset(&vec1.generic[48], (vec1.generic[48] == vec2.generic[48]) ? 0xFF : 0, 1); + memset(&vec1.generic[49], (vec1.generic[49] == vec2.generic[49]) ? 0xFF : 0, 1); + memset(&vec1.generic[50], (vec1.generic[50] == vec2.generic[50]) ? 0xFF : 0, 1); + memset(&vec1.generic[51], (vec1.generic[51] == vec2.generic[51]) ? 0xFF : 0, 1); + memset(&vec1.generic[52], (vec1.generic[52] == vec2.generic[52]) ? 0xFF : 0, 1); + memset(&vec1.generic[53], (vec1.generic[53] == vec2.generic[53]) ? 0xFF : 0, 1); + memset(&vec1.generic[54], (vec1.generic[54] == vec2.generic[54]) ? 0xFF : 0, 1); + memset(&vec1.generic[55], (vec1.generic[55] == vec2.generic[55]) ? 0xFF : 0, 1); + memset(&vec1.generic[56], (vec1.generic[56] == vec2.generic[56]) ? 0xFF : 0, 1); + memset(&vec1.generic[57], (vec1.generic[57] == vec2.generic[57]) ? 0xFF : 0, 1); + memset(&vec1.generic[58], (vec1.generic[58] == vec2.generic[58]) ? 0xFF : 0, 1); + memset(&vec1.generic[59], (vec1.generic[59] == vec2.generic[59]) ? 0xFF : 0, 1); + memset(&vec1.generic[60], (vec1.generic[60] == vec2.generic[60]) ? 0xFF : 0, 1); + memset(&vec1.generic[61], (vec1.generic[61] == vec2.generic[61]) ? 0xFF : 0, 1); + memset(&vec1.generic[62], (vec1.generic[62] == vec2.generic[62]) ? 0xFF : 0, 1); + memset(&vec1.generic[63], (vec1.generic[63] == vec2.generic[63]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x64_CMPEQ_DEFINED +#endif +#if !defined(VUINT8x64_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpgt(vuint8x64 vec1, vuint8x64 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 
0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] > vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] > vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] > vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] > vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] > vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] > vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] > vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] > vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] > vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] > vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] > vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] > vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] > vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] > vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] > vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] > vec2.generic[31]) ? 0xFF : 0, 1); + memset(&vec1.generic[32], (vec1.generic[32] > vec2.generic[32]) ? 0xFF : 0, 1); + memset(&vec1.generic[33], (vec1.generic[33] > vec2.generic[33]) ? 0xFF : 0, 1); + memset(&vec1.generic[34], (vec1.generic[34] > vec2.generic[34]) ? 0xFF : 0, 1); + memset(&vec1.generic[35], (vec1.generic[35] > vec2.generic[35]) ? 0xFF : 0, 1); + memset(&vec1.generic[36], (vec1.generic[36] > vec2.generic[36]) ? 0xFF : 0, 1); + memset(&vec1.generic[37], (vec1.generic[37] > vec2.generic[37]) ? 0xFF : 0, 1); + memset(&vec1.generic[38], (vec1.generic[38] > vec2.generic[38]) ? 0xFF : 0, 1); + memset(&vec1.generic[39], (vec1.generic[39] > vec2.generic[39]) ? 0xFF : 0, 1); + memset(&vec1.generic[40], (vec1.generic[40] > vec2.generic[40]) ? 0xFF : 0, 1); + memset(&vec1.generic[41], (vec1.generic[41] > vec2.generic[41]) ? 0xFF : 0, 1); + memset(&vec1.generic[42], (vec1.generic[42] > vec2.generic[42]) ? 0xFF : 0, 1); + memset(&vec1.generic[43], (vec1.generic[43] > vec2.generic[43]) ? 0xFF : 0, 1); + memset(&vec1.generic[44], (vec1.generic[44] > vec2.generic[44]) ? 0xFF : 0, 1); + memset(&vec1.generic[45], (vec1.generic[45] > vec2.generic[45]) ? 0xFF : 0, 1); + memset(&vec1.generic[46], (vec1.generic[46] > vec2.generic[46]) ? 0xFF : 0, 1); + memset(&vec1.generic[47], (vec1.generic[47] > vec2.generic[47]) ? 0xFF : 0, 1); + memset(&vec1.generic[48], (vec1.generic[48] > vec2.generic[48]) ? 0xFF : 0, 1); + memset(&vec1.generic[49], (vec1.generic[49] > vec2.generic[49]) ? 0xFF : 0, 1); + memset(&vec1.generic[50], (vec1.generic[50] > vec2.generic[50]) ? 0xFF : 0, 1); + memset(&vec1.generic[51], (vec1.generic[51] > vec2.generic[51]) ? 0xFF : 0, 1); + memset(&vec1.generic[52], (vec1.generic[52] > vec2.generic[52]) ? 0xFF : 0, 1); + memset(&vec1.generic[53], (vec1.generic[53] > vec2.generic[53]) ? 
0xFF : 0, 1); + memset(&vec1.generic[54], (vec1.generic[54] > vec2.generic[54]) ? 0xFF : 0, 1); + memset(&vec1.generic[55], (vec1.generic[55] > vec2.generic[55]) ? 0xFF : 0, 1); + memset(&vec1.generic[56], (vec1.generic[56] > vec2.generic[56]) ? 0xFF : 0, 1); + memset(&vec1.generic[57], (vec1.generic[57] > vec2.generic[57]) ? 0xFF : 0, 1); + memset(&vec1.generic[58], (vec1.generic[58] > vec2.generic[58]) ? 0xFF : 0, 1); + memset(&vec1.generic[59], (vec1.generic[59] > vec2.generic[59]) ? 0xFF : 0, 1); + memset(&vec1.generic[60], (vec1.generic[60] > vec2.generic[60]) ? 0xFF : 0, 1); + memset(&vec1.generic[61], (vec1.generic[61] > vec2.generic[61]) ? 0xFF : 0, 1); + memset(&vec1.generic[62], (vec1.generic[62] > vec2.generic[62]) ? 0xFF : 0, 1); + memset(&vec1.generic[63], (vec1.generic[63] > vec2.generic[63]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x64_CMPGT_DEFINED +#endif +#if !defined(VUINT8x64_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmple(vuint8x64 vec1, vuint8x64 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] <= vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] <= vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] <= vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] <= vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] <= vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] <= vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] <= vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] <= vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] <= vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] <= vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] <= vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] <= vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] <= vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] <= vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] <= vec2.generic[30]) ? 
0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] <= vec2.generic[31]) ? 0xFF : 0, 1); + memset(&vec1.generic[32], (vec1.generic[32] <= vec2.generic[32]) ? 0xFF : 0, 1); + memset(&vec1.generic[33], (vec1.generic[33] <= vec2.generic[33]) ? 0xFF : 0, 1); + memset(&vec1.generic[34], (vec1.generic[34] <= vec2.generic[34]) ? 0xFF : 0, 1); + memset(&vec1.generic[35], (vec1.generic[35] <= vec2.generic[35]) ? 0xFF : 0, 1); + memset(&vec1.generic[36], (vec1.generic[36] <= vec2.generic[36]) ? 0xFF : 0, 1); + memset(&vec1.generic[37], (vec1.generic[37] <= vec2.generic[37]) ? 0xFF : 0, 1); + memset(&vec1.generic[38], (vec1.generic[38] <= vec2.generic[38]) ? 0xFF : 0, 1); + memset(&vec1.generic[39], (vec1.generic[39] <= vec2.generic[39]) ? 0xFF : 0, 1); + memset(&vec1.generic[40], (vec1.generic[40] <= vec2.generic[40]) ? 0xFF : 0, 1); + memset(&vec1.generic[41], (vec1.generic[41] <= vec2.generic[41]) ? 0xFF : 0, 1); + memset(&vec1.generic[42], (vec1.generic[42] <= vec2.generic[42]) ? 0xFF : 0, 1); + memset(&vec1.generic[43], (vec1.generic[43] <= vec2.generic[43]) ? 0xFF : 0, 1); + memset(&vec1.generic[44], (vec1.generic[44] <= vec2.generic[44]) ? 0xFF : 0, 1); + memset(&vec1.generic[45], (vec1.generic[45] <= vec2.generic[45]) ? 0xFF : 0, 1); + memset(&vec1.generic[46], (vec1.generic[46] <= vec2.generic[46]) ? 0xFF : 0, 1); + memset(&vec1.generic[47], (vec1.generic[47] <= vec2.generic[47]) ? 0xFF : 0, 1); + memset(&vec1.generic[48], (vec1.generic[48] <= vec2.generic[48]) ? 0xFF : 0, 1); + memset(&vec1.generic[49], (vec1.generic[49] <= vec2.generic[49]) ? 0xFF : 0, 1); + memset(&vec1.generic[50], (vec1.generic[50] <= vec2.generic[50]) ? 0xFF : 0, 1); + memset(&vec1.generic[51], (vec1.generic[51] <= vec2.generic[51]) ? 0xFF : 0, 1); + memset(&vec1.generic[52], (vec1.generic[52] <= vec2.generic[52]) ? 0xFF : 0, 1); + memset(&vec1.generic[53], (vec1.generic[53] <= vec2.generic[53]) ? 0xFF : 0, 1); + memset(&vec1.generic[54], (vec1.generic[54] <= vec2.generic[54]) ? 0xFF : 0, 1); + memset(&vec1.generic[55], (vec1.generic[55] <= vec2.generic[55]) ? 0xFF : 0, 1); + memset(&vec1.generic[56], (vec1.generic[56] <= vec2.generic[56]) ? 0xFF : 0, 1); + memset(&vec1.generic[57], (vec1.generic[57] <= vec2.generic[57]) ? 0xFF : 0, 1); + memset(&vec1.generic[58], (vec1.generic[58] <= vec2.generic[58]) ? 0xFF : 0, 1); + memset(&vec1.generic[59], (vec1.generic[59] <= vec2.generic[59]) ? 0xFF : 0, 1); + memset(&vec1.generic[60], (vec1.generic[60] <= vec2.generic[60]) ? 0xFF : 0, 1); + memset(&vec1.generic[61], (vec1.generic[61] <= vec2.generic[61]) ? 0xFF : 0, 1); + memset(&vec1.generic[62], (vec1.generic[62] <= vec2.generic[62]) ? 0xFF : 0, 1); + memset(&vec1.generic[63], (vec1.generic[63] <= vec2.generic[63]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x64_CMPLE_DEFINED +#endif +#if !defined(VUINT8x64_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpge(vuint8x64 vec1, vuint8x64 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 
0xFF : 0, 1); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 1); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 1); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 1); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 1); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 1); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 1); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 1); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 1); + memset(&vec1.generic[16], (vec1.generic[16] >= vec2.generic[16]) ? 0xFF : 0, 1); + memset(&vec1.generic[17], (vec1.generic[17] >= vec2.generic[17]) ? 0xFF : 0, 1); + memset(&vec1.generic[18], (vec1.generic[18] >= vec2.generic[18]) ? 0xFF : 0, 1); + memset(&vec1.generic[19], (vec1.generic[19] >= vec2.generic[19]) ? 0xFF : 0, 1); + memset(&vec1.generic[20], (vec1.generic[20] >= vec2.generic[20]) ? 0xFF : 0, 1); + memset(&vec1.generic[21], (vec1.generic[21] >= vec2.generic[21]) ? 0xFF : 0, 1); + memset(&vec1.generic[22], (vec1.generic[22] >= vec2.generic[22]) ? 0xFF : 0, 1); + memset(&vec1.generic[23], (vec1.generic[23] >= vec2.generic[23]) ? 0xFF : 0, 1); + memset(&vec1.generic[24], (vec1.generic[24] >= vec2.generic[24]) ? 0xFF : 0, 1); + memset(&vec1.generic[25], (vec1.generic[25] >= vec2.generic[25]) ? 0xFF : 0, 1); + memset(&vec1.generic[26], (vec1.generic[26] >= vec2.generic[26]) ? 0xFF : 0, 1); + memset(&vec1.generic[27], (vec1.generic[27] >= vec2.generic[27]) ? 0xFF : 0, 1); + memset(&vec1.generic[28], (vec1.generic[28] >= vec2.generic[28]) ? 0xFF : 0, 1); + memset(&vec1.generic[29], (vec1.generic[29] >= vec2.generic[29]) ? 0xFF : 0, 1); + memset(&vec1.generic[30], (vec1.generic[30] >= vec2.generic[30]) ? 0xFF : 0, 1); + memset(&vec1.generic[31], (vec1.generic[31] >= vec2.generic[31]) ? 0xFF : 0, 1); + memset(&vec1.generic[32], (vec1.generic[32] >= vec2.generic[32]) ? 0xFF : 0, 1); + memset(&vec1.generic[33], (vec1.generic[33] >= vec2.generic[33]) ? 0xFF : 0, 1); + memset(&vec1.generic[34], (vec1.generic[34] >= vec2.generic[34]) ? 0xFF : 0, 1); + memset(&vec1.generic[35], (vec1.generic[35] >= vec2.generic[35]) ? 0xFF : 0, 1); + memset(&vec1.generic[36], (vec1.generic[36] >= vec2.generic[36]) ? 0xFF : 0, 1); + memset(&vec1.generic[37], (vec1.generic[37] >= vec2.generic[37]) ? 0xFF : 0, 1); + memset(&vec1.generic[38], (vec1.generic[38] >= vec2.generic[38]) ? 0xFF : 0, 1); + memset(&vec1.generic[39], (vec1.generic[39] >= vec2.generic[39]) ? 0xFF : 0, 1); + memset(&vec1.generic[40], (vec1.generic[40] >= vec2.generic[40]) ? 0xFF : 0, 1); + memset(&vec1.generic[41], (vec1.generic[41] >= vec2.generic[41]) ? 0xFF : 0, 1); + memset(&vec1.generic[42], (vec1.generic[42] >= vec2.generic[42]) ? 0xFF : 0, 1); + memset(&vec1.generic[43], (vec1.generic[43] >= vec2.generic[43]) ? 0xFF : 0, 1); + memset(&vec1.generic[44], (vec1.generic[44] >= vec2.generic[44]) ? 0xFF : 0, 1); + memset(&vec1.generic[45], (vec1.generic[45] >= vec2.generic[45]) ? 0xFF : 0, 1); + memset(&vec1.generic[46], (vec1.generic[46] >= vec2.generic[46]) ? 0xFF : 0, 1); + memset(&vec1.generic[47], (vec1.generic[47] >= vec2.generic[47]) ? 0xFF : 0, 1); + memset(&vec1.generic[48], (vec1.generic[48] >= vec2.generic[48]) ? 0xFF : 0, 1); + memset(&vec1.generic[49], (vec1.generic[49] >= vec2.generic[49]) ? 
0xFF : 0, 1); + memset(&vec1.generic[50], (vec1.generic[50] >= vec2.generic[50]) ? 0xFF : 0, 1); + memset(&vec1.generic[51], (vec1.generic[51] >= vec2.generic[51]) ? 0xFF : 0, 1); + memset(&vec1.generic[52], (vec1.generic[52] >= vec2.generic[52]) ? 0xFF : 0, 1); + memset(&vec1.generic[53], (vec1.generic[53] >= vec2.generic[53]) ? 0xFF : 0, 1); + memset(&vec1.generic[54], (vec1.generic[54] >= vec2.generic[54]) ? 0xFF : 0, 1); + memset(&vec1.generic[55], (vec1.generic[55] >= vec2.generic[55]) ? 0xFF : 0, 1); + memset(&vec1.generic[56], (vec1.generic[56] >= vec2.generic[56]) ? 0xFF : 0, 1); + memset(&vec1.generic[57], (vec1.generic[57] >= vec2.generic[57]) ? 0xFF : 0, 1); + memset(&vec1.generic[58], (vec1.generic[58] >= vec2.generic[58]) ? 0xFF : 0, 1); + memset(&vec1.generic[59], (vec1.generic[59] >= vec2.generic[59]) ? 0xFF : 0, 1); + memset(&vec1.generic[60], (vec1.generic[60] >= vec2.generic[60]) ? 0xFF : 0, 1); + memset(&vec1.generic[61], (vec1.generic[61] >= vec2.generic[61]) ? 0xFF : 0, 1); + memset(&vec1.generic[62], (vec1.generic[62] >= vec2.generic[62]) ? 0xFF : 0, 1); + memset(&vec1.generic[63], (vec1.generic[63] >= vec2.generic[63]) ? 0xFF : 0, 1); + return vec1; +} +# define VUINT8x64_CMPGE_DEFINED +#endif +#if !defined(VUINT8x64_MIN_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_min(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] < vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] < vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] < vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] < vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] < vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] < vec2.generic[21]) ? 
(vec1.generic[21]) : (vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] < vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] < vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] < vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] < vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] < vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] < vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] < vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] < vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] < vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] < vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] < vec2.generic[32]) ? (vec1.generic[32]) : (vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] < vec2.generic[33]) ? (vec1.generic[33]) : (vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] < vec2.generic[34]) ? (vec1.generic[34]) : (vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] < vec2.generic[35]) ? (vec1.generic[35]) : (vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] < vec2.generic[36]) ? (vec1.generic[36]) : (vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] < vec2.generic[37]) ? (vec1.generic[37]) : (vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] < vec2.generic[38]) ? (vec1.generic[38]) : (vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] < vec2.generic[39]) ? (vec1.generic[39]) : (vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] < vec2.generic[40]) ? (vec1.generic[40]) : (vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] < vec2.generic[41]) ? (vec1.generic[41]) : (vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] < vec2.generic[42]) ? (vec1.generic[42]) : (vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] < vec2.generic[43]) ? (vec1.generic[43]) : (vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] < vec2.generic[44]) ? (vec1.generic[44]) : (vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] < vec2.generic[45]) ? (vec1.generic[45]) : (vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] < vec2.generic[46]) ? (vec1.generic[46]) : (vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] < vec2.generic[47]) ? (vec1.generic[47]) : (vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] < vec2.generic[48]) ? (vec1.generic[48]) : (vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] < vec2.generic[49]) ? (vec1.generic[49]) : (vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] < vec2.generic[50]) ? (vec1.generic[50]) : (vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] < vec2.generic[51]) ? (vec1.generic[51]) : (vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] < vec2.generic[52]) ? (vec1.generic[52]) : (vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] < vec2.generic[53]) ? (vec1.generic[53]) : (vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] < vec2.generic[54]) ? (vec1.generic[54]) : (vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] < vec2.generic[55]) ? 
(vec1.generic[55]) : (vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] < vec2.generic[56]) ? (vec1.generic[56]) : (vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] < vec2.generic[57]) ? (vec1.generic[57]) : (vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] < vec2.generic[58]) ? (vec1.generic[58]) : (vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] < vec2.generic[59]) ? (vec1.generic[59]) : (vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] < vec2.generic[60]) ? (vec1.generic[60]) : (vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] < vec2.generic[61]) ? (vec1.generic[61]) : (vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] < vec2.generic[62]) ? (vec1.generic[62]) : (vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] < vec2.generic[63]) ? (vec1.generic[63]) : (vec2.generic[63]); + return vec1; +} +# define VUINT8x64_MIN_DEFINED +#endif +#if !defined(VUINT8x64_MAX_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_max(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] > vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] > vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] > vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] > vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] > vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] > vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] > vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] > vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] > vec2.generic[24]) ? 
(vec1.generic[24]) : (vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] > vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] > vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] > vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] > vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] > vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] > vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] > vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]); + vec1.generic[32] = (vec1.generic[32] > vec2.generic[32]) ? (vec1.generic[32]) : (vec2.generic[32]); + vec1.generic[33] = (vec1.generic[33] > vec2.generic[33]) ? (vec1.generic[33]) : (vec2.generic[33]); + vec1.generic[34] = (vec1.generic[34] > vec2.generic[34]) ? (vec1.generic[34]) : (vec2.generic[34]); + vec1.generic[35] = (vec1.generic[35] > vec2.generic[35]) ? (vec1.generic[35]) : (vec2.generic[35]); + vec1.generic[36] = (vec1.generic[36] > vec2.generic[36]) ? (vec1.generic[36]) : (vec2.generic[36]); + vec1.generic[37] = (vec1.generic[37] > vec2.generic[37]) ? (vec1.generic[37]) : (vec2.generic[37]); + vec1.generic[38] = (vec1.generic[38] > vec2.generic[38]) ? (vec1.generic[38]) : (vec2.generic[38]); + vec1.generic[39] = (vec1.generic[39] > vec2.generic[39]) ? (vec1.generic[39]) : (vec2.generic[39]); + vec1.generic[40] = (vec1.generic[40] > vec2.generic[40]) ? (vec1.generic[40]) : (vec2.generic[40]); + vec1.generic[41] = (vec1.generic[41] > vec2.generic[41]) ? (vec1.generic[41]) : (vec2.generic[41]); + vec1.generic[42] = (vec1.generic[42] > vec2.generic[42]) ? (vec1.generic[42]) : (vec2.generic[42]); + vec1.generic[43] = (vec1.generic[43] > vec2.generic[43]) ? (vec1.generic[43]) : (vec2.generic[43]); + vec1.generic[44] = (vec1.generic[44] > vec2.generic[44]) ? (vec1.generic[44]) : (vec2.generic[44]); + vec1.generic[45] = (vec1.generic[45] > vec2.generic[45]) ? (vec1.generic[45]) : (vec2.generic[45]); + vec1.generic[46] = (vec1.generic[46] > vec2.generic[46]) ? (vec1.generic[46]) : (vec2.generic[46]); + vec1.generic[47] = (vec1.generic[47] > vec2.generic[47]) ? (vec1.generic[47]) : (vec2.generic[47]); + vec1.generic[48] = (vec1.generic[48] > vec2.generic[48]) ? (vec1.generic[48]) : (vec2.generic[48]); + vec1.generic[49] = (vec1.generic[49] > vec2.generic[49]) ? (vec1.generic[49]) : (vec2.generic[49]); + vec1.generic[50] = (vec1.generic[50] > vec2.generic[50]) ? (vec1.generic[50]) : (vec2.generic[50]); + vec1.generic[51] = (vec1.generic[51] > vec2.generic[51]) ? (vec1.generic[51]) : (vec2.generic[51]); + vec1.generic[52] = (vec1.generic[52] > vec2.generic[52]) ? (vec1.generic[52]) : (vec2.generic[52]); + vec1.generic[53] = (vec1.generic[53] > vec2.generic[53]) ? (vec1.generic[53]) : (vec2.generic[53]); + vec1.generic[54] = (vec1.generic[54] > vec2.generic[54]) ? (vec1.generic[54]) : (vec2.generic[54]); + vec1.generic[55] = (vec1.generic[55] > vec2.generic[55]) ? (vec1.generic[55]) : (vec2.generic[55]); + vec1.generic[56] = (vec1.generic[56] > vec2.generic[56]) ? (vec1.generic[56]) : (vec2.generic[56]); + vec1.generic[57] = (vec1.generic[57] > vec2.generic[57]) ? (vec1.generic[57]) : (vec2.generic[57]); + vec1.generic[58] = (vec1.generic[58] > vec2.generic[58]) ? 
(vec1.generic[58]) : (vec2.generic[58]); + vec1.generic[59] = (vec1.generic[59] > vec2.generic[59]) ? (vec1.generic[59]) : (vec2.generic[59]); + vec1.generic[60] = (vec1.generic[60] > vec2.generic[60]) ? (vec1.generic[60]) : (vec2.generic[60]); + vec1.generic[61] = (vec1.generic[61] > vec2.generic[61]) ? (vec1.generic[61]) : (vec2.generic[61]); + vec1.generic[62] = (vec1.generic[62] > vec2.generic[62]) ? (vec1.generic[62]) : (vec2.generic[62]); + vec1.generic[63] = (vec1.generic[63] > vec2.generic[63]) ? (vec1.generic[63]) : (vec2.generic[63]); + return vec1; +} +# define VUINT8x64_MAX_DEFINED +#endif +#if !defined(VUINT8x64_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_rshift(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + vec1.generic[4] >>= vec2.generic[0]; + vec1.generic[5] >>= vec2.generic[0]; + vec1.generic[6] >>= vec2.generic[0]; + vec1.generic[7] >>= vec2.generic[0]; + vec1.generic[8] >>= vec2.generic[0]; + vec1.generic[9] >>= vec2.generic[0]; + vec1.generic[10] >>= vec2.generic[0]; + vec1.generic[11] >>= vec2.generic[0]; + vec1.generic[12] >>= vec2.generic[0]; + vec1.generic[13] >>= vec2.generic[0]; + vec1.generic[14] >>= vec2.generic[0]; + vec1.generic[15] >>= vec2.generic[0]; + vec1.generic[16] >>= vec2.generic[0]; + vec1.generic[17] >>= vec2.generic[0]; + vec1.generic[18] >>= vec2.generic[0]; + vec1.generic[19] >>= vec2.generic[0]; + vec1.generic[20] >>= vec2.generic[0]; + vec1.generic[21] >>= vec2.generic[0]; + vec1.generic[22] >>= vec2.generic[0]; + vec1.generic[23] >>= vec2.generic[0]; + vec1.generic[24] >>= vec2.generic[0]; + vec1.generic[25] >>= vec2.generic[0]; + vec1.generic[26] >>= vec2.generic[0]; + vec1.generic[27] >>= vec2.generic[0]; + vec1.generic[28] >>= vec2.generic[0]; + vec1.generic[29] >>= vec2.generic[0]; + vec1.generic[30] >>= vec2.generic[0]; + vec1.generic[31] >>= vec2.generic[0]; + vec1.generic[32] >>= vec2.generic[0]; + vec1.generic[33] >>= vec2.generic[0]; + vec1.generic[34] >>= vec2.generic[0]; + vec1.generic[35] >>= vec2.generic[0]; + vec1.generic[36] >>= vec2.generic[0]; + vec1.generic[37] >>= vec2.generic[0]; + vec1.generic[38] >>= vec2.generic[0]; + vec1.generic[39] >>= vec2.generic[0]; + vec1.generic[40] >>= vec2.generic[0]; + vec1.generic[41] >>= vec2.generic[0]; + vec1.generic[42] >>= vec2.generic[0]; + vec1.generic[43] >>= vec2.generic[0]; + vec1.generic[44] >>= vec2.generic[0]; + vec1.generic[45] >>= vec2.generic[0]; + vec1.generic[46] >>= vec2.generic[0]; + vec1.generic[47] >>= vec2.generic[0]; + vec1.generic[48] >>= vec2.generic[0]; + vec1.generic[49] >>= vec2.generic[0]; + vec1.generic[50] >>= vec2.generic[0]; + vec1.generic[51] >>= vec2.generic[0]; + vec1.generic[52] >>= vec2.generic[0]; + vec1.generic[53] >>= vec2.generic[0]; + vec1.generic[54] >>= vec2.generic[0]; + vec1.generic[55] >>= vec2.generic[0]; + vec1.generic[56] >>= vec2.generic[0]; + vec1.generic[57] >>= vec2.generic[0]; + vec1.generic[58] >>= vec2.generic[0]; + vec1.generic[59] >>= vec2.generic[0]; + vec1.generic[60] >>= vec2.generic[0]; + vec1.generic[61] >>= vec2.generic[0]; + vec1.generic[62] >>= vec2.generic[0]; + vec1.generic[63] >>= vec2.generic[0]; + return vec1; +} +# define VUINT8x64_RSHIFT_DEFINED +#endif +#if !defined(VUINT8x64_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_lrshift(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] 
>>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + vec1.generic[4] >>= vec2.generic[0]; + vec1.generic[5] >>= vec2.generic[0]; + vec1.generic[6] >>= vec2.generic[0]; + vec1.generic[7] >>= vec2.generic[0]; + vec1.generic[8] >>= vec2.generic[0]; + vec1.generic[9] >>= vec2.generic[0]; + vec1.generic[10] >>= vec2.generic[0]; + vec1.generic[11] >>= vec2.generic[0]; + vec1.generic[12] >>= vec2.generic[0]; + vec1.generic[13] >>= vec2.generic[0]; + vec1.generic[14] >>= vec2.generic[0]; + vec1.generic[15] >>= vec2.generic[0]; + vec1.generic[16] >>= vec2.generic[0]; + vec1.generic[17] >>= vec2.generic[0]; + vec1.generic[18] >>= vec2.generic[0]; + vec1.generic[19] >>= vec2.generic[0]; + vec1.generic[20] >>= vec2.generic[0]; + vec1.generic[21] >>= vec2.generic[0]; + vec1.generic[22] >>= vec2.generic[0]; + vec1.generic[23] >>= vec2.generic[0]; + vec1.generic[24] >>= vec2.generic[0]; + vec1.generic[25] >>= vec2.generic[0]; + vec1.generic[26] >>= vec2.generic[0]; + vec1.generic[27] >>= vec2.generic[0]; + vec1.generic[28] >>= vec2.generic[0]; + vec1.generic[29] >>= vec2.generic[0]; + vec1.generic[30] >>= vec2.generic[0]; + vec1.generic[31] >>= vec2.generic[0]; + vec1.generic[32] >>= vec2.generic[0]; + vec1.generic[33] >>= vec2.generic[0]; + vec1.generic[34] >>= vec2.generic[0]; + vec1.generic[35] >>= vec2.generic[0]; + vec1.generic[36] >>= vec2.generic[0]; + vec1.generic[37] >>= vec2.generic[0]; + vec1.generic[38] >>= vec2.generic[0]; + vec1.generic[39] >>= vec2.generic[0]; + vec1.generic[40] >>= vec2.generic[0]; + vec1.generic[41] >>= vec2.generic[0]; + vec1.generic[42] >>= vec2.generic[0]; + vec1.generic[43] >>= vec2.generic[0]; + vec1.generic[44] >>= vec2.generic[0]; + vec1.generic[45] >>= vec2.generic[0]; + vec1.generic[46] >>= vec2.generic[0]; + vec1.generic[47] >>= vec2.generic[0]; + vec1.generic[48] >>= vec2.generic[0]; + vec1.generic[49] >>= vec2.generic[0]; + vec1.generic[50] >>= vec2.generic[0]; + vec1.generic[51] >>= vec2.generic[0]; + vec1.generic[52] >>= vec2.generic[0]; + vec1.generic[53] >>= vec2.generic[0]; + vec1.generic[54] >>= vec2.generic[0]; + vec1.generic[55] >>= vec2.generic[0]; + vec1.generic[56] >>= vec2.generic[0]; + vec1.generic[57] >>= vec2.generic[0]; + vec1.generic[58] >>= vec2.generic[0]; + vec1.generic[59] >>= vec2.generic[0]; + vec1.generic[60] >>= vec2.generic[0]; + vec1.generic[61] >>= vec2.generic[0]; + vec1.generic[62] >>= vec2.generic[0]; + vec1.generic[63] >>= vec2.generic[0]; + return vec1; +} +# define VUINT8x64_LRSHIFT_DEFINED +#endif +#if !defined(VUINT8x64_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x64 vuint8x64_lshift(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + vec1.generic[2] <<= vec2.generic[0]; + vec1.generic[3] <<= vec2.generic[0]; + vec1.generic[4] <<= vec2.generic[0]; + vec1.generic[5] <<= vec2.generic[0]; + vec1.generic[6] <<= vec2.generic[0]; + vec1.generic[7] <<= vec2.generic[0]; + vec1.generic[8] <<= vec2.generic[0]; + vec1.generic[9] <<= vec2.generic[0]; + vec1.generic[10] <<= vec2.generic[0]; + vec1.generic[11] <<= vec2.generic[0]; + vec1.generic[12] <<= vec2.generic[0]; + vec1.generic[13] <<= vec2.generic[0]; + vec1.generic[14] <<= vec2.generic[0]; + vec1.generic[15] <<= vec2.generic[0]; + vec1.generic[16] <<= vec2.generic[0]; + vec1.generic[17] <<= vec2.generic[0]; + vec1.generic[18] <<= vec2.generic[0]; + vec1.generic[19] <<= vec2.generic[0]; + vec1.generic[20] <<= vec2.generic[0]; + vec1.generic[21] <<= vec2.generic[0]; + vec1.generic[22] <<= vec2.generic[0]; + 
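One behavioural note on the unsigned shift helpers in this run of generated code: vuint8x64_rshift, vuint8x64_lrshift and vuint8x64_lshift (and the vuint16x2 versions further down) all read the shift count from vec2.generic[0], so every lane is shifted by lane 0's count, whereas the signed vint16x2/vint16x4 shifts below use a per-lane count. A minimal usage sketch, assuming the vuint8x64_splat helper generated earlier in this header and purely hypothetical values:

    #include "vec/vec.h"                  /* assumed public entry point              */

    vuint8x64 v = vuint8x64_splat(0x80);  /* every lane = 0x80 (assumed helper)      */
    vuint8x64 n = vuint8x64_splat(3);     /* only lane 0 of the count is consulted   */
    v = vuint8x64_rshift(v, n);           /* every lane is now 0x80 >> 3 == 0x10     */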
vec1.generic[23] <<= vec2.generic[0]; + vec1.generic[24] <<= vec2.generic[0]; + vec1.generic[25] <<= vec2.generic[0]; + vec1.generic[26] <<= vec2.generic[0]; + vec1.generic[27] <<= vec2.generic[0]; + vec1.generic[28] <<= vec2.generic[0]; + vec1.generic[29] <<= vec2.generic[0]; + vec1.generic[30] <<= vec2.generic[0]; + vec1.generic[31] <<= vec2.generic[0]; + vec1.generic[32] <<= vec2.generic[0]; + vec1.generic[33] <<= vec2.generic[0]; + vec1.generic[34] <<= vec2.generic[0]; + vec1.generic[35] <<= vec2.generic[0]; + vec1.generic[36] <<= vec2.generic[0]; + vec1.generic[37] <<= vec2.generic[0]; + vec1.generic[38] <<= vec2.generic[0]; + vec1.generic[39] <<= vec2.generic[0]; + vec1.generic[40] <<= vec2.generic[0]; + vec1.generic[41] <<= vec2.generic[0]; + vec1.generic[42] <<= vec2.generic[0]; + vec1.generic[43] <<= vec2.generic[0]; + vec1.generic[44] <<= vec2.generic[0]; + vec1.generic[45] <<= vec2.generic[0]; + vec1.generic[46] <<= vec2.generic[0]; + vec1.generic[47] <<= vec2.generic[0]; + vec1.generic[48] <<= vec2.generic[0]; + vec1.generic[49] <<= vec2.generic[0]; + vec1.generic[50] <<= vec2.generic[0]; + vec1.generic[51] <<= vec2.generic[0]; + vec1.generic[52] <<= vec2.generic[0]; + vec1.generic[53] <<= vec2.generic[0]; + vec1.generic[54] <<= vec2.generic[0]; + vec1.generic[55] <<= vec2.generic[0]; + vec1.generic[56] <<= vec2.generic[0]; + vec1.generic[57] <<= vec2.generic[0]; + vec1.generic[58] <<= vec2.generic[0]; + vec1.generic[59] <<= vec2.generic[0]; + vec1.generic[60] <<= vec2.generic[0]; + vec1.generic[61] <<= vec2.generic[0]; + vec1.generic[62] <<= vec2.generic[0]; + vec1.generic[63] <<= vec2.generic[0]; + return vec1; +} +# define VUINT8x64_LSHIFT_DEFINED +#endif +#if !defined(VINT16x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_splat(vec_int16 x) +{ + vint16x2 vec; + vec.generic[0] = x; + vec.generic[1] = x; + return vec; +} # define VINT16x2_SPLAT_DEFINED #endif -#ifndef VINT16x2_LOAD_ALIGNED_DEFINED -VEC_GENERIC_LOAD_ALIGNED(/* nothing */, 16, 2) +#if !defined(VINT16x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_load_aligned(const vec_int16 x[2]) +{ + vint16x2 vec; + memcpy(vec.generic, x, 4); + return vec; +} # define VINT16x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT16x2_LOAD_DEFINED -VEC_GENERIC_LOAD(/* nothing */, 16, 2) +#if !defined(VINT16x2_LOAD_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_load(const vec_int16 x[2]) +{ + vint16x2 vec; + memcpy(vec.generic, x, 4); + return vec; +} # define VINT16x2_LOAD_DEFINED #endif -#ifndef VINT16x2_STORE_ALIGNED_DEFINED -VEC_GENERIC_STORE_ALIGNED(/* nothing */, 16, 2) +#if !defined(VINT16x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint16x2_store_aligned(vint16x2 vec, vec_int16 x[2]) +{ + memcpy(x, vec.generic, 4); +} # define VINT16x2_STORE_ALIGNED_DEFINED #endif -#ifndef VINT16x2_STORE_DEFINED -VEC_GENERIC_STORE(/* nothing */, 16, 2) +#if !defined(VINT16x2_STORE_DEFINED) +VEC_FUNC_IMPL void vint16x2_store(vint16x2 vec, vec_int16 x[2]) +{ + memcpy(x, vec.generic, 4); +} # define VINT16x2_STORE_DEFINED #endif -#ifndef VINT16x2_ADD_DEFINED -VEC_GENERIC_ADD(/* nothing */, 16, 2) +#if !defined(VINT16x2_ADD_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_add(vint16x2 vec1, vint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + return vec1; +} # define VINT16x2_ADD_DEFINED #endif -#ifndef VINT16x2_SUB_DEFINED -VEC_GENERIC_SUB(/* nothing */, 16, 2) +#if !defined(VINT16x2_SUB_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_sub(vint16x2 vec1, vint16x2 vec2) +{ + 
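The hunks here also replace the old VEC_GENERIC_* macro invocations with fully expanded VEC_FUNC_IMPL functions. One pattern worth noting: the cmp* bodies memset each lane to 0xFF bytes when the comparison holds and 0x00 otherwise, so the results are the usual all-ones/all-zeros SIMD-style masks and compose directly with the bitwise helpers. A small sketch using the vint16x2 helpers defined in this hunk (the input values are hypothetical):

    #include "vec/vec.h"                        /* assumed public entry point        */

    vint16x2 a = vint16x2_splat(5), b = vint16x2_splat(9);   /* hypothetical inputs  */
    vint16x2 m = vint16x2_cmpgt(a, b);          /* each lane: 0xFFFF if a > b, else 0 */
    vint16x2 hi = vint16x2_or(vint16x2_and(m, a),
                              vint16x2_and(vint16x2_not(m), b));  /* == vint16x2_max(a, b) */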
vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + return vec1; +} # define VINT16x2_SUB_DEFINED #endif -#ifndef VINT16x2_MUL_DEFINED -VEC_GENERIC_MUL(/* nothing */, 16, 2) +#if !defined(VINT16x2_MUL_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_mul(vint16x2 vec1, vint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + return vec1; +} # define VINT16x2_MUL_DEFINED #endif -#ifndef VINT16x2_DIV_DEFINED -VEC_GENERIC_DIV(/* nothing */, 16, 2) +#if !defined(VINT16x2_DIV_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_div(vint16x2 vec1, vint16x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + return vec1; +} # define VINT16x2_DIV_DEFINED #endif -#ifndef VINT16x2_MOD_DEFINED -VEC_GENERIC_MOD(/* nothing */, 16, 2) +#if !defined(VINT16x2_MOD_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_mod(vint16x2 vec1, vint16x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + return vec1; +} # define VINT16x2_MOD_DEFINED #endif -#ifndef VINT16x2_AVG_DEFINED -VEC_GENERIC_AVG(/* nothing */, 16, 2) +#if !defined(VINT16x2_AVG_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_avg(vint16x2 vec1, vint16x2 vec2) +{ + vec_int16 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} # define VINT16x2_AVG_DEFINED #endif -#ifndef VINT16x2_AND_DEFINED -VEC_GENERIC_AND(/* nothing */, 16, 2) +#if !defined(VINT16x2_AND_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_and(vint16x2 vec1, vint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + return vec1; +} # define VINT16x2_AND_DEFINED #endif -#ifndef VINT16x2_OR_DEFINED -VEC_GENERIC_OR(/* nothing */, 16, 2) +#if !defined(VINT16x2_OR_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_or(vint16x2 vec1, vint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + return vec1; +} # define VINT16x2_OR_DEFINED #endif -#ifndef VINT16x2_XOR_DEFINED -VEC_GENERIC_XOR(/* nothing */, 16, 2) +#if !defined(VINT16x2_XOR_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_xor(vint16x2 vec1, vint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + return vec1; +} # define VINT16x2_XOR_DEFINED #endif -#ifndef VINT16x2_NOT_DEFINED -VEC_GENERIC_NOT(/* nothing */, 16, 2) +#if !defined(VINT16x2_NOT_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_not(vint16x2 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + return vec; +} # define VINT16x2_NOT_DEFINED #endif -#ifndef VINT16x2_CMPLT_DEFINED -VEC_GENERIC_CMPLT(/* nothing */, 16, 2) +#if !defined(VINT16x2_CMPLT_DEFINED) +VEC_FUNC_IMPL vint16x2 
vint16x2_cmplt(vint16x2 vec1, vint16x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2); + return vec1; +} # define VINT16x2_CMPLT_DEFINED #endif -#ifndef VINT16x2_CMPEQ_DEFINED -VEC_GENERIC_CMPEQ(/* nothing */, 16, 2) +#if !defined(VINT16x2_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_cmpeq(vint16x2 vec1, vint16x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2); + return vec1; +} # define VINT16x2_CMPEQ_DEFINED #endif -#ifndef VINT16x2_CMPGT_DEFINED -VEC_GENERIC_CMPGT(/* nothing */, 16, 2) +#if !defined(VINT16x2_CMPGT_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_cmpgt(vint16x2 vec1, vint16x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2); + return vec1; +} # define VINT16x2_CMPGT_DEFINED #endif -#ifndef VINT16x2_CMPLE_DEFINED -VEC_GENERIC_CMPLE(/* nothing */, 16, 2) +#if !defined(VINT16x2_CMPLE_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_cmple(vint16x2 vec1, vint16x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2); + return vec1; +} # define VINT16x2_CMPLE_DEFINED #endif -#ifndef VINT16x2_CMPGE_DEFINED -VEC_GENERIC_CMPGE(/* nothing */, 16, 2) +#if !defined(VINT16x2_CMPGE_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_cmpge(vint16x2 vec1, vint16x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2); + return vec1; +} # define VINT16x2_CMPGE_DEFINED #endif -#ifndef VINT16x2_MIN_DEFINED -VEC_GENERIC_MIN(/* nothing */, 16, 2) +#if !defined(VINT16x2_MIN_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_min(vint16x2 vec1, vint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VINT16x2_MIN_DEFINED #endif -#ifndef VINT16x2_MAX_DEFINED -VEC_GENERIC_MAX(/* nothing */, 16, 2) +#if !defined(VINT16x2_MAX_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_max(vint16x2 vec1, vint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? 
(vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VINT16x2_MAX_DEFINED #endif -#ifndef VINT16x2_RSHIFT_DEFINED -VEC_GENERIC_RSHIFT(/* nothing */, 16, 2) +#if !defined(VINT16x2_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_rshift(vint16x2 vec1, vuint16x2 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); + return vec1; +} # define VINT16x2_RSHIFT_DEFINED #endif -#ifndef VINT16x2_LRSHIFT_DEFINED -VEC_GENERIC_LRSHIFT(/* nothing */, 16, 2) +#if !defined(VINT16x2_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_lrshift(vint16x2 vec1, vuint16x2 vec2) +{ + union { vec_uint16 u; vec_int16 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + return vec1; +} # define VINT16x2_LRSHIFT_DEFINED #endif -#ifndef VINT16x2_LSHIFT_DEFINED -VEC_GENERIC_LSHIFT(/* nothing */, 16, 2) +#if !defined(VINT16x2_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x2 vint16x2_lshift(vint16x2 vec1, vuint16x2 vec2) +{ + union { vec_uint16 u; vec_int16 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + return vec1; +} # define VINT16x2_LSHIFT_DEFINED #endif - - -/* vint16x2 */ - -#ifndef VUINT16x2_SPLAT_DEFINED -VEC_GENERIC_SPLAT(u, 16, 2) +#if !defined(VUINT16x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_splat(vec_uint16 x) +{ + vuint16x2 vec; + vec.generic[0] = x; + vec.generic[1] = x; + return vec; +} # define VUINT16x2_SPLAT_DEFINED #endif -#ifndef VUINT16x2_LOAD_ALIGNED_DEFINED -VEC_GENERIC_LOAD_ALIGNED(u, 16, 2) +#if !defined(VUINT16x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_load_aligned(const vec_uint16 x[2]) +{ + vuint16x2 vec; + memcpy(vec.generic, x, 4); + return vec; +} # define VUINT16x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT16x2_LOAD_DEFINED -VEC_GENERIC_LOAD(u, 16, 2) +#if !defined(VUINT16x2_LOAD_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_load(const vec_uint16 x[2]) +{ + vuint16x2 vec; + memcpy(vec.generic, x, 4); + return vec; +} # define VUINT16x2_LOAD_DEFINED #endif -#ifndef VUINT16x2_STORE_ALIGNED_DEFINED -VEC_GENERIC_STORE_ALIGNED(u, 16, 2) +#if !defined(VUINT16x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint16x2_store_aligned(vuint16x2 vec, vec_uint16 x[2]) +{ + memcpy(x, vec.generic, 4); +} # define VUINT16x2_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT16x2_STORE_DEFINED -VEC_GENERIC_STORE(u, 16, 2) +#if !defined(VUINT16x2_STORE_DEFINED) +VEC_FUNC_IMPL void vuint16x2_store(vuint16x2 vec, vec_uint16 x[2]) +{ + memcpy(x, vec.generic, 4); +} # define VUINT16x2_STORE_DEFINED #endif -#ifndef VUINT16x2_ADD_DEFINED -VEC_GENERIC_ADD(u, 16, 2) +#if !defined(VUINT16x2_ADD_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_add(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + return vec1; +} # define VUINT16x2_ADD_DEFINED #endif -#ifndef VUINT16x2_SUB_DEFINED -VEC_GENERIC_SUB(u, 16, 2) +#if !defined(VUINT16x2_SUB_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_sub(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + return vec1; +} # define VUINT16x2_SUB_DEFINED #endif -#ifndef VUINT16x2_MUL_DEFINED -VEC_GENERIC_MUL(u, 16, 2) +#if !defined(VUINT16x2_MUL_DEFINED) +VEC_FUNC_IMPL vuint16x2 
vuint16x2_mul(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + return vec1; +} # define VUINT16x2_MUL_DEFINED #endif -#ifndef VUINT16x2_DIV_DEFINED -VEC_GENERIC_DIV(u, 16, 2) +#if !defined(VUINT16x2_DIV_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_div(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + return vec1; +} # define VUINT16x2_DIV_DEFINED #endif -#ifndef VUINT16x2_MOD_DEFINED -VEC_GENERIC_MOD(u, 16, 2) +#if !defined(VUINT16x2_MOD_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_mod(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + return vec1; +} # define VUINT16x2_MOD_DEFINED #endif -#ifndef VUINT16x2_AVG_DEFINED -VEC_GENERIC_AVG(u, 16, 2) +#if !defined(VUINT16x2_AVG_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_avg(vuint16x2 vec1, vuint16x2 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); + return vec1; +} # define VUINT16x2_AVG_DEFINED #endif -#ifndef VUINT16x2_AND_DEFINED -VEC_GENERIC_AND(u, 16, 2) +#if !defined(VUINT16x2_AND_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_and(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + return vec1; +} # define VUINT16x2_AND_DEFINED #endif -#ifndef VUINT16x2_OR_DEFINED -VEC_GENERIC_OR(u, 16, 2) +#if !defined(VUINT16x2_OR_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_or(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + return vec1; +} # define VUINT16x2_OR_DEFINED #endif -#ifndef VUINT16x2_XOR_DEFINED -VEC_GENERIC_XOR(u, 16, 2) +#if !defined(VUINT16x2_XOR_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_xor(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + return vec1; +} # define VUINT16x2_XOR_DEFINED #endif -#ifndef VUINT16x2_NOT_DEFINED -VEC_GENERIC_NOT(u, 16, 2) +#if !defined(VUINT16x2_NOT_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_not(vuint16x2 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + return vec; +} # define VUINT16x2_NOT_DEFINED #endif -#ifndef VUINT16x2_CMPLT_DEFINED -VEC_GENERIC_CMPLT(u, 16, 2) +#if !defined(VUINT16x2_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmplt(vuint16x2 vec1, vuint16x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2); + return vec1; +} # define VUINT16x2_CMPLT_DEFINED #endif -#ifndef VUINT16x2_CMPEQ_DEFINED -VEC_GENERIC_CMPEQ(u, 16, 2) +#if !defined(VUINT16x2_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpeq(vuint16x2 vec1, vuint16x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 
0xFF : 0, 2); + return vec1; +} # define VUINT16x2_CMPEQ_DEFINED #endif -#ifndef VUINT16x2_CMPGT_DEFINED -VEC_GENERIC_CMPGT(u, 16, 2) +#if !defined(VUINT16x2_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpgt(vuint16x2 vec1, vuint16x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2); + return vec1; +} # define VUINT16x2_CMPGT_DEFINED #endif -#ifndef VUINT16x2_CMPLE_DEFINED -VEC_GENERIC_CMPLE(u, 16, 2) +#if !defined(VUINT16x2_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmple(vuint16x2 vec1, vuint16x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2); + return vec1; +} # define VUINT16x2_CMPLE_DEFINED #endif -#ifndef VUINT16x2_CMPGE_DEFINED -VEC_GENERIC_CMPGE(u, 16, 2) +#if !defined(VUINT16x2_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpge(vuint16x2 vec1, vuint16x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2); + return vec1; +} # define VUINT16x2_CMPGE_DEFINED #endif -#ifndef VUINT16x2_MIN_DEFINED -VEC_GENERIC_MIN(u, 16, 2) +#if !defined(VUINT16x2_MIN_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_min(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VUINT16x2_MIN_DEFINED #endif -#ifndef VUINT16x2_MAX_DEFINED -VEC_GENERIC_MAX(u, 16, 2) +#if !defined(VUINT16x2_MAX_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_max(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? 
(vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VUINT16x2_MAX_DEFINED #endif -#ifndef VUINT16x2_RSHIFT_DEFINED -VEC_GENERIC_RSHIFT(u, 16, 2) +#if !defined(VUINT16x2_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_rshift(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + return vec1; +} # define VUINT16x2_RSHIFT_DEFINED #endif -#ifndef VUINT16x2_LRSHIFT_DEFINED -VEC_GENERIC_LRSHIFT(u, 16, 2) +#if !defined(VUINT16x2_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_lrshift(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + return vec1; +} # define VUINT16x2_LRSHIFT_DEFINED #endif -#ifndef VUINT16x2_LSHIFT_DEFINED -VEC_GENERIC_LSHIFT(u, 16, 2) +#if !defined(VUINT16x2_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x2 vuint16x2_lshift(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + return vec1; +} # define VUINT16x2_LSHIFT_DEFINED #endif - - -/* vuint32x2 */ - -#ifndef VINT32x2_SPLAT_DEFINED -VEC_GENERIC_SPLAT(/* nothing */, 32, 2) +#if !defined(VINT16x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_splat(vec_int16 x) +{ + vint16x4 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + return vec; +} +# define VINT16x4_SPLAT_DEFINED +#endif +#if !defined(VINT16x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_load_aligned(const vec_int16 x[4]) +{ + vint16x4 vec; + memcpy(vec.generic, x, 8); + return vec; +} +# define VINT16x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT16x4_LOAD_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_load(const vec_int16 x[4]) +{ + vint16x4 vec; + memcpy(vec.generic, x, 8); + return vec; +} +# define VINT16x4_LOAD_DEFINED +#endif +#if !defined(VINT16x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint16x4_store_aligned(vint16x4 vec, vec_int16 x[4]) +{ + memcpy(x, vec.generic, 8); +} +# define VINT16x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT16x4_STORE_DEFINED) +VEC_FUNC_IMPL void vint16x4_store(vint16x4 vec, vec_int16 x[4]) +{ + memcpy(x, vec.generic, 8); +} +# define VINT16x4_STORE_DEFINED +#endif +#if !defined(VINT16x4_ADD_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_add(vint16x4 vec1, vint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + return vec1; +} +# define VINT16x4_ADD_DEFINED +#endif +#if !defined(VINT16x4_SUB_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_sub(vint16x4 vec1, vint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + return vec1; +} +# define VINT16x4_SUB_DEFINED +#endif +#if !defined(VINT16x4_MUL_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_mul(vint16x4 vec1, vint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + return vec1; +} +# define VINT16x4_MUL_DEFINED +#endif +#if !defined(VINT16x4_DIV_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_div(vint16x4 vec1, vint16x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? 
(vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + return vec1; +} +# define VINT16x4_DIV_DEFINED +#endif +#if !defined(VINT16x4_MOD_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_mod(vint16x4 vec1, vint16x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + return vec1; +} +# define VINT16x4_MOD_DEFINED +#endif +#if !defined(VINT16x4_AVG_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_avg(vint16x4 vec1, vint16x4 vec2) +{ + vec_int16 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT16x4_AVG_DEFINED +#endif +#if !defined(VINT16x4_AND_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_and(vint16x4 vec1, vint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + return vec1; +} +# define VINT16x4_AND_DEFINED +#endif +#if !defined(VINT16x4_OR_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_or(vint16x4 vec1, vint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + return vec1; +} +# define VINT16x4_OR_DEFINED +#endif +#if !defined(VINT16x4_XOR_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_xor(vint16x4 vec1, vint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + return vec1; +} +# define VINT16x4_XOR_DEFINED +#endif +#if !defined(VINT16x4_NOT_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_not(vint16x4 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + return vec; +} +# define VINT16x4_NOT_DEFINED +#endif +#if 
!defined(VINT16x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_cmplt(vint16x4 vec1, vint16x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x4_CMPLT_DEFINED +#endif +#if !defined(VINT16x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_cmpeq(vint16x4 vec1, vint16x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x4_CMPEQ_DEFINED +#endif +#if !defined(VINT16x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_cmpgt(vint16x4 vec1, vint16x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x4_CMPGT_DEFINED +#endif +#if !defined(VINT16x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_cmple(vint16x4 vec1, vint16x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x4_CMPLE_DEFINED +#endif +#if !defined(VINT16x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_cmpge(vint16x4 vec1, vint16x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x4_CMPGE_DEFINED +#endif +#if !defined(VINT16x4_MIN_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_min(vint16x4 vec1, vint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VINT16x4_MIN_DEFINED +#endif +#if !defined(VINT16x4_MAX_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_max(vint16x4 vec1, vint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? 
(vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VINT16x4_MAX_DEFINED +#endif +#if !defined(VINT16x4_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_rshift(vint16x4 vec1, vuint16x4 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); + return vec1; +} +# define VINT16x4_RSHIFT_DEFINED +#endif +#if !defined(VINT16x4_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_lrshift(vint16x4 vec1, vuint16x4 vec2) +{ + union { vec_uint16 u; vec_int16 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + return vec1; +} +# define VINT16x4_LRSHIFT_DEFINED +#endif +#if !defined(VINT16x4_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x4 vint16x4_lshift(vint16x4 vec1, vuint16x4 vec2) +{ + union { vec_uint16 u; vec_int16 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + return vec1; +} +# define VINT16x4_LSHIFT_DEFINED +#endif +#if !defined(VUINT16x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_splat(vec_uint16 x) +{ + vuint16x4 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + return vec; +} +# define VUINT16x4_SPLAT_DEFINED +#endif +#if !defined(VUINT16x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_load_aligned(const vec_uint16 x[4]) +{ + vuint16x4 vec; + memcpy(vec.generic, x, 8); + return vec; +} +# define VUINT16x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT16x4_LOAD_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_load(const vec_uint16 x[4]) +{ + vuint16x4 vec; + memcpy(vec.generic, x, 8); + return vec; +} +# define VUINT16x4_LOAD_DEFINED +#endif +#if !defined(VUINT16x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint16x4_store_aligned(vuint16x4 vec, vec_uint16 x[4]) +{ + memcpy(x, vec.generic, 8); +} +# define VUINT16x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT16x4_STORE_DEFINED) +VEC_FUNC_IMPL void vuint16x4_store(vuint16x4 vec, vec_uint16 x[4]) +{ + memcpy(x, vec.generic, 8); +} +# define VUINT16x4_STORE_DEFINED +#endif +#if !defined(VUINT16x4_ADD_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_add(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + return vec1; +} +# define VUINT16x4_ADD_DEFINED +#endif +#if !defined(VUINT16x4_SUB_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_sub(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + return vec1; +} +# define VUINT16x4_SUB_DEFINED +#endif +#if !defined(VUINT16x4_MUL_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_mul(vuint16x4 vec1, vuint16x4 
vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + return vec1; +} +# define VUINT16x4_MUL_DEFINED +#endif +#if !defined(VUINT16x4_DIV_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_div(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + return vec1; +} +# define VUINT16x4_DIV_DEFINED +#endif +#if !defined(VUINT16x4_MOD_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_mod(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + return vec1; +} +# define VUINT16x4_MOD_DEFINED +#endif +#if !defined(VUINT16x4_AVG_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_avg(vuint16x4 vec1, vuint16x4 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); + return vec1; +} +# define VUINT16x4_AVG_DEFINED +#endif +#if !defined(VUINT16x4_AND_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_and(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + return vec1; +} +# define VUINT16x4_AND_DEFINED +#endif +#if !defined(VUINT16x4_OR_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_or(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + return vec1; +} +# define VUINT16x4_OR_DEFINED +#endif +#if !defined(VUINT16x4_XOR_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_xor(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + return vec1; +} +# define VUINT16x4_XOR_DEFINED +#endif +#if !defined(VUINT16x4_NOT_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_not(vuint16x4 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + return vec; +} +# define VUINT16x4_NOT_DEFINED +#endif +#if !defined(VUINT16x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmplt(vuint16x4 vec1, vuint16x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < 
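/* Illustrative note: the comparison helpers return SIMD-style lane masks
 * rather than 0/1 flags. memset() fills each 2-byte lane with 0xFF when the
 * predicate holds and 0x00 otherwise, so a "true" 16-bit lane reads back as
 * 0xFFFF (all bits set, or -1 when reinterpreted as signed). */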
vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x4_CMPLT_DEFINED +#endif +#if !defined(VUINT16x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpeq(vuint16x4 vec1, vuint16x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x4_CMPEQ_DEFINED +#endif +#if !defined(VUINT16x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpgt(vuint16x4 vec1, vuint16x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x4_CMPGT_DEFINED +#endif +#if !defined(VUINT16x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmple(vuint16x4 vec1, vuint16x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x4_CMPLE_DEFINED +#endif +#if !defined(VUINT16x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpge(vuint16x4 vec1, vuint16x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x4_CMPGE_DEFINED +#endif +#if !defined(VUINT16x4_MIN_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_min(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VUINT16x4_MIN_DEFINED +#endif +#if !defined(VUINT16x4_MAX_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_max(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? 
(vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VUINT16x4_MAX_DEFINED +#endif +#if !defined(VUINT16x4_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_rshift(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + return vec1; +} +# define VUINT16x4_RSHIFT_DEFINED +#endif +#if !defined(VUINT16x4_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_lrshift(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + return vec1; +} +# define VUINT16x4_LRSHIFT_DEFINED +#endif +#if !defined(VUINT16x4_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x4 vuint16x4_lshift(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + vec1.generic[2] <<= vec2.generic[0]; + vec1.generic[3] <<= vec2.generic[0]; + return vec1; +} +# define VUINT16x4_LSHIFT_DEFINED +#endif +#if !defined(VINT16x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_splat(vec_int16 x) +{ + vint16x8 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + return vec; +} +# define VINT16x8_SPLAT_DEFINED +#endif +#if !defined(VINT16x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_load_aligned(const vec_int16 x[8]) +{ + vint16x8 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VINT16x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT16x8_LOAD_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_load(const vec_int16 x[8]) +{ + vint16x8 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VINT16x8_LOAD_DEFINED +#endif +#if !defined(VINT16x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 x[8]) +{ + memcpy(x, vec.generic, 16); +} +# define VINT16x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT16x8_STORE_DEFINED) +VEC_FUNC_IMPL void vint16x8_store(vint16x8 vec, vec_int16 x[8]) +{ + memcpy(x, vec.generic, 16); +} +# define VINT16x8_STORE_DEFINED +#endif +#if !defined(VINT16x8_ADD_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_add(vint16x8 vec1, vint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + return vec1; +} +# define VINT16x8_ADD_DEFINED +#endif +#if !defined(VINT16x8_SUB_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_sub(vint16x8 vec1, vint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + return vec1; +} +# define VINT16x8_SUB_DEFINED +#endif +#if !defined(VINT16x8_MUL_DEFINED) +VEC_FUNC_IMPL 
vint16x8 vint16x8_mul(vint16x8 vec1, vint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + return vec1; +} +# define VINT16x8_MUL_DEFINED +#endif +#if !defined(VINT16x8_DIV_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_div(vint16x8 vec1, vint16x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + return vec1; +} +# define VINT16x8_DIV_DEFINED +#endif +#if !defined(VINT16x8_MOD_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_mod(vint16x8 vec1, vint16x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? 
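/* Illustrative sketch of the signed avg scheme used just below: the lane
 * halves are summed first and the two division remainders are folded back in
 * afterwards, so a + b is never formed directly and the result rounds up to
 * ceil((a + b) / 2). A minimal worked example with a = 1, b = 2: the halves
 * give 0 + 1, the remainders 1 + 0 give rem_d_quot = 0 and rem_d_rem = 1, so
 * the lane becomes 0 + 1 + 0 + 1 = 2 = ceil(3 / 2). */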
(vec1.generic[7] % vec2.generic[7]) : 0); + return vec1; +} +# define VINT16x8_MOD_DEFINED +#endif +#if !defined(VINT16x8_AVG_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2) +{ + vec_int16 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[4] % 2); + y_d_rem = (vec2.generic[4] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[5] % 2); + y_d_rem = (vec2.generic[5] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[6] % 2); + y_d_rem = (vec2.generic[6] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[7] % 2); + y_d_rem = (vec2.generic[7] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT16x8_AVG_DEFINED +#endif +#if !defined(VINT16x8_AND_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + return vec1; +} +# define VINT16x8_AND_DEFINED +#endif +#if !defined(VINT16x8_OR_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | 
vec2.generic[7]); + return vec1; +} +# define VINT16x8_OR_DEFINED +#endif +#if !defined(VINT16x8_XOR_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + return vec1; +} +# define VINT16x8_XOR_DEFINED +#endif +#if !defined(VINT16x8_NOT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_not(vint16x8 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + return vec; +} +# define VINT16x8_NOT_DEFINED +#endif +#if !defined(VINT16x8_CMPLT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x8_CMPLT_DEFINED +#endif +#if !defined(VINT16x8_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x8_CMPEQ_DEFINED +#endif +#if !defined(VINT16x8_CMPGT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 
0xFF : 0, 2); + return vec1; +} +# define VINT16x8_CMPGT_DEFINED +#endif +#if !defined(VINT16x8_CMPLE_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_cmple(vint16x8 vec1, vint16x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x8_CMPLE_DEFINED +#endif +#if !defined(VINT16x8_CMPGE_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_cmpge(vint16x8 vec1, vint16x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x8_CMPGE_DEFINED +#endif +#if !defined(VINT16x8_MIN_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VINT16x8_MIN_DEFINED +#endif +#if !defined(VINT16x8_MAX_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? 
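/* Illustrative note on the signed shift helpers that follow: lrshift and
 * lshift funnel each lane through a union { vec_uint16 u; vec_int16 s; } so
 * the actual shift is performed on the unsigned member, giving a logical
 * (zero-filling) right shift and sidestepping the undefined and
 * implementation-defined cases of shifting signed values in plain C. */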
(vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VINT16x8_MAX_DEFINED +#endif +#if !defined(VINT16x8_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); +vec1.generic[4] = ((~vec1.generic[4]) >> vec2.generic[4]); +vec1.generic[5] = ((~vec1.generic[5]) >> vec2.generic[5]); +vec1.generic[6] = ((~vec1.generic[6]) >> vec2.generic[6]); +vec1.generic[7] = ((~vec1.generic[7]) >> vec2.generic[7]); + return vec1; +} +# define VINT16x8_RSHIFT_DEFINED +#endif +#if !defined(VINT16x8_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_lrshift(vint16x8 vec1, vuint16x8 vec2) +{ + union { vec_uint16 u; vec_int16 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u >>= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u >>= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u >>= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u >>= vec2.generic[7]; + vec1.generic[7] = x.s; + return vec1; +} +# define VINT16x8_LRSHIFT_DEFINED +#endif +#if !defined(VINT16x8_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2) +{ + union { vec_uint16 u; vec_int16 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u <<= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u <<= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u <<= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u <<= vec2.generic[7]; + vec1.generic[7] = x.s; + return vec1; +} +# define VINT16x8_LSHIFT_DEFINED +#endif +#if !defined(VUINT16x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_splat(vec_uint16 x) +{ + vuint16x8 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + return vec; +} +# define VUINT16x8_SPLAT_DEFINED +#endif +#if !defined(VUINT16x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_load_aligned(const vec_uint16 x[8]) +{ + vuint16x8 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VUINT16x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT16x8_LOAD_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_load(const vec_uint16 x[8]) +{ + vuint16x8 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VUINT16x8_LOAD_DEFINED +#endif +#if !defined(VUINT16x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 x[8]) +{ + memcpy(x, vec.generic, 16); +} +# define VUINT16x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT16x8_STORE_DEFINED) +VEC_FUNC_IMPL void vuint16x8_store(vuint16x8 vec, vec_uint16 x[8]) +{ + memcpy(x, vec.generic, 
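/* Illustrative note: in this generic fallback, load/store and their _aligned
 * counterparts are identical memcpy()s of lanes * sizeof(lane) bytes (16 here
 * for 8 x 16-bit lanes); the aligned variants presumably only become distinct
 * in SIMD-backed implementations. */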
16); +} +# define VUINT16x8_STORE_DEFINED +#endif +#if !defined(VUINT16x8_ADD_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_add(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + return vec1; +} +# define VUINT16x8_ADD_DEFINED +#endif +#if !defined(VUINT16x8_SUB_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_sub(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + return vec1; +} +# define VUINT16x8_SUB_DEFINED +#endif +#if !defined(VUINT16x8_MUL_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_mul(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + return vec1; +} +# define VUINT16x8_MUL_DEFINED +#endif +#if !defined(VUINT16x8_DIV_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_div(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + return vec1; +} +# define VUINT16x8_DIV_DEFINED +#endif +#if !defined(VUINT16x8_MOD_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_mod(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? 
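/* Illustrative sketch of the unsigned avg identity used just below:
 * (a >> 1) + (b >> 1) + ((a | b) & 1) == ceil((a + b) / 2); the
 * ((a | b) & 1) term adds 1 exactly when at least one operand is odd, which
 * is the ceiling correction. A minimal worked example with a = 1, b = 2:
 * 0 + 1 + ((1 | 2) & 1) = 2, matching ceil(3 / 2). */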
(vec1.generic[7] % vec2.generic[7]) : 0); + return vec1; +} +# define VUINT16x8_MOD_DEFINED +#endif +#if !defined(VUINT16x8_AVG_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); +vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1); +vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1); +vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1); +vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1); + return vec1; +} +# define VUINT16x8_AVG_DEFINED +#endif +#if !defined(VUINT16x8_AND_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_and(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + return vec1; +} +# define VUINT16x8_AND_DEFINED +#endif +#if !defined(VUINT16x8_OR_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_or(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + return vec1; +} +# define VUINT16x8_OR_DEFINED +#endif +#if !defined(VUINT16x8_XOR_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_xor(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + return vec1; +} +# define VUINT16x8_XOR_DEFINED +#endif +#if !defined(VUINT16x8_NOT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_not(vuint16x8 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + return vec; +} +# define VUINT16x8_NOT_DEFINED +#endif +#if !defined(VUINT16x8_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint16x8 
vuint16x8_cmplt(vuint16x8 vec1, vuint16x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x8_CMPLT_DEFINED +#endif +#if !defined(VUINT16x8_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpeq(vuint16x8 vec1, vuint16x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x8_CMPEQ_DEFINED +#endif +#if !defined(VUINT16x8_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpgt(vuint16x8 vec1, vuint16x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x8_CMPGT_DEFINED +#endif +#if !defined(VUINT16x8_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmple(vuint16x8 vec1, vuint16x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x8_CMPLE_DEFINED +#endif +#if !defined(VUINT16x8_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpge(vuint16x8 vec1, vuint16x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 
0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x8_CMPGE_DEFINED +#endif +#if !defined(VUINT16x8_MIN_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_min(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VUINT16x8_MIN_DEFINED +#endif +#if !defined(VUINT16x8_MAX_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_max(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? 
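/* Note on the vuint16x8 shift helpers below: as generated here, every lane
 * uses vec2.generic[0] as its shift count, i.e. the amount is taken from
 * lane 0 of vec2 rather than lane-by-lane. */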
(vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VUINT16x8_MAX_DEFINED +#endif +#if !defined(VUINT16x8_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_rshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + vec1.generic[4] >>= vec2.generic[0]; + vec1.generic[5] >>= vec2.generic[0]; + vec1.generic[6] >>= vec2.generic[0]; + vec1.generic[7] >>= vec2.generic[0]; + return vec1; +} +# define VUINT16x8_RSHIFT_DEFINED +#endif +#if !defined(VUINT16x8_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + vec1.generic[4] >>= vec2.generic[0]; + vec1.generic[5] >>= vec2.generic[0]; + vec1.generic[6] >>= vec2.generic[0]; + vec1.generic[7] >>= vec2.generic[0]; + return vec1; +} +# define VUINT16x8_LRSHIFT_DEFINED +#endif +#if !defined(VUINT16x8_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + vec1.generic[2] <<= vec2.generic[0]; + vec1.generic[3] <<= vec2.generic[0]; + vec1.generic[4] <<= vec2.generic[0]; + vec1.generic[5] <<= vec2.generic[0]; + vec1.generic[6] <<= vec2.generic[0]; + vec1.generic[7] <<= vec2.generic[0]; + return vec1; +} +# define VUINT16x8_LSHIFT_DEFINED +#endif +#if !defined(VINT16x16_SPLAT_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_splat(vec_int16 x) +{ + vint16x16 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + vec.generic[8] = x; + vec.generic[9] = x; + vec.generic[10] = x; + vec.generic[11] = x; + vec.generic[12] = x; + vec.generic[13] = x; + vec.generic[14] = x; + vec.generic[15] = x; + return vec; +} +# define VINT16x16_SPLAT_DEFINED +#endif +#if !defined(VINT16x16_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_load_aligned(const vec_int16 x[16]) +{ + vint16x16 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VINT16x16_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT16x16_LOAD_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_load(const vec_int16 x[16]) +{ + vint16x16 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VINT16x16_LOAD_DEFINED +#endif +#if !defined(VINT16x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint16x16_store_aligned(vint16x16 vec, vec_int16 x[16]) +{ + memcpy(x, vec.generic, 32); +} +# define VINT16x16_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT16x16_STORE_DEFINED) +VEC_FUNC_IMPL void vint16x16_store(vint16x16 vec, vec_int16 x[16]) +{ + memcpy(x, vec.generic, 32); +} +# define VINT16x16_STORE_DEFINED +#endif +#if !defined(VINT16x16_ADD_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_add(vint16x16 vec1, vint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] + 
vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + return vec1; +} +# define VINT16x16_ADD_DEFINED +#endif +#if !defined(VINT16x16_SUB_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_sub(vint16x16 vec1, vint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + return vec1; +} +# define VINT16x16_SUB_DEFINED +#endif +#if !defined(VINT16x16_MUL_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_mul(vint16x16 vec1, vint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + return vec1; +} +# define VINT16x16_MUL_DEFINED +#endif +#if !defined(VINT16x16_DIV_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_div(vint16x16 vec1, vint16x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? 
(vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0); + return vec1; +} +# define VINT16x16_DIV_DEFINED +#endif +#if !defined(VINT16x16_MOD_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_mod(vint16x16 vec1, vint16x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? 
(vec1.generic[15] % vec2.generic[15]) : 0); + return vec1; +} +# define VINT16x16_MOD_DEFINED +#endif +#if !defined(VINT16x16_AVG_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_avg(vint16x16 vec1, vint16x16 vec2) +{ + vec_int16 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[4] % 2); + y_d_rem = (vec2.generic[4] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[5] % 2); + y_d_rem = (vec2.generic[5] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[6] % 2); + y_d_rem = (vec2.generic[6] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[7] % 2); + y_d_rem = (vec2.generic[7] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[8] % 2); + y_d_rem = (vec2.generic[8] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[8] = ((vec1.generic[8] / 2) + (vec2.generic[8] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[9] % 2); + y_d_rem = (vec2.generic[9] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[9] = ((vec1.generic[9] / 2) + (vec2.generic[9] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[10] % 2); + y_d_rem = (vec2.generic[10] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[10] = ((vec1.generic[10] / 2) + (vec2.generic[10] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[11] % 2); + y_d_rem = (vec2.generic[11] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[11] = ((vec1.generic[11] / 2) + (vec2.generic[11] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[12] % 2); + y_d_rem = (vec2.generic[12] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[12] = 
((vec1.generic[12] / 2) + (vec2.generic[12] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[13] % 2); + y_d_rem = (vec2.generic[13] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[13] = ((vec1.generic[13] / 2) + (vec2.generic[13] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[14] % 2); + y_d_rem = (vec2.generic[14] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[14] = ((vec1.generic[14] / 2) + (vec2.generic[14] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[15] % 2); + y_d_rem = (vec2.generic[15] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[15] = ((vec1.generic[15] / 2) + (vec2.generic[15] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT16x16_AVG_DEFINED +#endif +#if !defined(VINT16x16_AND_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_and(vint16x16 vec1, vint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]); + return vec1; +} +# define VINT16x16_AND_DEFINED +#endif +#if !defined(VINT16x16_OR_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_or(vint16x16 vec1, vint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]); + return vec1; +} +# define VINT16x16_OR_DEFINED +#endif +#if !defined(VINT16x16_XOR_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_xor(vint16x16 vec1, vint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = 
(vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]); + return vec1; +} +# define VINT16x16_XOR_DEFINED +#endif +#if !defined(VINT16x16_NOT_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_not(vint16x16 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + vec.generic[8] = ~vec.generic[8]; + vec.generic[9] = ~vec.generic[9]; + vec.generic[10] = ~vec.generic[10]; + vec.generic[11] = ~vec.generic[11]; + vec.generic[12] = ~vec.generic[12]; + vec.generic[13] = ~vec.generic[13]; + vec.generic[14] = ~vec.generic[14]; + vec.generic[15] = ~vec.generic[15]; + return vec; +} +# define VINT16x16_NOT_DEFINED +#endif +#if !defined(VINT16x16_CMPLT_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_cmplt(vint16x16 vec1, vint16x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x16_CMPLT_DEFINED +#endif +#if !defined(VINT16x16_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_cmpeq(vint16x16 vec1, vint16x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 
0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x16_CMPEQ_DEFINED +#endif +#if !defined(VINT16x16_CMPGT_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_cmpgt(vint16x16 vec1, vint16x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x16_CMPGT_DEFINED +#endif +#if !defined(VINT16x16_CMPLE_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_cmple(vint16x16 vec1, vint16x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 
0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x16_CMPLE_DEFINED +#endif +#if !defined(VINT16x16_CMPGE_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_cmpge(vint16x16 vec1, vint16x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x16_CMPGE_DEFINED +#endif +#if !defined(VINT16x16_MIN_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_min(vint16x16 vec1, vint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? 
(vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VINT16x16_MIN_DEFINED +#endif +#if !defined(VINT16x16_MAX_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_max(vint16x16 vec1, vint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VINT16x16_MAX_DEFINED +#endif +#if !defined(VINT16x16_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_rshift(vint16x16 vec1, vuint16x16 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); +vec1.generic[4] = ((~vec1.generic[4]) >> vec2.generic[4]); +vec1.generic[5] = ((~vec1.generic[5]) >> vec2.generic[5]); +vec1.generic[6] = ((~vec1.generic[6]) >> vec2.generic[6]); +vec1.generic[7] = ((~vec1.generic[7]) >> vec2.generic[7]); +vec1.generic[8] = ((~vec1.generic[8]) >> vec2.generic[8]); +vec1.generic[9] = ((~vec1.generic[9]) >> vec2.generic[9]); +vec1.generic[10] = ((~vec1.generic[10]) >> vec2.generic[10]); +vec1.generic[11] = ((~vec1.generic[11]) >> vec2.generic[11]); +vec1.generic[12] = ((~vec1.generic[12]) >> vec2.generic[12]); +vec1.generic[13] = ((~vec1.generic[13]) >> vec2.generic[13]); +vec1.generic[14] = ((~vec1.generic[14]) >> vec2.generic[14]); +vec1.generic[15] = ((~vec1.generic[15]) >> vec2.generic[15]); + return vec1; +} +# define VINT16x16_RSHIFT_DEFINED +#endif +#if !defined(VINT16x16_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_lrshift(vint16x16 vec1, vuint16x16 vec2) +{ + union { vec_uint16 u; vec_int16 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u >>= vec2.generic[4]; + vec1.generic[4] = 
x.s; + x.s = vec1.generic[5]; + x.u >>= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u >>= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u >>= vec2.generic[7]; + vec1.generic[7] = x.s; + x.s = vec1.generic[8]; + x.u >>= vec2.generic[8]; + vec1.generic[8] = x.s; + x.s = vec1.generic[9]; + x.u >>= vec2.generic[9]; + vec1.generic[9] = x.s; + x.s = vec1.generic[10]; + x.u >>= vec2.generic[10]; + vec1.generic[10] = x.s; + x.s = vec1.generic[11]; + x.u >>= vec2.generic[11]; + vec1.generic[11] = x.s; + x.s = vec1.generic[12]; + x.u >>= vec2.generic[12]; + vec1.generic[12] = x.s; + x.s = vec1.generic[13]; + x.u >>= vec2.generic[13]; + vec1.generic[13] = x.s; + x.s = vec1.generic[14]; + x.u >>= vec2.generic[14]; + vec1.generic[14] = x.s; + x.s = vec1.generic[15]; + x.u >>= vec2.generic[15]; + vec1.generic[15] = x.s; + return vec1; +} +# define VINT16x16_LRSHIFT_DEFINED +#endif +#if !defined(VINT16x16_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x16 vint16x16_lshift(vint16x16 vec1, vuint16x16 vec2) +{ + union { vec_uint16 u; vec_int16 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u <<= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u <<= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u <<= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u <<= vec2.generic[7]; + vec1.generic[7] = x.s; + x.s = vec1.generic[8]; + x.u <<= vec2.generic[8]; + vec1.generic[8] = x.s; + x.s = vec1.generic[9]; + x.u <<= vec2.generic[9]; + vec1.generic[9] = x.s; + x.s = vec1.generic[10]; + x.u <<= vec2.generic[10]; + vec1.generic[10] = x.s; + x.s = vec1.generic[11]; + x.u <<= vec2.generic[11]; + vec1.generic[11] = x.s; + x.s = vec1.generic[12]; + x.u <<= vec2.generic[12]; + vec1.generic[12] = x.s; + x.s = vec1.generic[13]; + x.u <<= vec2.generic[13]; + vec1.generic[13] = x.s; + x.s = vec1.generic[14]; + x.u <<= vec2.generic[14]; + vec1.generic[14] = x.s; + x.s = vec1.generic[15]; + x.u <<= vec2.generic[15]; + vec1.generic[15] = x.s; + return vec1; +} +# define VINT16x16_LSHIFT_DEFINED +#endif +#if !defined(VUINT16x16_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_splat(vec_uint16 x) +{ + vuint16x16 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + vec.generic[8] = x; + vec.generic[9] = x; + vec.generic[10] = x; + vec.generic[11] = x; + vec.generic[12] = x; + vec.generic[13] = x; + vec.generic[14] = x; + vec.generic[15] = x; + return vec; +} +# define VUINT16x16_SPLAT_DEFINED +#endif +#if !defined(VUINT16x16_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_load_aligned(const vec_uint16 x[16]) +{ + vuint16x16 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VUINT16x16_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT16x16_LOAD_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_load(const vec_uint16 x[16]) +{ + vuint16x16 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VUINT16x16_LOAD_DEFINED +#endif +#if !defined(VUINT16x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint16x16_store_aligned(vuint16x16 vec, vec_uint16 x[16]) +{ + memcpy(x, vec.generic, 32); 
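+ /* NOTE: in this generic fallback the "aligned" and unaligned load/store variants are identical memcpy()s of the 32-byte (16 lanes x 16-bit) backing array; alignment is not exploited here. */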
+} +# define VUINT16x16_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT16x16_STORE_DEFINED) +VEC_FUNC_IMPL void vuint16x16_store(vuint16x16 vec, vec_uint16 x[16]) +{ + memcpy(x, vec.generic, 32); +} +# define VUINT16x16_STORE_DEFINED +#endif +#if !defined(VUINT16x16_ADD_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_add(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + return vec1; +} +# define VUINT16x16_ADD_DEFINED +#endif +#if !defined(VUINT16x16_SUB_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_sub(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + return vec1; +} +# define VUINT16x16_SUB_DEFINED +#endif +#if !defined(VUINT16x16_MUL_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_mul(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + return vec1; +} +# define VUINT16x16_MUL_DEFINED 
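+ /* NOTE: the generic div and mod implementations below test each lane's divisor and yield 0 for that lane when the divisor is zero (e.g. a divisor lane of 0 simply stores 0), rather than performing an undefined division. */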
+#endif +#if !defined(VUINT16x16_DIV_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_div(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0); + return vec1; +} +# define VUINT16x16_DIV_DEFINED +#endif +#if !defined(VUINT16x16_MOD_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_mod(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? 
(vec1.generic[15] % vec2.generic[15]) : 0); + return vec1; +} +# define VUINT16x16_MOD_DEFINED +#endif +#if !defined(VUINT16x16_AVG_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_avg(vuint16x16 vec1, vuint16x16 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); +vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1); +vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1); +vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1); +vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1); +vec1.generic[8] = (vec1.generic[8] >> 1) + (vec2.generic[8] >> 1) + ((vec1.generic[8] | vec2.generic[8]) & 1); +vec1.generic[9] = (vec1.generic[9] >> 1) + (vec2.generic[9] >> 1) + ((vec1.generic[9] | vec2.generic[9]) & 1); +vec1.generic[10] = (vec1.generic[10] >> 1) + (vec2.generic[10] >> 1) + ((vec1.generic[10] | vec2.generic[10]) & 1); +vec1.generic[11] = (vec1.generic[11] >> 1) + (vec2.generic[11] >> 1) + ((vec1.generic[11] | vec2.generic[11]) & 1); +vec1.generic[12] = (vec1.generic[12] >> 1) + (vec2.generic[12] >> 1) + ((vec1.generic[12] | vec2.generic[12]) & 1); +vec1.generic[13] = (vec1.generic[13] >> 1) + (vec2.generic[13] >> 1) + ((vec1.generic[13] | vec2.generic[13]) & 1); +vec1.generic[14] = (vec1.generic[14] >> 1) + (vec2.generic[14] >> 1) + ((vec1.generic[14] | vec2.generic[14]) & 1); +vec1.generic[15] = (vec1.generic[15] >> 1) + (vec2.generic[15] >> 1) + ((vec1.generic[15] | vec2.generic[15]) & 1); + return vec1; +} +# define VUINT16x16_AVG_DEFINED +#endif +#if !defined(VUINT16x16_AND_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_and(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]); + return vec1; +} +# define VUINT16x16_AND_DEFINED +#endif +#if !defined(VUINT16x16_OR_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_or(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + 
vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]); + return vec1; +} +# define VUINT16x16_OR_DEFINED +#endif +#if !defined(VUINT16x16_XOR_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_xor(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]); + return vec1; +} +# define VUINT16x16_XOR_DEFINED +#endif +#if !defined(VUINT16x16_NOT_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_not(vuint16x16 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + vec.generic[8] = ~vec.generic[8]; + vec.generic[9] = ~vec.generic[9]; + vec.generic[10] = ~vec.generic[10]; + vec.generic[11] = ~vec.generic[11]; + vec.generic[12] = ~vec.generic[12]; + vec.generic[13] = ~vec.generic[13]; + vec.generic[14] = ~vec.generic[14]; + vec.generic[15] = ~vec.generic[15]; + return vec; +} +# define VUINT16x16_NOT_DEFINED +#endif +#if !defined(VUINT16x16_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmplt(vuint16x16 vec1, vuint16x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 
0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x16_CMPLT_DEFINED +#endif +#if !defined(VUINT16x16_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpeq(vuint16x16 vec1, vuint16x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x16_CMPEQ_DEFINED +#endif +#if !defined(VUINT16x16_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpgt(vuint16x16 vec1, vuint16x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 
0xFF : 0, 2); + return vec1; +} +# define VUINT16x16_CMPGT_DEFINED +#endif +#if !defined(VUINT16x16_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmple(vuint16x16 vec1, vuint16x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x16_CMPLE_DEFINED +#endif +#if !defined(VUINT16x16_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpge(vuint16x16 vec1, vuint16x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x16_CMPGE_DEFINED +#endif +#if !defined(VUINT16x16_MIN_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_min(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? 
(vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VUINT16x16_MIN_DEFINED +#endif +#if !defined(VUINT16x16_MAX_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_max(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? 
(vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VUINT16x16_MAX_DEFINED +#endif +#if !defined(VUINT16x16_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_rshift(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[1]; + vec1.generic[2] >>= vec2.generic[2]; + vec1.generic[3] >>= vec2.generic[3]; + vec1.generic[4] >>= vec2.generic[4]; + vec1.generic[5] >>= vec2.generic[5]; + vec1.generic[6] >>= vec2.generic[6]; + vec1.generic[7] >>= vec2.generic[7]; + vec1.generic[8] >>= vec2.generic[8]; + vec1.generic[9] >>= vec2.generic[9]; + vec1.generic[10] >>= vec2.generic[10]; + vec1.generic[11] >>= vec2.generic[11]; + vec1.generic[12] >>= vec2.generic[12]; + vec1.generic[13] >>= vec2.generic[13]; + vec1.generic[14] >>= vec2.generic[14]; + vec1.generic[15] >>= vec2.generic[15]; + return vec1; +} +# define VUINT16x16_RSHIFT_DEFINED +#endif +#if !defined(VUINT16x16_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_lrshift(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[1]; + vec1.generic[2] >>= vec2.generic[2]; + vec1.generic[3] >>= vec2.generic[3]; + vec1.generic[4] >>= vec2.generic[4]; + vec1.generic[5] >>= vec2.generic[5]; + vec1.generic[6] >>= vec2.generic[6]; + vec1.generic[7] >>= vec2.generic[7]; + vec1.generic[8] >>= vec2.generic[8]; + vec1.generic[9] >>= vec2.generic[9]; + vec1.generic[10] >>= vec2.generic[10]; + vec1.generic[11] >>= vec2.generic[11]; + vec1.generic[12] >>= vec2.generic[12]; + vec1.generic[13] >>= vec2.generic[13]; + vec1.generic[14] >>= vec2.generic[14]; + vec1.generic[15] >>= vec2.generic[15]; + return vec1; +} +# define VUINT16x16_LRSHIFT_DEFINED +#endif +#if !defined(VUINT16x16_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x16 vuint16x16_lshift(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[1]; + vec1.generic[2] <<= vec2.generic[2]; + vec1.generic[3] <<= vec2.generic[3]; + vec1.generic[4] <<= vec2.generic[4]; + vec1.generic[5] <<= vec2.generic[5]; + vec1.generic[6] <<= vec2.generic[6]; + vec1.generic[7] <<= vec2.generic[7]; + vec1.generic[8] <<= vec2.generic[8]; + vec1.generic[9] <<= vec2.generic[9]; + vec1.generic[10] <<= vec2.generic[10]; + vec1.generic[11] <<= vec2.generic[11]; + vec1.generic[12] <<= vec2.generic[12]; + vec1.generic[13] <<= vec2.generic[13]; + vec1.generic[14] <<= vec2.generic[14]; + vec1.generic[15] <<= vec2.generic[15]; + return vec1; +} +# define VUINT16x16_LSHIFT_DEFINED +#endif +#if !defined(VINT16x32_SPLAT_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_splat(vec_int16 x) +{ + vint16x32 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + vec.generic[8] = x; + vec.generic[9] = x; + vec.generic[10] = x; + vec.generic[11] = x; + vec.generic[12] = x; + vec.generic[13] = x; + vec.generic[14] = x; + vec.generic[15] = x; + vec.generic[16] = x; + vec.generic[17] = x; + vec.generic[18] = x; + vec.generic[19] = x; + vec.generic[20] = x; + vec.generic[21] = x; + vec.generic[22] = x; + vec.generic[23] = x; + vec.generic[24] = x; + vec.generic[25] = x; + vec.generic[26] = x; + vec.generic[27] = x; + vec.generic[28] = x; + vec.generic[29] = x; + vec.generic[30] = x; + vec.generic[31] = x; + return vec; +} +# define VINT16x32_SPLAT_DEFINED +#endif +#if !defined(VINT16x32_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_load_aligned(const vec_int16
x[32]) +{ + vint16x32 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VINT16x32_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT16x32_LOAD_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_load(const vec_int16 x[32]) +{ + vint16x32 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VINT16x32_LOAD_DEFINED +#endif +#if !defined(VINT16x32_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint16x32_store_aligned(vint16x32 vec, vec_int16 x[32]) +{ + memcpy(x, vec.generic, 64); +} +# define VINT16x32_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT16x32_STORE_DEFINED) +VEC_FUNC_IMPL void vint16x32_store(vint16x32 vec, vec_int16 x[32]) +{ + memcpy(x, vec.generic, 64); +} +# define VINT16x32_STORE_DEFINED +#endif +#if !defined(VINT16x32_ADD_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_add(vint16x32 vec1, vint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] + vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] + vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] + vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] + vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] + vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] + vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] + vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] + vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] + vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] + vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] + vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] + vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] + vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] + vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] + vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] + vec2.generic[31]); + return vec1; +} +# define VINT16x32_ADD_DEFINED +#endif +#if !defined(VINT16x32_SUB_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_sub(vint16x32 vec1, vint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = 
(vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] - vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] - vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] - vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] - vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] - vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] - vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] - vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] - vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] - vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] - vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] - vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] - vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] - vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] - vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] - vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] - vec2.generic[31]); + return vec1; +} +# define VINT16x32_SUB_DEFINED +#endif +#if !defined(VINT16x32_MUL_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_mul(vint16x32 vec1, vint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] * vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] * vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] * vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] * vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] * vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] * vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] * vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] * vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] * vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] * vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] * vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] * vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] * vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] * vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] * vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] * vec2.generic[31]); + return vec1; +} +# define VINT16x32_MUL_DEFINED +#endif +#if !defined(VINT16x32_DIV_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_div(vint16x32 vec1, vint16x32 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? 
(vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0); + vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] / vec2.generic[16]) : 0); + vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] / vec2.generic[17]) : 0); + vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] / vec2.generic[18]) : 0); + vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] / vec2.generic[19]) : 0); + vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] / vec2.generic[20]) : 0); + vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] / vec2.generic[21]) : 0); + vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] / vec2.generic[22]) : 0); + vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] / vec2.generic[23]) : 0); + vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] / vec2.generic[24]) : 0); + vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] / vec2.generic[25]) : 0); + vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] / vec2.generic[26]) : 0); + vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] / vec2.generic[27]) : 0); + vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] / vec2.generic[28]) : 0); + vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] / vec2.generic[29]) : 0); + vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] / vec2.generic[30]) : 0); + vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] / vec2.generic[31]) : 0); + return vec1; +} +# define VINT16x32_DIV_DEFINED +#endif +#if !defined(VINT16x32_MOD_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_mod(vint16x32 vec1, vint16x32 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? 
(vec1.generic[8] % vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0); + vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] % vec2.generic[16]) : 0); + vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] % vec2.generic[17]) : 0); + vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] % vec2.generic[18]) : 0); + vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] % vec2.generic[19]) : 0); + vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] % vec2.generic[20]) : 0); + vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] % vec2.generic[21]) : 0); + vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] % vec2.generic[22]) : 0); + vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] % vec2.generic[23]) : 0); + vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] % vec2.generic[24]) : 0); + vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] % vec2.generic[25]) : 0); + vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] % vec2.generic[26]) : 0); + vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] % vec2.generic[27]) : 0); + vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] % vec2.generic[28]) : 0); + vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] % vec2.generic[29]) : 0); + vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] % vec2.generic[30]) : 0); + vec1.generic[31] = (vec2.generic[31] ? 
(vec1.generic[31] % vec2.generic[31]) : 0); + return vec1; +} +# define VINT16x32_MOD_DEFINED +#endif +#if !defined(VINT16x32_AVG_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_avg(vint16x32 vec1, vint16x32 vec2) +{ + vec_int16 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[4] % 2); + y_d_rem = (vec2.generic[4] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[5] % 2); + y_d_rem = (vec2.generic[5] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[6] % 2); + y_d_rem = (vec2.generic[6] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[7] % 2); + y_d_rem = (vec2.generic[7] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[8] % 2); + y_d_rem = (vec2.generic[8] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[8] = ((vec1.generic[8] / 2) + (vec2.generic[8] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[9] % 2); + y_d_rem = (vec2.generic[9] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[9] = ((vec1.generic[9] / 2) + (vec2.generic[9] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[10] % 2); + y_d_rem = (vec2.generic[10] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[10] = ((vec1.generic[10] / 2) + (vec2.generic[10] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[11] % 2); + y_d_rem = (vec2.generic[11] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[11] = ((vec1.generic[11] / 2) + (vec2.generic[11] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[12] % 2); + y_d_rem = (vec2.generic[12] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[12] = 
((vec1.generic[12] / 2) + (vec2.generic[12] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[13] % 2); + y_d_rem = (vec2.generic[13] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[13] = ((vec1.generic[13] / 2) + (vec2.generic[13] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[14] % 2); + y_d_rem = (vec2.generic[14] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[14] = ((vec1.generic[14] / 2) + (vec2.generic[14] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[15] % 2); + y_d_rem = (vec2.generic[15] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[15] = ((vec1.generic[15] / 2) + (vec2.generic[15] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[16] % 2); + y_d_rem = (vec2.generic[16] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[16] = ((vec1.generic[16] / 2) + (vec2.generic[16] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[17] % 2); + y_d_rem = (vec2.generic[17] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[17] = ((vec1.generic[17] / 2) + (vec2.generic[17] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[18] % 2); + y_d_rem = (vec2.generic[18] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[18] = ((vec1.generic[18] / 2) + (vec2.generic[18] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[19] % 2); + y_d_rem = (vec2.generic[19] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[19] = ((vec1.generic[19] / 2) + (vec2.generic[19] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[20] % 2); + y_d_rem = (vec2.generic[20] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[20] = ((vec1.generic[20] / 2) + (vec2.generic[20] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[21] % 2); + y_d_rem = (vec2.generic[21] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[21] = ((vec1.generic[21] / 2) + (vec2.generic[21] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[22] % 2); + y_d_rem = (vec2.generic[22] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[22] = ((vec1.generic[22] / 2) + (vec2.generic[22] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[23] % 2); + y_d_rem = (vec2.generic[23] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[23] = ((vec1.generic[23] / 2) + (vec2.generic[23] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[24] % 2); + y_d_rem = (vec2.generic[24] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[24] = ((vec1.generic[24] / 2) + (vec2.generic[24] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[25] % 2); + y_d_rem = (vec2.generic[25] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[25] = ((vec1.generic[25] / 2) + (vec2.generic[25] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[26] % 2); + y_d_rem = 
(vec2.generic[26] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[26] = ((vec1.generic[26] / 2) + (vec2.generic[26] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[27] % 2); + y_d_rem = (vec2.generic[27] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[27] = ((vec1.generic[27] / 2) + (vec2.generic[27] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[28] % 2); + y_d_rem = (vec2.generic[28] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[28] = ((vec1.generic[28] / 2) + (vec2.generic[28] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[29] % 2); + y_d_rem = (vec2.generic[29] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[29] = ((vec1.generic[29] / 2) + (vec2.generic[29] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[30] % 2); + y_d_rem = (vec2.generic[30] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[30] = ((vec1.generic[30] / 2) + (vec2.generic[30] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[31] % 2); + y_d_rem = (vec2.generic[31] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[31] = ((vec1.generic[31] / 2) + (vec2.generic[31] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT16x32_AVG_DEFINED +#endif +#if !defined(VINT16x32_AND_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_and(vint16x32 vec1, vint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] & vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] & vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] & vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] & vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] & vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] & vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] & vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] & vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] & vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] & vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] & vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] & vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] & vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] & vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] & vec2.generic[30]); + vec1.generic[31] = 
(vec1.generic[31] & vec2.generic[31]); + return vec1; +} +# define VINT16x32_AND_DEFINED +#endif +#if !defined(VINT16x32_OR_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_or(vint16x32 vec1, vint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] | vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] | vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] | vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] | vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] | vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] | vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] | vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] | vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] | vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] | vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] | vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] | vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] | vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] | vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] | vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] | vec2.generic[31]); + return vec1; +} +# define VINT16x32_OR_DEFINED +#endif +#if !defined(VINT16x32_XOR_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_xor(vint16x32 vec1, vint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] ^ vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] ^ vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] ^ vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] ^ vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] ^ vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] ^ vec2.generic[21]); 
+ vec1.generic[22] = (vec1.generic[22] ^ vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] ^ vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] ^ vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] ^ vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] ^ vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] ^ vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] ^ vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] ^ vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] ^ vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] ^ vec2.generic[31]); + return vec1; +} +# define VINT16x32_XOR_DEFINED +#endif +#if !defined(VINT16x32_NOT_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_not(vint16x32 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + vec.generic[8] = ~vec.generic[8]; + vec.generic[9] = ~vec.generic[9]; + vec.generic[10] = ~vec.generic[10]; + vec.generic[11] = ~vec.generic[11]; + vec.generic[12] = ~vec.generic[12]; + vec.generic[13] = ~vec.generic[13]; + vec.generic[14] = ~vec.generic[14]; + vec.generic[15] = ~vec.generic[15]; + vec.generic[16] = ~vec.generic[16]; + vec.generic[17] = ~vec.generic[17]; + vec.generic[18] = ~vec.generic[18]; + vec.generic[19] = ~vec.generic[19]; + vec.generic[20] = ~vec.generic[20]; + vec.generic[21] = ~vec.generic[21]; + vec.generic[22] = ~vec.generic[22]; + vec.generic[23] = ~vec.generic[23]; + vec.generic[24] = ~vec.generic[24]; + vec.generic[25] = ~vec.generic[25]; + vec.generic[26] = ~vec.generic[26]; + vec.generic[27] = ~vec.generic[27]; + vec.generic[28] = ~vec.generic[28]; + vec.generic[29] = ~vec.generic[29]; + vec.generic[30] = ~vec.generic[30]; + vec.generic[31] = ~vec.generic[31]; + return vec; +} +# define VINT16x32_NOT_DEFINED +#endif +#if !defined(VINT16x32_CMPLT_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_cmplt(vint16x32 vec1, vint16x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 2); + memset(&vec1.generic[16], (vec1.generic[16] < vec2.generic[16]) ? 0xFF : 0, 2); + memset(&vec1.generic[17], (vec1.generic[17] < vec2.generic[17]) ? 
0xFF : 0, 2); + memset(&vec1.generic[18], (vec1.generic[18] < vec2.generic[18]) ? 0xFF : 0, 2); + memset(&vec1.generic[19], (vec1.generic[19] < vec2.generic[19]) ? 0xFF : 0, 2); + memset(&vec1.generic[20], (vec1.generic[20] < vec2.generic[20]) ? 0xFF : 0, 2); + memset(&vec1.generic[21], (vec1.generic[21] < vec2.generic[21]) ? 0xFF : 0, 2); + memset(&vec1.generic[22], (vec1.generic[22] < vec2.generic[22]) ? 0xFF : 0, 2); + memset(&vec1.generic[23], (vec1.generic[23] < vec2.generic[23]) ? 0xFF : 0, 2); + memset(&vec1.generic[24], (vec1.generic[24] < vec2.generic[24]) ? 0xFF : 0, 2); + memset(&vec1.generic[25], (vec1.generic[25] < vec2.generic[25]) ? 0xFF : 0, 2); + memset(&vec1.generic[26], (vec1.generic[26] < vec2.generic[26]) ? 0xFF : 0, 2); + memset(&vec1.generic[27], (vec1.generic[27] < vec2.generic[27]) ? 0xFF : 0, 2); + memset(&vec1.generic[28], (vec1.generic[28] < vec2.generic[28]) ? 0xFF : 0, 2); + memset(&vec1.generic[29], (vec1.generic[29] < vec2.generic[29]) ? 0xFF : 0, 2); + memset(&vec1.generic[30], (vec1.generic[30] < vec2.generic[30]) ? 0xFF : 0, 2); + memset(&vec1.generic[31], (vec1.generic[31] < vec2.generic[31]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x32_CMPLT_DEFINED +#endif +#if !defined(VINT16x32_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_cmpeq(vint16x32 vec1, vint16x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 2); + memset(&vec1.generic[16], (vec1.generic[16] == vec2.generic[16]) ? 0xFF : 0, 2); + memset(&vec1.generic[17], (vec1.generic[17] == vec2.generic[17]) ? 0xFF : 0, 2); + memset(&vec1.generic[18], (vec1.generic[18] == vec2.generic[18]) ? 0xFF : 0, 2); + memset(&vec1.generic[19], (vec1.generic[19] == vec2.generic[19]) ? 0xFF : 0, 2); + memset(&vec1.generic[20], (vec1.generic[20] == vec2.generic[20]) ? 0xFF : 0, 2); + memset(&vec1.generic[21], (vec1.generic[21] == vec2.generic[21]) ? 0xFF : 0, 2); + memset(&vec1.generic[22], (vec1.generic[22] == vec2.generic[22]) ? 0xFF : 0, 2); + memset(&vec1.generic[23], (vec1.generic[23] == vec2.generic[23]) ? 0xFF : 0, 2); + memset(&vec1.generic[24], (vec1.generic[24] == vec2.generic[24]) ? 0xFF : 0, 2); + memset(&vec1.generic[25], (vec1.generic[25] == vec2.generic[25]) ? 0xFF : 0, 2); + memset(&vec1.generic[26], (vec1.generic[26] == vec2.generic[26]) ? 
0xFF : 0, 2); + memset(&vec1.generic[27], (vec1.generic[27] == vec2.generic[27]) ? 0xFF : 0, 2); + memset(&vec1.generic[28], (vec1.generic[28] == vec2.generic[28]) ? 0xFF : 0, 2); + memset(&vec1.generic[29], (vec1.generic[29] == vec2.generic[29]) ? 0xFF : 0, 2); + memset(&vec1.generic[30], (vec1.generic[30] == vec2.generic[30]) ? 0xFF : 0, 2); + memset(&vec1.generic[31], (vec1.generic[31] == vec2.generic[31]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x32_CMPEQ_DEFINED +#endif +#if !defined(VINT16x32_CMPGT_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_cmpgt(vint16x32 vec1, vint16x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 2); + memset(&vec1.generic[16], (vec1.generic[16] > vec2.generic[16]) ? 0xFF : 0, 2); + memset(&vec1.generic[17], (vec1.generic[17] > vec2.generic[17]) ? 0xFF : 0, 2); + memset(&vec1.generic[18], (vec1.generic[18] > vec2.generic[18]) ? 0xFF : 0, 2); + memset(&vec1.generic[19], (vec1.generic[19] > vec2.generic[19]) ? 0xFF : 0, 2); + memset(&vec1.generic[20], (vec1.generic[20] > vec2.generic[20]) ? 0xFF : 0, 2); + memset(&vec1.generic[21], (vec1.generic[21] > vec2.generic[21]) ? 0xFF : 0, 2); + memset(&vec1.generic[22], (vec1.generic[22] > vec2.generic[22]) ? 0xFF : 0, 2); + memset(&vec1.generic[23], (vec1.generic[23] > vec2.generic[23]) ? 0xFF : 0, 2); + memset(&vec1.generic[24], (vec1.generic[24] > vec2.generic[24]) ? 0xFF : 0, 2); + memset(&vec1.generic[25], (vec1.generic[25] > vec2.generic[25]) ? 0xFF : 0, 2); + memset(&vec1.generic[26], (vec1.generic[26] > vec2.generic[26]) ? 0xFF : 0, 2); + memset(&vec1.generic[27], (vec1.generic[27] > vec2.generic[27]) ? 0xFF : 0, 2); + memset(&vec1.generic[28], (vec1.generic[28] > vec2.generic[28]) ? 0xFF : 0, 2); + memset(&vec1.generic[29], (vec1.generic[29] > vec2.generic[29]) ? 0xFF : 0, 2); + memset(&vec1.generic[30], (vec1.generic[30] > vec2.generic[30]) ? 0xFF : 0, 2); + memset(&vec1.generic[31], (vec1.generic[31] > vec2.generic[31]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x32_CMPGT_DEFINED +#endif +#if !defined(VINT16x32_CMPLE_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_cmple(vint16x32 vec1, vint16x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 
0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 2); + memset(&vec1.generic[16], (vec1.generic[16] <= vec2.generic[16]) ? 0xFF : 0, 2); + memset(&vec1.generic[17], (vec1.generic[17] <= vec2.generic[17]) ? 0xFF : 0, 2); + memset(&vec1.generic[18], (vec1.generic[18] <= vec2.generic[18]) ? 0xFF : 0, 2); + memset(&vec1.generic[19], (vec1.generic[19] <= vec2.generic[19]) ? 0xFF : 0, 2); + memset(&vec1.generic[20], (vec1.generic[20] <= vec2.generic[20]) ? 0xFF : 0, 2); + memset(&vec1.generic[21], (vec1.generic[21] <= vec2.generic[21]) ? 0xFF : 0, 2); + memset(&vec1.generic[22], (vec1.generic[22] <= vec2.generic[22]) ? 0xFF : 0, 2); + memset(&vec1.generic[23], (vec1.generic[23] <= vec2.generic[23]) ? 0xFF : 0, 2); + memset(&vec1.generic[24], (vec1.generic[24] <= vec2.generic[24]) ? 0xFF : 0, 2); + memset(&vec1.generic[25], (vec1.generic[25] <= vec2.generic[25]) ? 0xFF : 0, 2); + memset(&vec1.generic[26], (vec1.generic[26] <= vec2.generic[26]) ? 0xFF : 0, 2); + memset(&vec1.generic[27], (vec1.generic[27] <= vec2.generic[27]) ? 0xFF : 0, 2); + memset(&vec1.generic[28], (vec1.generic[28] <= vec2.generic[28]) ? 0xFF : 0, 2); + memset(&vec1.generic[29], (vec1.generic[29] <= vec2.generic[29]) ? 0xFF : 0, 2); + memset(&vec1.generic[30], (vec1.generic[30] <= vec2.generic[30]) ? 0xFF : 0, 2); + memset(&vec1.generic[31], (vec1.generic[31] <= vec2.generic[31]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x32_CMPLE_DEFINED +#endif +#if !defined(VINT16x32_CMPGE_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_cmpge(vint16x32 vec1, vint16x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 
0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 2); + memset(&vec1.generic[16], (vec1.generic[16] >= vec2.generic[16]) ? 0xFF : 0, 2); + memset(&vec1.generic[17], (vec1.generic[17] >= vec2.generic[17]) ? 0xFF : 0, 2); + memset(&vec1.generic[18], (vec1.generic[18] >= vec2.generic[18]) ? 0xFF : 0, 2); + memset(&vec1.generic[19], (vec1.generic[19] >= vec2.generic[19]) ? 0xFF : 0, 2); + memset(&vec1.generic[20], (vec1.generic[20] >= vec2.generic[20]) ? 0xFF : 0, 2); + memset(&vec1.generic[21], (vec1.generic[21] >= vec2.generic[21]) ? 0xFF : 0, 2); + memset(&vec1.generic[22], (vec1.generic[22] >= vec2.generic[22]) ? 0xFF : 0, 2); + memset(&vec1.generic[23], (vec1.generic[23] >= vec2.generic[23]) ? 0xFF : 0, 2); + memset(&vec1.generic[24], (vec1.generic[24] >= vec2.generic[24]) ? 0xFF : 0, 2); + memset(&vec1.generic[25], (vec1.generic[25] >= vec2.generic[25]) ? 0xFF : 0, 2); + memset(&vec1.generic[26], (vec1.generic[26] >= vec2.generic[26]) ? 0xFF : 0, 2); + memset(&vec1.generic[27], (vec1.generic[27] >= vec2.generic[27]) ? 0xFF : 0, 2); + memset(&vec1.generic[28], (vec1.generic[28] >= vec2.generic[28]) ? 0xFF : 0, 2); + memset(&vec1.generic[29], (vec1.generic[29] >= vec2.generic[29]) ? 0xFF : 0, 2); + memset(&vec1.generic[30], (vec1.generic[30] >= vec2.generic[30]) ? 0xFF : 0, 2); + memset(&vec1.generic[31], (vec1.generic[31] >= vec2.generic[31]) ? 0xFF : 0, 2); + return vec1; +} +# define VINT16x32_CMPGE_DEFINED +#endif +#if !defined(VINT16x32_MIN_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_min(vint16x32 vec1, vint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? 
(vec1.generic[15]) : (vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] < vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] < vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] < vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] < vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] < vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] < vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] < vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] < vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] < vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] < vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] < vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] < vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] < vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] < vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] < vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] < vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]); + return vec1; +} +# define VINT16x32_MIN_DEFINED +#endif +#if !defined(VINT16x32_MAX_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_max(vint16x32 vec1, vint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] > vec2.generic[16]) ? 
(vec1.generic[16]) : (vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] > vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] > vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] > vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] > vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] > vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] > vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] > vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] > vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] > vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] > vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] > vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] > vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] > vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] > vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] > vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]); + return vec1; +} +# define VINT16x32_MAX_DEFINED +#endif +#if !defined(VINT16x32_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_rshift(vint16x32 vec1, vuint16x32 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); +vec1.generic[4] = ((~vec1.generic[4]) >> vec2.generic[4]); +vec1.generic[5] = ((~vec1.generic[5]) >> vec2.generic[5]); +vec1.generic[6] = ((~vec1.generic[6]) >> vec2.generic[6]); +vec1.generic[7] = ((~vec1.generic[7]) >> vec2.generic[7]); +vec1.generic[8] = ((~vec1.generic[8]) >> vec2.generic[8]); +vec1.generic[9] = ((~vec1.generic[9]) >> vec2.generic[9]); +vec1.generic[10] = ((~vec1.generic[10]) >> vec2.generic[10]); +vec1.generic[11] = ((~vec1.generic[11]) >> vec2.generic[11]); +vec1.generic[12] = ((~vec1.generic[12]) >> vec2.generic[12]); +vec1.generic[13] = ((~vec1.generic[13]) >> vec2.generic[13]); +vec1.generic[14] = ((~vec1.generic[14]) >> vec2.generic[14]); +vec1.generic[15] = ((~vec1.generic[15]) >> vec2.generic[15]); +vec1.generic[16] = ((~vec1.generic[16]) >> vec2.generic[16]); +vec1.generic[17] = ((~vec1.generic[17]) >> vec2.generic[17]); +vec1.generic[18] = ((~vec1.generic[18]) >> vec2.generic[18]); +vec1.generic[19] = ((~vec1.generic[19]) >> vec2.generic[19]); +vec1.generic[20] = ((~vec1.generic[20]) >> vec2.generic[20]); +vec1.generic[21] = ((~vec1.generic[21]) >> vec2.generic[21]); +vec1.generic[22] = ((~vec1.generic[22]) >> vec2.generic[22]); +vec1.generic[23] = ((~vec1.generic[23]) >> vec2.generic[23]); +vec1.generic[24] = ((~vec1.generic[24]) >> vec2.generic[24]); +vec1.generic[25] = ((~vec1.generic[25]) >> vec2.generic[25]); +vec1.generic[26] = ((~vec1.generic[26]) >> vec2.generic[26]); +vec1.generic[27] = ((~vec1.generic[27]) >> vec2.generic[27]); +vec1.generic[28] = ((~vec1.generic[28]) >> vec2.generic[28]); 
+vec1.generic[29] = ((~vec1.generic[29]) >> vec2.generic[29]); +vec1.generic[30] = ((~vec1.generic[30]) >> vec2.generic[30]); +vec1.generic[31] = ((~vec1.generic[31]) >> vec2.generic[31]); + return vec1; +} +# define VINT16x32_RSHIFT_DEFINED +#endif +#if !defined(VINT16x32_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_lrshift(vint16x32 vec1, vuint16x32 vec2) +{ + union { vec_uint16 u; vec_int16 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u >>= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u >>= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u >>= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u >>= vec2.generic[7]; + vec1.generic[7] = x.s; + x.s = vec1.generic[8]; + x.u >>= vec2.generic[8]; + vec1.generic[8] = x.s; + x.s = vec1.generic[9]; + x.u >>= vec2.generic[9]; + vec1.generic[9] = x.s; + x.s = vec1.generic[10]; + x.u >>= vec2.generic[10]; + vec1.generic[10] = x.s; + x.s = vec1.generic[11]; + x.u >>= vec2.generic[11]; + vec1.generic[11] = x.s; + x.s = vec1.generic[12]; + x.u >>= vec2.generic[12]; + vec1.generic[12] = x.s; + x.s = vec1.generic[13]; + x.u >>= vec2.generic[13]; + vec1.generic[13] = x.s; + x.s = vec1.generic[14]; + x.u >>= vec2.generic[14]; + vec1.generic[14] = x.s; + x.s = vec1.generic[15]; + x.u >>= vec2.generic[15]; + vec1.generic[15] = x.s; + x.s = vec1.generic[16]; + x.u >>= vec2.generic[16]; + vec1.generic[16] = x.s; + x.s = vec1.generic[17]; + x.u >>= vec2.generic[17]; + vec1.generic[17] = x.s; + x.s = vec1.generic[18]; + x.u >>= vec2.generic[18]; + vec1.generic[18] = x.s; + x.s = vec1.generic[19]; + x.u >>= vec2.generic[19]; + vec1.generic[19] = x.s; + x.s = vec1.generic[20]; + x.u >>= vec2.generic[20]; + vec1.generic[20] = x.s; + x.s = vec1.generic[21]; + x.u >>= vec2.generic[21]; + vec1.generic[21] = x.s; + x.s = vec1.generic[22]; + x.u >>= vec2.generic[22]; + vec1.generic[22] = x.s; + x.s = vec1.generic[23]; + x.u >>= vec2.generic[23]; + vec1.generic[23] = x.s; + x.s = vec1.generic[24]; + x.u >>= vec2.generic[24]; + vec1.generic[24] = x.s; + x.s = vec1.generic[25]; + x.u >>= vec2.generic[25]; + vec1.generic[25] = x.s; + x.s = vec1.generic[26]; + x.u >>= vec2.generic[26]; + vec1.generic[26] = x.s; + x.s = vec1.generic[27]; + x.u >>= vec2.generic[27]; + vec1.generic[27] = x.s; + x.s = vec1.generic[28]; + x.u >>= vec2.generic[28]; + vec1.generic[28] = x.s; + x.s = vec1.generic[29]; + x.u >>= vec2.generic[29]; + vec1.generic[29] = x.s; + x.s = vec1.generic[30]; + x.u >>= vec2.generic[30]; + vec1.generic[30] = x.s; + x.s = vec1.generic[31]; + x.u >>= vec2.generic[31]; + vec1.generic[31] = x.s; + return vec1; +} +# define VINT16x32_LRSHIFT_DEFINED +#endif +#if !defined(VINT16x32_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x32 vint16x32_lshift(vint16x32 vec1, vuint16x32 vec2) +{ + union { vec_uint16 u; vec_int16 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u <<= vec2.generic[4]; + vec1.generic[4] = x.s; + 
x.s = vec1.generic[5]; + x.u <<= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u <<= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u <<= vec2.generic[7]; + vec1.generic[7] = x.s; + x.s = vec1.generic[8]; + x.u <<= vec2.generic[8]; + vec1.generic[8] = x.s; + x.s = vec1.generic[9]; + x.u <<= vec2.generic[9]; + vec1.generic[9] = x.s; + x.s = vec1.generic[10]; + x.u <<= vec2.generic[10]; + vec1.generic[10] = x.s; + x.s = vec1.generic[11]; + x.u <<= vec2.generic[11]; + vec1.generic[11] = x.s; + x.s = vec1.generic[12]; + x.u <<= vec2.generic[12]; + vec1.generic[12] = x.s; + x.s = vec1.generic[13]; + x.u <<= vec2.generic[13]; + vec1.generic[13] = x.s; + x.s = vec1.generic[14]; + x.u <<= vec2.generic[14]; + vec1.generic[14] = x.s; + x.s = vec1.generic[15]; + x.u <<= vec2.generic[15]; + vec1.generic[15] = x.s; + x.s = vec1.generic[16]; + x.u <<= vec2.generic[16]; + vec1.generic[16] = x.s; + x.s = vec1.generic[17]; + x.u <<= vec2.generic[17]; + vec1.generic[17] = x.s; + x.s = vec1.generic[18]; + x.u <<= vec2.generic[18]; + vec1.generic[18] = x.s; + x.s = vec1.generic[19]; + x.u <<= vec2.generic[19]; + vec1.generic[19] = x.s; + x.s = vec1.generic[20]; + x.u <<= vec2.generic[20]; + vec1.generic[20] = x.s; + x.s = vec1.generic[21]; + x.u <<= vec2.generic[21]; + vec1.generic[21] = x.s; + x.s = vec1.generic[22]; + x.u <<= vec2.generic[22]; + vec1.generic[22] = x.s; + x.s = vec1.generic[23]; + x.u <<= vec2.generic[23]; + vec1.generic[23] = x.s; + x.s = vec1.generic[24]; + x.u <<= vec2.generic[24]; + vec1.generic[24] = x.s; + x.s = vec1.generic[25]; + x.u <<= vec2.generic[25]; + vec1.generic[25] = x.s; + x.s = vec1.generic[26]; + x.u <<= vec2.generic[26]; + vec1.generic[26] = x.s; + x.s = vec1.generic[27]; + x.u <<= vec2.generic[27]; + vec1.generic[27] = x.s; + x.s = vec1.generic[28]; + x.u <<= vec2.generic[28]; + vec1.generic[28] = x.s; + x.s = vec1.generic[29]; + x.u <<= vec2.generic[29]; + vec1.generic[29] = x.s; + x.s = vec1.generic[30]; + x.u <<= vec2.generic[30]; + vec1.generic[30] = x.s; + x.s = vec1.generic[31]; + x.u <<= vec2.generic[31]; + vec1.generic[31] = x.s; + return vec1; +} +# define VINT16x32_LSHIFT_DEFINED +#endif +#if !defined(VUINT16x32_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_splat(vec_uint16 x) +{ + vuint16x32 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + vec.generic[8] = x; + vec.generic[9] = x; + vec.generic[10] = x; + vec.generic[11] = x; + vec.generic[12] = x; + vec.generic[13] = x; + vec.generic[14] = x; + vec.generic[15] = x; + vec.generic[16] = x; + vec.generic[17] = x; + vec.generic[18] = x; + vec.generic[19] = x; + vec.generic[20] = x; + vec.generic[21] = x; + vec.generic[22] = x; + vec.generic[23] = x; + vec.generic[24] = x; + vec.generic[25] = x; + vec.generic[26] = x; + vec.generic[27] = x; + vec.generic[28] = x; + vec.generic[29] = x; + vec.generic[30] = x; + vec.generic[31] = x; + return vec; +} +# define VUINT16x32_SPLAT_DEFINED +#endif +#if !defined(VUINT16x32_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_load_aligned(const vec_uint16 x[32]) +{ + vuint16x32 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VUINT16x32_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT16x32_LOAD_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_load(const vec_uint16 x[32]) +{ + vuint16x32 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VUINT16x32_LOAD_DEFINED 
+#endif +#if !defined(VUINT16x32_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint16x32_store_aligned(vuint16x32 vec, vec_uint16 x[32]) +{ + memcpy(x, vec.generic, 64); +} +# define VUINT16x32_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT16x32_STORE_DEFINED) +VEC_FUNC_IMPL void vuint16x32_store(vuint16x32 vec, vec_uint16 x[32]) +{ + memcpy(x, vec.generic, 64); +} +# define VUINT16x32_STORE_DEFINED +#endif +#if !defined(VUINT16x32_ADD_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_add(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] + vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] + vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] + vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] + vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] + vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] + vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] + vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] + vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] + vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] + vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] + vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] + vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] + vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] + vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] + vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] + vec2.generic[31]); + return vec1; +} +# define VUINT16x32_ADD_DEFINED +#endif +#if !defined(VUINT16x32_SUB_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_sub(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + vec1.generic[16] = 
(vec1.generic[16] - vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] - vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] - vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] - vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] - vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] - vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] - vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] - vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] - vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] - vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] - vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] - vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] - vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] - vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] - vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] - vec2.generic[31]); + return vec1; +} +# define VUINT16x32_SUB_DEFINED +#endif +#if !defined(VUINT16x32_MUL_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_mul(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] * vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] * vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] * vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] * vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] * vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] * vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] * vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] * vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] * vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] * vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] * vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] * vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] * vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] * vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] * vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] * vec2.generic[31]); + return vec1; +} +# define VUINT16x32_MUL_DEFINED +#endif +#if !defined(VUINT16x32_DIV_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_div(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? 
(vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0); + vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] / vec2.generic[16]) : 0); + vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] / vec2.generic[17]) : 0); + vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] / vec2.generic[18]) : 0); + vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] / vec2.generic[19]) : 0); + vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] / vec2.generic[20]) : 0); + vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] / vec2.generic[21]) : 0); + vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] / vec2.generic[22]) : 0); + vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] / vec2.generic[23]) : 0); + vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] / vec2.generic[24]) : 0); + vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] / vec2.generic[25]) : 0); + vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] / vec2.generic[26]) : 0); + vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] / vec2.generic[27]) : 0); + vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] / vec2.generic[28]) : 0); + vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] / vec2.generic[29]) : 0); + vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] / vec2.generic[30]) : 0); + vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] / vec2.generic[31]) : 0); + return vec1; +} +# define VUINT16x32_DIV_DEFINED +#endif +#if !defined(VUINT16x32_MOD_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_mod(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? 
(vec1.generic[12] % vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0); + vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] % vec2.generic[16]) : 0); + vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] % vec2.generic[17]) : 0); + vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] % vec2.generic[18]) : 0); + vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] % vec2.generic[19]) : 0); + vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] % vec2.generic[20]) : 0); + vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] % vec2.generic[21]) : 0); + vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] % vec2.generic[22]) : 0); + vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] % vec2.generic[23]) : 0); + vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] % vec2.generic[24]) : 0); + vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] % vec2.generic[25]) : 0); + vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] % vec2.generic[26]) : 0); + vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] % vec2.generic[27]) : 0); + vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] % vec2.generic[28]) : 0); + vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] % vec2.generic[29]) : 0); + vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] % vec2.generic[30]) : 0); + vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] % vec2.generic[31]) : 0); + return vec1; +} +# define VUINT16x32_MOD_DEFINED +#endif +#if !defined(VUINT16x32_AVG_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_avg(vuint16x32 vec1, vuint16x32 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); +vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1); +vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1); +vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1); +vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1); +vec1.generic[8] = (vec1.generic[8] >> 1) + (vec2.generic[8] >> 1) + ((vec1.generic[8] | vec2.generic[8]) & 1); +vec1.generic[9] = (vec1.generic[9] >> 1) + (vec2.generic[9] >> 1) + ((vec1.generic[9] | vec2.generic[9]) & 1); +vec1.generic[10] = (vec1.generic[10] >> 1) + (vec2.generic[10] >> 1) + ((vec1.generic[10] | vec2.generic[10]) & 1); +vec1.generic[11] = (vec1.generic[11] >> 1) + (vec2.generic[11] >> 1) + ((vec1.generic[11] | vec2.generic[11]) & 1); +vec1.generic[12] = (vec1.generic[12] >> 1) + (vec2.generic[12] >> 1) + ((vec1.generic[12] | vec2.generic[12]) & 1); +vec1.generic[13] = (vec1.generic[13] >> 1) + (vec2.generic[13] >> 1) + ((vec1.generic[13] | vec2.generic[13]) & 1); +vec1.generic[14] = (vec1.generic[14] >> 1) + (vec2.generic[14] >> 1) + ((vec1.generic[14] | vec2.generic[14]) & 1); 
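/* each unsigned avg lane here computes the overflow-safe ceiling average:
   (a >> 1) + (b >> 1) + ((a | b) & 1) equals ceil((a + b) / 2) for unsigned a, b,
   without the intermediate overflow of (a + b) / 2 in the lane type
   (e.g. with 16-bit lanes, a = 0xFFFF, b = 1 yields 0x8000). */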
+vec1.generic[15] = (vec1.generic[15] >> 1) + (vec2.generic[15] >> 1) + ((vec1.generic[15] | vec2.generic[15]) & 1); +vec1.generic[16] = (vec1.generic[16] >> 1) + (vec2.generic[16] >> 1) + ((vec1.generic[16] | vec2.generic[16]) & 1); +vec1.generic[17] = (vec1.generic[17] >> 1) + (vec2.generic[17] >> 1) + ((vec1.generic[17] | vec2.generic[17]) & 1); +vec1.generic[18] = (vec1.generic[18] >> 1) + (vec2.generic[18] >> 1) + ((vec1.generic[18] | vec2.generic[18]) & 1); +vec1.generic[19] = (vec1.generic[19] >> 1) + (vec2.generic[19] >> 1) + ((vec1.generic[19] | vec2.generic[19]) & 1); +vec1.generic[20] = (vec1.generic[20] >> 1) + (vec2.generic[20] >> 1) + ((vec1.generic[20] | vec2.generic[20]) & 1); +vec1.generic[21] = (vec1.generic[21] >> 1) + (vec2.generic[21] >> 1) + ((vec1.generic[21] | vec2.generic[21]) & 1); +vec1.generic[22] = (vec1.generic[22] >> 1) + (vec2.generic[22] >> 1) + ((vec1.generic[22] | vec2.generic[22]) & 1); +vec1.generic[23] = (vec1.generic[23] >> 1) + (vec2.generic[23] >> 1) + ((vec1.generic[23] | vec2.generic[23]) & 1); +vec1.generic[24] = (vec1.generic[24] >> 1) + (vec2.generic[24] >> 1) + ((vec1.generic[24] | vec2.generic[24]) & 1); +vec1.generic[25] = (vec1.generic[25] >> 1) + (vec2.generic[25] >> 1) + ((vec1.generic[25] | vec2.generic[25]) & 1); +vec1.generic[26] = (vec1.generic[26] >> 1) + (vec2.generic[26] >> 1) + ((vec1.generic[26] | vec2.generic[26]) & 1); +vec1.generic[27] = (vec1.generic[27] >> 1) + (vec2.generic[27] >> 1) + ((vec1.generic[27] | vec2.generic[27]) & 1); +vec1.generic[28] = (vec1.generic[28] >> 1) + (vec2.generic[28] >> 1) + ((vec1.generic[28] | vec2.generic[28]) & 1); +vec1.generic[29] = (vec1.generic[29] >> 1) + (vec2.generic[29] >> 1) + ((vec1.generic[29] | vec2.generic[29]) & 1); +vec1.generic[30] = (vec1.generic[30] >> 1) + (vec2.generic[30] >> 1) + ((vec1.generic[30] | vec2.generic[30]) & 1); +vec1.generic[31] = (vec1.generic[31] >> 1) + (vec2.generic[31] >> 1) + ((vec1.generic[31] | vec2.generic[31]) & 1); + return vec1; +} +# define VUINT16x32_AVG_DEFINED +#endif +#if !defined(VUINT16x32_AND_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_and(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] & vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] & vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] & vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] & vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] & vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] & vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] & vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] & 
vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] & vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] & vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] & vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] & vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] & vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] & vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] & vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] & vec2.generic[31]); + return vec1; +} +# define VUINT16x32_AND_DEFINED +#endif +#if !defined(VUINT16x32_OR_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_or(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] | vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] | vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] | vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] | vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] | vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] | vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] | vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] | vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] | vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] | vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] | vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] | vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] | vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] | vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] | vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] | vec2.generic[31]); + return vec1; +} +# define VUINT16x32_OR_DEFINED +#endif +#if !defined(VUINT16x32_XOR_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_xor(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]); + 
vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] ^ vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] ^ vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] ^ vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] ^ vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] ^ vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] ^ vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] ^ vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] ^ vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] ^ vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] ^ vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] ^ vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] ^ vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] ^ vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] ^ vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] ^ vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] ^ vec2.generic[31]); + return vec1; +} +# define VUINT16x32_XOR_DEFINED +#endif +#if !defined(VUINT16x32_NOT_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_not(vuint16x32 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + vec.generic[8] = ~vec.generic[8]; + vec.generic[9] = ~vec.generic[9]; + vec.generic[10] = ~vec.generic[10]; + vec.generic[11] = ~vec.generic[11]; + vec.generic[12] = ~vec.generic[12]; + vec.generic[13] = ~vec.generic[13]; + vec.generic[14] = ~vec.generic[14]; + vec.generic[15] = ~vec.generic[15]; + vec.generic[16] = ~vec.generic[16]; + vec.generic[17] = ~vec.generic[17]; + vec.generic[18] = ~vec.generic[18]; + vec.generic[19] = ~vec.generic[19]; + vec.generic[20] = ~vec.generic[20]; + vec.generic[21] = ~vec.generic[21]; + vec.generic[22] = ~vec.generic[22]; + vec.generic[23] = ~vec.generic[23]; + vec.generic[24] = ~vec.generic[24]; + vec.generic[25] = ~vec.generic[25]; + vec.generic[26] = ~vec.generic[26]; + vec.generic[27] = ~vec.generic[27]; + vec.generic[28] = ~vec.generic[28]; + vec.generic[29] = ~vec.generic[29]; + vec.generic[30] = ~vec.generic[30]; + vec.generic[31] = ~vec.generic[31]; + return vec; +} +# define VUINT16x32_NOT_DEFINED +#endif +#if !defined(VUINT16x32_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmplt(vuint16x32 vec1, vuint16x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 
0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 2); + memset(&vec1.generic[16], (vec1.generic[16] < vec2.generic[16]) ? 0xFF : 0, 2); + memset(&vec1.generic[17], (vec1.generic[17] < vec2.generic[17]) ? 0xFF : 0, 2); + memset(&vec1.generic[18], (vec1.generic[18] < vec2.generic[18]) ? 0xFF : 0, 2); + memset(&vec1.generic[19], (vec1.generic[19] < vec2.generic[19]) ? 0xFF : 0, 2); + memset(&vec1.generic[20], (vec1.generic[20] < vec2.generic[20]) ? 0xFF : 0, 2); + memset(&vec1.generic[21], (vec1.generic[21] < vec2.generic[21]) ? 0xFF : 0, 2); + memset(&vec1.generic[22], (vec1.generic[22] < vec2.generic[22]) ? 0xFF : 0, 2); + memset(&vec1.generic[23], (vec1.generic[23] < vec2.generic[23]) ? 0xFF : 0, 2); + memset(&vec1.generic[24], (vec1.generic[24] < vec2.generic[24]) ? 0xFF : 0, 2); + memset(&vec1.generic[25], (vec1.generic[25] < vec2.generic[25]) ? 0xFF : 0, 2); + memset(&vec1.generic[26], (vec1.generic[26] < vec2.generic[26]) ? 0xFF : 0, 2); + memset(&vec1.generic[27], (vec1.generic[27] < vec2.generic[27]) ? 0xFF : 0, 2); + memset(&vec1.generic[28], (vec1.generic[28] < vec2.generic[28]) ? 0xFF : 0, 2); + memset(&vec1.generic[29], (vec1.generic[29] < vec2.generic[29]) ? 0xFF : 0, 2); + memset(&vec1.generic[30], (vec1.generic[30] < vec2.generic[30]) ? 0xFF : 0, 2); + memset(&vec1.generic[31], (vec1.generic[31] < vec2.generic[31]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x32_CMPLT_DEFINED +#endif +#if !defined(VUINT16x32_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpeq(vuint16x32 vec1, vuint16x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 2); + memset(&vec1.generic[16], (vec1.generic[16] == vec2.generic[16]) ? 0xFF : 0, 2); + memset(&vec1.generic[17], (vec1.generic[17] == vec2.generic[17]) ? 0xFF : 0, 2); + memset(&vec1.generic[18], (vec1.generic[18] == vec2.generic[18]) ? 0xFF : 0, 2); + memset(&vec1.generic[19], (vec1.generic[19] == vec2.generic[19]) ? 0xFF : 0, 2); + memset(&vec1.generic[20], (vec1.generic[20] == vec2.generic[20]) ? 
0xFF : 0, 2); + memset(&vec1.generic[21], (vec1.generic[21] == vec2.generic[21]) ? 0xFF : 0, 2); + memset(&vec1.generic[22], (vec1.generic[22] == vec2.generic[22]) ? 0xFF : 0, 2); + memset(&vec1.generic[23], (vec1.generic[23] == vec2.generic[23]) ? 0xFF : 0, 2); + memset(&vec1.generic[24], (vec1.generic[24] == vec2.generic[24]) ? 0xFF : 0, 2); + memset(&vec1.generic[25], (vec1.generic[25] == vec2.generic[25]) ? 0xFF : 0, 2); + memset(&vec1.generic[26], (vec1.generic[26] == vec2.generic[26]) ? 0xFF : 0, 2); + memset(&vec1.generic[27], (vec1.generic[27] == vec2.generic[27]) ? 0xFF : 0, 2); + memset(&vec1.generic[28], (vec1.generic[28] == vec2.generic[28]) ? 0xFF : 0, 2); + memset(&vec1.generic[29], (vec1.generic[29] == vec2.generic[29]) ? 0xFF : 0, 2); + memset(&vec1.generic[30], (vec1.generic[30] == vec2.generic[30]) ? 0xFF : 0, 2); + memset(&vec1.generic[31], (vec1.generic[31] == vec2.generic[31]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x32_CMPEQ_DEFINED +#endif +#if !defined(VUINT16x32_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpgt(vuint16x32 vec1, vuint16x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 2); + memset(&vec1.generic[16], (vec1.generic[16] > vec2.generic[16]) ? 0xFF : 0, 2); + memset(&vec1.generic[17], (vec1.generic[17] > vec2.generic[17]) ? 0xFF : 0, 2); + memset(&vec1.generic[18], (vec1.generic[18] > vec2.generic[18]) ? 0xFF : 0, 2); + memset(&vec1.generic[19], (vec1.generic[19] > vec2.generic[19]) ? 0xFF : 0, 2); + memset(&vec1.generic[20], (vec1.generic[20] > vec2.generic[20]) ? 0xFF : 0, 2); + memset(&vec1.generic[21], (vec1.generic[21] > vec2.generic[21]) ? 0xFF : 0, 2); + memset(&vec1.generic[22], (vec1.generic[22] > vec2.generic[22]) ? 0xFF : 0, 2); + memset(&vec1.generic[23], (vec1.generic[23] > vec2.generic[23]) ? 0xFF : 0, 2); + memset(&vec1.generic[24], (vec1.generic[24] > vec2.generic[24]) ? 0xFF : 0, 2); + memset(&vec1.generic[25], (vec1.generic[25] > vec2.generic[25]) ? 0xFF : 0, 2); + memset(&vec1.generic[26], (vec1.generic[26] > vec2.generic[26]) ? 0xFF : 0, 2); + memset(&vec1.generic[27], (vec1.generic[27] > vec2.generic[27]) ? 0xFF : 0, 2); + memset(&vec1.generic[28], (vec1.generic[28] > vec2.generic[28]) ? 0xFF : 0, 2); + memset(&vec1.generic[29], (vec1.generic[29] > vec2.generic[29]) ? 
0xFF : 0, 2); + memset(&vec1.generic[30], (vec1.generic[30] > vec2.generic[30]) ? 0xFF : 0, 2); + memset(&vec1.generic[31], (vec1.generic[31] > vec2.generic[31]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x32_CMPGT_DEFINED +#endif +#if !defined(VUINT16x32_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmple(vuint16x32 vec1, vuint16x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 2); + memset(&vec1.generic[16], (vec1.generic[16] <= vec2.generic[16]) ? 0xFF : 0, 2); + memset(&vec1.generic[17], (vec1.generic[17] <= vec2.generic[17]) ? 0xFF : 0, 2); + memset(&vec1.generic[18], (vec1.generic[18] <= vec2.generic[18]) ? 0xFF : 0, 2); + memset(&vec1.generic[19], (vec1.generic[19] <= vec2.generic[19]) ? 0xFF : 0, 2); + memset(&vec1.generic[20], (vec1.generic[20] <= vec2.generic[20]) ? 0xFF : 0, 2); + memset(&vec1.generic[21], (vec1.generic[21] <= vec2.generic[21]) ? 0xFF : 0, 2); + memset(&vec1.generic[22], (vec1.generic[22] <= vec2.generic[22]) ? 0xFF : 0, 2); + memset(&vec1.generic[23], (vec1.generic[23] <= vec2.generic[23]) ? 0xFF : 0, 2); + memset(&vec1.generic[24], (vec1.generic[24] <= vec2.generic[24]) ? 0xFF : 0, 2); + memset(&vec1.generic[25], (vec1.generic[25] <= vec2.generic[25]) ? 0xFF : 0, 2); + memset(&vec1.generic[26], (vec1.generic[26] <= vec2.generic[26]) ? 0xFF : 0, 2); + memset(&vec1.generic[27], (vec1.generic[27] <= vec2.generic[27]) ? 0xFF : 0, 2); + memset(&vec1.generic[28], (vec1.generic[28] <= vec2.generic[28]) ? 0xFF : 0, 2); + memset(&vec1.generic[29], (vec1.generic[29] <= vec2.generic[29]) ? 0xFF : 0, 2); + memset(&vec1.generic[30], (vec1.generic[30] <= vec2.generic[30]) ? 0xFF : 0, 2); + memset(&vec1.generic[31], (vec1.generic[31] <= vec2.generic[31]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x32_CMPLE_DEFINED +#endif +#if !defined(VUINT16x32_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpge(vuint16x32 vec1, vuint16x32 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 
0xFF : 0, 2); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 2); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 2); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 2); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 2); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 2); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 2); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 2); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 2); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 2); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 2); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 2); + memset(&vec1.generic[16], (vec1.generic[16] >= vec2.generic[16]) ? 0xFF : 0, 2); + memset(&vec1.generic[17], (vec1.generic[17] >= vec2.generic[17]) ? 0xFF : 0, 2); + memset(&vec1.generic[18], (vec1.generic[18] >= vec2.generic[18]) ? 0xFF : 0, 2); + memset(&vec1.generic[19], (vec1.generic[19] >= vec2.generic[19]) ? 0xFF : 0, 2); + memset(&vec1.generic[20], (vec1.generic[20] >= vec2.generic[20]) ? 0xFF : 0, 2); + memset(&vec1.generic[21], (vec1.generic[21] >= vec2.generic[21]) ? 0xFF : 0, 2); + memset(&vec1.generic[22], (vec1.generic[22] >= vec2.generic[22]) ? 0xFF : 0, 2); + memset(&vec1.generic[23], (vec1.generic[23] >= vec2.generic[23]) ? 0xFF : 0, 2); + memset(&vec1.generic[24], (vec1.generic[24] >= vec2.generic[24]) ? 0xFF : 0, 2); + memset(&vec1.generic[25], (vec1.generic[25] >= vec2.generic[25]) ? 0xFF : 0, 2); + memset(&vec1.generic[26], (vec1.generic[26] >= vec2.generic[26]) ? 0xFF : 0, 2); + memset(&vec1.generic[27], (vec1.generic[27] >= vec2.generic[27]) ? 0xFF : 0, 2); + memset(&vec1.generic[28], (vec1.generic[28] >= vec2.generic[28]) ? 0xFF : 0, 2); + memset(&vec1.generic[29], (vec1.generic[29] >= vec2.generic[29]) ? 0xFF : 0, 2); + memset(&vec1.generic[30], (vec1.generic[30] >= vec2.generic[30]) ? 0xFF : 0, 2); + memset(&vec1.generic[31], (vec1.generic[31] >= vec2.generic[31]) ? 0xFF : 0, 2); + return vec1; +} +# define VUINT16x32_CMPGE_DEFINED +#endif +#if !defined(VUINT16x32_MIN_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_min(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? 
(vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] < vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] < vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] < vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] < vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] < vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] < vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] < vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] < vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] < vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] < vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] < vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] < vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] < vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] < vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] < vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] < vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]); + return vec1; +} +# define VUINT16x32_MIN_DEFINED +#endif +#if !defined(VUINT16x32_MAX_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_max(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? 
(vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + vec1.generic[16] = (vec1.generic[16] > vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]); + vec1.generic[17] = (vec1.generic[17] > vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]); + vec1.generic[18] = (vec1.generic[18] > vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]); + vec1.generic[19] = (vec1.generic[19] > vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]); + vec1.generic[20] = (vec1.generic[20] > vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]); + vec1.generic[21] = (vec1.generic[21] > vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]); + vec1.generic[22] = (vec1.generic[22] > vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]); + vec1.generic[23] = (vec1.generic[23] > vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]); + vec1.generic[24] = (vec1.generic[24] > vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]); + vec1.generic[25] = (vec1.generic[25] > vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]); + vec1.generic[26] = (vec1.generic[26] > vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]); + vec1.generic[27] = (vec1.generic[27] > vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]); + vec1.generic[28] = (vec1.generic[28] > vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]); + vec1.generic[29] = (vec1.generic[29] > vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]); + vec1.generic[30] = (vec1.generic[30] > vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]); + vec1.generic[31] = (vec1.generic[31] > vec2.generic[31]) ? 
(vec1.generic[31]) : (vec2.generic[31]); + return vec1; +} +# define VUINT16x32_MAX_DEFINED +#endif +#if !defined(VUINT16x32_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_rshift(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + vec1.generic[4] >>= vec2.generic[0]; + vec1.generic[5] >>= vec2.generic[0]; + vec1.generic[6] >>= vec2.generic[0]; + vec1.generic[7] >>= vec2.generic[0]; + vec1.generic[8] >>= vec2.generic[0]; + vec1.generic[9] >>= vec2.generic[0]; + vec1.generic[10] >>= vec2.generic[0]; + vec1.generic[11] >>= vec2.generic[0]; + vec1.generic[12] >>= vec2.generic[0]; + vec1.generic[13] >>= vec2.generic[0]; + vec1.generic[14] >>= vec2.generic[0]; + vec1.generic[15] >>= vec2.generic[0]; + vec1.generic[16] >>= vec2.generic[0]; + vec1.generic[17] >>= vec2.generic[0]; + vec1.generic[18] >>= vec2.generic[0]; + vec1.generic[19] >>= vec2.generic[0]; + vec1.generic[20] >>= vec2.generic[0]; + vec1.generic[21] >>= vec2.generic[0]; + vec1.generic[22] >>= vec2.generic[0]; + vec1.generic[23] >>= vec2.generic[0]; + vec1.generic[24] >>= vec2.generic[0]; + vec1.generic[25] >>= vec2.generic[0]; + vec1.generic[26] >>= vec2.generic[0]; + vec1.generic[27] >>= vec2.generic[0]; + vec1.generic[28] >>= vec2.generic[0]; + vec1.generic[29] >>= vec2.generic[0]; + vec1.generic[30] >>= vec2.generic[0]; + vec1.generic[31] >>= vec2.generic[0]; + return vec1; +} +# define VUINT16x32_RSHIFT_DEFINED +#endif +#if !defined(VUINT16x32_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_lrshift(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + vec1.generic[4] >>= vec2.generic[0]; + vec1.generic[5] >>= vec2.generic[0]; + vec1.generic[6] >>= vec2.generic[0]; + vec1.generic[7] >>= vec2.generic[0]; + vec1.generic[8] >>= vec2.generic[0]; + vec1.generic[9] >>= vec2.generic[0]; + vec1.generic[10] >>= vec2.generic[0]; + vec1.generic[11] >>= vec2.generic[0]; + vec1.generic[12] >>= vec2.generic[0]; + vec1.generic[13] >>= vec2.generic[0]; + vec1.generic[14] >>= vec2.generic[0]; + vec1.generic[15] >>= vec2.generic[0]; + vec1.generic[16] >>= vec2.generic[0]; + vec1.generic[17] >>= vec2.generic[0]; + vec1.generic[18] >>= vec2.generic[0]; + vec1.generic[19] >>= vec2.generic[0]; + vec1.generic[20] >>= vec2.generic[0]; + vec1.generic[21] >>= vec2.generic[0]; + vec1.generic[22] >>= vec2.generic[0]; + vec1.generic[23] >>= vec2.generic[0]; + vec1.generic[24] >>= vec2.generic[0]; + vec1.generic[25] >>= vec2.generic[0]; + vec1.generic[26] >>= vec2.generic[0]; + vec1.generic[27] >>= vec2.generic[0]; + vec1.generic[28] >>= vec2.generic[0]; + vec1.generic[29] >>= vec2.generic[0]; + vec1.generic[30] >>= vec2.generic[0]; + vec1.generic[31] >>= vec2.generic[0]; + return vec1; +} +# define VUINT16x32_LRSHIFT_DEFINED +#endif +#if !defined(VUINT16x32_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x32 vuint16x32_lshift(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + vec1.generic[2] <<= vec2.generic[0]; + vec1.generic[3] <<= vec2.generic[0]; + vec1.generic[4] <<= vec2.generic[0]; + vec1.generic[5] <<= vec2.generic[0]; + vec1.generic[6] <<= vec2.generic[0]; + vec1.generic[7] <<= vec2.generic[0]; + vec1.generic[8] <<= vec2.generic[0]; + vec1.generic[9] <<= vec2.generic[0]; + vec1.generic[10] <<= 
vec2.generic[0]; + vec1.generic[11] <<= vec2.generic[0]; + vec1.generic[12] <<= vec2.generic[0]; + vec1.generic[13] <<= vec2.generic[0]; + vec1.generic[14] <<= vec2.generic[0]; + vec1.generic[15] <<= vec2.generic[0]; + vec1.generic[16] <<= vec2.generic[0]; + vec1.generic[17] <<= vec2.generic[0]; + vec1.generic[18] <<= vec2.generic[0]; + vec1.generic[19] <<= vec2.generic[0]; + vec1.generic[20] <<= vec2.generic[0]; + vec1.generic[21] <<= vec2.generic[0]; + vec1.generic[22] <<= vec2.generic[0]; + vec1.generic[23] <<= vec2.generic[0]; + vec1.generic[24] <<= vec2.generic[0]; + vec1.generic[25] <<= vec2.generic[0]; + vec1.generic[26] <<= vec2.generic[0]; + vec1.generic[27] <<= vec2.generic[0]; + vec1.generic[28] <<= vec2.generic[0]; + vec1.generic[29] <<= vec2.generic[0]; + vec1.generic[30] <<= vec2.generic[0]; + vec1.generic[31] <<= vec2.generic[0]; + return vec1; +} +# define VUINT16x32_LSHIFT_DEFINED +#endif +#if !defined(VINT32x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_splat(vec_int32 x) +{ + vint32x2 vec; + vec.generic[0] = x; + vec.generic[1] = x; + return vec; +} # define VINT32x2_SPLAT_DEFINED #endif -#ifndef VINT32x2_LOAD_ALIGNED_DEFINED -VEC_GENERIC_LOAD_ALIGNED(/* nothing */, 32, 2) +#if !defined(VINT32x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_load_aligned(const vec_int32 x[2]) +{ + vint32x2 vec; + memcpy(vec.generic, x, 8); + return vec; +} # define VINT32x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT32x2_LOAD_DEFINED -VEC_GENERIC_LOAD(/* nothing */, 32, 2) +#if !defined(VINT32x2_LOAD_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_load(const vec_int32 x[2]) +{ + vint32x2 vec; + memcpy(vec.generic, x, 8); + return vec; +} # define VINT32x2_LOAD_DEFINED #endif -#ifndef VINT32x2_STORE_ALIGNED_DEFINED -VEC_GENERIC_STORE_ALIGNED(/* nothing */, 32, 2) +#if !defined(VINT32x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint32x2_store_aligned(vint32x2 vec, vec_int32 x[2]) +{ + memcpy(x, vec.generic, 8); +} # define VINT32x2_STORE_ALIGNED_DEFINED #endif -#ifndef VINT32x2_STORE_DEFINED -VEC_GENERIC_STORE(/* nothing */, 32, 2) +#if !defined(VINT32x2_STORE_DEFINED) +VEC_FUNC_IMPL void vint32x2_store(vint32x2 vec, vec_int32 x[2]) +{ + memcpy(x, vec.generic, 8); +} # define VINT32x2_STORE_DEFINED #endif -#ifndef VINT32x2_ADD_DEFINED -VEC_GENERIC_ADD(/* nothing */, 32, 2) +#if !defined(VINT32x2_ADD_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_add(vint32x2 vec1, vint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + return vec1; +} # define VINT32x2_ADD_DEFINED #endif -#ifndef VINT32x2_SUB_DEFINED -VEC_GENERIC_SUB(/* nothing */, 32, 2) +#if !defined(VINT32x2_SUB_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_sub(vint32x2 vec1, vint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + return vec1; +} # define VINT32x2_SUB_DEFINED #endif -#ifndef VINT32x2_MUL_DEFINED -VEC_GENERIC_MUL(/* nothing */, 32, 2) +#if !defined(VINT32x2_MUL_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_mul(vint32x2 vec1, vint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + return vec1; +} # define VINT32x2_MUL_DEFINED #endif -#ifndef VINT32x2_DIV_DEFINED -VEC_GENERIC_DIV(/* nothing */, 32, 2) +#if !defined(VINT32x2_DIV_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_div(vint32x2 vec1, vint32x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? 
(vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + return vec1; +} # define VINT32x2_DIV_DEFINED #endif -#ifndef VINT32x2_MOD_DEFINED -VEC_GENERIC_MOD(/* nothing */, 32, 2) +#if !defined(VINT32x2_MOD_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_mod(vint32x2 vec1, vint32x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + return vec1; +} # define VINT32x2_MOD_DEFINED #endif -#ifndef VINT32x2_AVG_DEFINED -VEC_GENERIC_AVG(/* nothing */, 32, 2) +#if !defined(VINT32x2_AVG_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_avg(vint32x2 vec1, vint32x2 vec2) +{ + vec_int32 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} # define VINT32x2_AVG_DEFINED #endif -#ifndef VINT32x2_AND_DEFINED -VEC_GENERIC_AND(/* nothing */, 32, 2) +#if !defined(VINT32x2_AND_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_and(vint32x2 vec1, vint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + return vec1; +} # define VINT32x2_AND_DEFINED #endif -#ifndef VINT32x2_OR_DEFINED -VEC_GENERIC_OR(/* nothing */, 32, 2) +#if !defined(VINT32x2_OR_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_or(vint32x2 vec1, vint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + return vec1; +} # define VINT32x2_OR_DEFINED #endif -#ifndef VINT32x2_XOR_DEFINED -VEC_GENERIC_XOR(/* nothing */, 32, 2) +#if !defined(VINT32x2_XOR_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_xor(vint32x2 vec1, vint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + return vec1; +} # define VINT32x2_XOR_DEFINED #endif -#ifndef VINT32x2_NOT_DEFINED -VEC_GENERIC_NOT(/* nothing */, 32, 2) +#if !defined(VINT32x2_NOT_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_not(vint32x2 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + return vec; +} # define VINT32x2_NOT_DEFINED #endif -#ifndef VINT32x2_CMPLT_DEFINED -VEC_GENERIC_CMPLT(/* nothing */, 32, 2) +#if !defined(VINT32x2_CMPLT_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_cmplt(vint32x2 vec1, vint32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} # define VINT32x2_CMPLT_DEFINED #endif -#ifndef VINT32x2_CMPEQ_DEFINED -VEC_GENERIC_CMPEQ(/* nothing */, 32, 2) +#if !defined(VINT32x2_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_cmpeq(vint32x2 vec1, vint32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 
0xFF : 0, 4); + return vec1; +} # define VINT32x2_CMPEQ_DEFINED #endif -#ifndef VINT32x2_CMPGT_DEFINED -VEC_GENERIC_CMPGT(/* nothing */, 32, 2) +#if !defined(VINT32x2_CMPGT_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_cmpgt(vint32x2 vec1, vint32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} # define VINT32x2_CMPGT_DEFINED #endif -#ifndef VINT32x2_CMPLE_DEFINED -VEC_GENERIC_CMPLE(/* nothing */, 32, 2) +#if !defined(VINT32x2_CMPLE_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_cmple(vint32x2 vec1, vint32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} # define VINT32x2_CMPLE_DEFINED #endif -#ifndef VINT32x2_CMPGE_DEFINED -VEC_GENERIC_CMPGE(/* nothing */, 32, 2) +#if !defined(VINT32x2_CMPGE_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_cmpge(vint32x2 vec1, vint32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} # define VINT32x2_CMPGE_DEFINED #endif -#ifndef VINT32x2_MIN_DEFINED -VEC_GENERIC_MIN(/* nothing */, 32, 2) +#if !defined(VINT32x2_MIN_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_min(vint32x2 vec1, vint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VINT32x2_MIN_DEFINED #endif -#ifndef VINT32x2_MAX_DEFINED -VEC_GENERIC_MAX(/* nothing */, 32, 2) +#if !defined(VINT32x2_MAX_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_max(vint32x2 vec1, vint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? 
(vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VINT32x2_MAX_DEFINED #endif -#ifndef VINT32x2_RSHIFT_DEFINED -VEC_GENERIC_RSHIFT(/* nothing */, 32, 2) +#if !defined(VINT32x2_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_rshift(vint32x2 vec1, vuint32x2 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); + return vec1; +} # define VINT32x2_RSHIFT_DEFINED #endif -#ifndef VINT32x2_LRSHIFT_DEFINED -VEC_GENERIC_LRSHIFT(/* nothing */, 32, 2) +#if !defined(VINT32x2_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_lrshift(vint32x2 vec1, vuint32x2 vec2) +{ + union { vec_uint32 u; vec_int32 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + return vec1; +} # define VINT32x2_LRSHIFT_DEFINED #endif -#ifndef VINT32x2_LSHIFT_DEFINED -VEC_GENERIC_LSHIFT(/* nothing */, 32, 2) +#if !defined(VINT32x2_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x2 vint32x2_lshift(vint32x2 vec1, vuint32x2 vec2) +{ + union { vec_uint32 u; vec_int32 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + return vec1; +} # define VINT32x2_LSHIFT_DEFINED #endif - - -/* vint32x2 */ - -#ifndef VUINT32x2_SPLAT_DEFINED -VEC_GENERIC_SPLAT(u, 32, 2) +#if !defined(VUINT32x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_splat(vec_uint32 x) +{ + vuint32x2 vec; + vec.generic[0] = x; + vec.generic[1] = x; + return vec; +} # define VUINT32x2_SPLAT_DEFINED #endif -#ifndef VUINT32x2_LOAD_ALIGNED_DEFINED -VEC_GENERIC_LOAD_ALIGNED(u, 32, 2) +#if !defined(VUINT32x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_load_aligned(const vec_uint32 x[2]) +{ + vuint32x2 vec; + memcpy(vec.generic, x, 8); + return vec; +} # define VUINT32x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT32x2_LOAD_DEFINED -VEC_GENERIC_LOAD(u, 32, 2) +#if !defined(VUINT32x2_LOAD_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_load(const vec_uint32 x[2]) +{ + vuint32x2 vec; + memcpy(vec.generic, x, 8); + return vec; +} # define VUINT32x2_LOAD_DEFINED #endif -#ifndef VUINT32x2_STORE_ALIGNED_DEFINED -VEC_GENERIC_STORE_ALIGNED(u, 32, 2) +#if !defined(VUINT32x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint32x2_store_aligned(vuint32x2 vec, vec_uint32 x[2]) +{ + memcpy(x, vec.generic, 8); +} # define VUINT32x2_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT32x2_STORE_DEFINED -VEC_GENERIC_STORE(u, 32, 2) +#if !defined(VUINT32x2_STORE_DEFINED) +VEC_FUNC_IMPL void vuint32x2_store(vuint32x2 vec, vec_uint32 x[2]) +{ + memcpy(x, vec.generic, 8); +} # define VUINT32x2_STORE_DEFINED #endif -#ifndef VUINT32x2_ADD_DEFINED -VEC_GENERIC_ADD(u, 32, 2) +#if !defined(VUINT32x2_ADD_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_add(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + return vec1; +} # define VUINT32x2_ADD_DEFINED #endif -#ifndef VUINT32x2_SUB_DEFINED -VEC_GENERIC_SUB(u, 32, 2) +#if !defined(VUINT32x2_SUB_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_sub(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + return vec1; +} # define VUINT32x2_SUB_DEFINED #endif -#ifndef VUINT32x2_MUL_DEFINED -VEC_GENERIC_MUL(u, 32, 2) +#if !defined(VUINT32x2_MUL_DEFINED) +VEC_FUNC_IMPL vuint32x2 
vuint32x2_mul(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + return vec1; +} # define VUINT32x2_MUL_DEFINED #endif -#ifndef VUINT32x2_DIV_DEFINED -VEC_GENERIC_DIV(u, 32, 2) +#if !defined(VUINT32x2_DIV_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_div(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + return vec1; +} # define VUINT32x2_DIV_DEFINED #endif -#ifndef VUINT32x2_MOD_DEFINED -VEC_GENERIC_MOD(u, 32, 2) +#if !defined(VUINT32x2_MOD_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_mod(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + return vec1; +} # define VUINT32x2_MOD_DEFINED #endif -#ifndef VUINT32x2_AVG_DEFINED -VEC_GENERIC_AVG(u, 32, 2) +#if !defined(VUINT32x2_AVG_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_avg(vuint32x2 vec1, vuint32x2 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); + return vec1; +} # define VUINT32x2_AVG_DEFINED #endif -#ifndef VUINT32x2_AND_DEFINED -VEC_GENERIC_AND(u, 32, 2) +#if !defined(VUINT32x2_AND_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_and(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + return vec1; +} # define VUINT32x2_AND_DEFINED #endif -#ifndef VUINT32x2_OR_DEFINED -VEC_GENERIC_OR(u, 32, 2) +#if !defined(VUINT32x2_OR_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_or(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + return vec1; +} # define VUINT32x2_OR_DEFINED #endif -#ifndef VUINT32x2_XOR_DEFINED -VEC_GENERIC_XOR(u, 32, 2) +#if !defined(VUINT32x2_XOR_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_xor(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + return vec1; +} # define VUINT32x2_XOR_DEFINED #endif -#ifndef VUINT32x2_NOT_DEFINED -VEC_GENERIC_NOT(u, 32, 2) +#if !defined(VUINT32x2_NOT_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_not(vuint32x2 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + return vec; +} # define VUINT32x2_NOT_DEFINED #endif -#ifndef VUINT32x2_CMPLT_DEFINED -VEC_GENERIC_CMPLT(u, 32, 2) +#if !defined(VUINT32x2_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmplt(vuint32x2 vec1, vuint32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} # define VUINT32x2_CMPLT_DEFINED #endif -#ifndef VUINT32x2_CMPEQ_DEFINED -VEC_GENERIC_CMPEQ(u, 32, 2) +#if !defined(VUINT32x2_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpeq(vuint32x2 vec1, vuint32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 
0xFF : 0, 4); + return vec1; +} # define VUINT32x2_CMPEQ_DEFINED #endif -#ifndef VUINT32x2_CMPGT_DEFINED -VEC_GENERIC_CMPGT(u, 32, 2) +#if !defined(VUINT32x2_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpgt(vuint32x2 vec1, vuint32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} # define VUINT32x2_CMPGT_DEFINED #endif -#ifndef VUINT32x2_CMPLE_DEFINED -VEC_GENERIC_CMPLE(u, 32, 2) +#if !defined(VUINT32x2_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmple(vuint32x2 vec1, vuint32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} # define VUINT32x2_CMPLE_DEFINED #endif -#ifndef VUINT32x2_CMPGE_DEFINED -VEC_GENERIC_CMPGE(u, 32, 2) +#if !defined(VUINT32x2_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpge(vuint32x2 vec1, vuint32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} # define VUINT32x2_CMPGE_DEFINED #endif -#ifndef VUINT32x2_MIN_DEFINED -VEC_GENERIC_MIN(u, 32, 2) +#if !defined(VUINT32x2_MIN_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_min(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VUINT32x2_MIN_DEFINED #endif -#ifndef VUINT32x2_MAX_DEFINED -VEC_GENERIC_MAX(u, 32, 2) +#if !defined(VUINT32x2_MAX_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_max(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? 
(vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VUINT32x2_MAX_DEFINED #endif -#ifndef VUINT32x2_RSHIFT_DEFINED -VEC_GENERIC_RSHIFT(u, 32, 2) +#if !defined(VUINT32x2_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_rshift(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + return vec1; +} # define VUINT32x2_RSHIFT_DEFINED #endif -#ifndef VUINT32x2_LRSHIFT_DEFINED -VEC_GENERIC_LRSHIFT(u, 32, 2) +#if !defined(VUINT32x2_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_lrshift(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + return vec1; +} # define VUINT32x2_LRSHIFT_DEFINED #endif -#ifndef VUINT32x2_LSHIFT_DEFINED -VEC_GENERIC_LSHIFT(u, 32, 2) +#if !defined(VUINT32x2_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x2 vuint32x2_lshift(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + return vec1; +} # define VUINT32x2_LSHIFT_DEFINED #endif - - -/* vuint64x2 */ - -#ifndef VINT64x2_SPLAT_DEFINED -VEC_GENERIC_SPLAT(/* nothing */, 64, 2) +#if !defined(VINT32x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_splat(vec_int32 x) +{ + vint32x4 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + return vec; +} +# define VINT32x4_SPLAT_DEFINED +#endif +#if !defined(VINT32x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_load_aligned(const vec_int32 x[4]) +{ + vint32x4 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VINT32x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT32x4_LOAD_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_load(const vec_int32 x[4]) +{ + vint32x4 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VINT32x4_LOAD_DEFINED +#endif +#if !defined(VINT32x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 x[4]) +{ + memcpy(x, vec.generic, 16); +} +# define VINT32x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT32x4_STORE_DEFINED) +VEC_FUNC_IMPL void vint32x4_store(vint32x4 vec, vec_int32 x[4]) +{ + memcpy(x, vec.generic, 16); +} +# define VINT32x4_STORE_DEFINED +#endif +#if !defined(VINT32x4_ADD_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + return vec1; +} +# define VINT32x4_ADD_DEFINED +#endif +#if !defined(VINT32x4_SUB_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_sub(vint32x4 vec1, vint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + return vec1; +} +# define VINT32x4_SUB_DEFINED +#endif +#if !defined(VINT32x4_MUL_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_mul(vint32x4 vec1, vint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + return vec1; +} +# define VINT32x4_MUL_DEFINED +#endif +#if !defined(VINT32x4_DIV_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_div(vint32x4 vec1, vint32x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? 
(vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + return vec1; +} +# define VINT32x4_DIV_DEFINED +#endif +#if !defined(VINT32x4_MOD_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_mod(vint32x4 vec1, vint32x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + return vec1; +} +# define VINT32x4_MOD_DEFINED +#endif +#if !defined(VINT32x4_AVG_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2) +{ + vec_int32 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT32x4_AVG_DEFINED +#endif +#if !defined(VINT32x4_AND_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + return vec1; +} +# define VINT32x4_AND_DEFINED +#endif +#if !defined(VINT32x4_OR_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + return vec1; +} +# define VINT32x4_OR_DEFINED +#endif +#if !defined(VINT32x4_XOR_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + return vec1; +} +# define VINT32x4_XOR_DEFINED +#endif +#if !defined(VINT32x4_NOT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_not(vint32x4 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + return vec; +} +# define VINT32x4_NOT_DEFINED +#endif +#if 
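
Two conventions are visible in the vint32x4 hunks above: div and mod guard against a zero divisor by returning 0 for that lane, and avg is built from the two halved operands plus a remainder correction, so the intermediate sum can never overflow while the result still matches ceil((x + y) / 2). A self-contained check of that per-lane expression (avg_model is a hypothetical name used only for this sketch):

    #include <assert.h>
    #include <stdint.h>

    /* avg_model is a scalar copy of the per-lane avg expression above */
    static int32_t avg_model(int32_t x, int32_t y)
    {
        int32_t xr = x % 2, yr = y % 2;
        return (x / 2) + (y / 2) + ((xr + yr) / 2) + ((xr + yr) % 2 == 1);
    }

    int main(void)
    {
        /* equals ceil((x + y) / 2) without ever forming x + y */
        assert(avg_model(3, 4) == 4);
        assert(avg_model(-3, -4) == -3);
        assert(avg_model(INT32_MAX, INT32_MAX) == INT32_MAX);   /* no overflow */
        return 0;
    }
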
!defined(VINT32x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x4_CMPLT_DEFINED +#endif +#if !defined(VINT32x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x4_CMPEQ_DEFINED +#endif +#if !defined(VINT32x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x4_CMPGT_DEFINED +#endif +#if !defined(VINT32x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_cmple(vint32x4 vec1, vint32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x4_CMPLE_DEFINED +#endif +#if !defined(VINT32x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_cmpge(vint32x4 vec1, vint32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x4_CMPGE_DEFINED +#endif +#if !defined(VINT32x4_MIN_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VINT32x4_MIN_DEFINED +#endif +#if !defined(VINT32x4_MAX_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? 
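
The cmplt/cmpeq/cmpgt/cmple/cmpge hunks above fill each 4-byte lane with memset(..., 0xFF, 4) or 0, i.e. the result is an all-ones/all-zero mask per lane rather than a 0/1 value. A quick standalone illustration of what that means for one lane, plus the branchless select such a mask enables with the and/or/not operations (plain C, independent of the library):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    int main(void)
    {
        /* a "true" comparison lane is four 0xFF bytes, i.e. an all-ones mask */
        uint32_t mask;
        memset(&mask, 0xFF, 4);
        assert(mask == UINT32_MAX);

        /* (a & mask) | (b & ~mask) picks a where the comparison was true */
        uint32_t a = 123, b = 456;
        assert(((a & mask) | (b & ~mask)) == a);
        return 0;
    }
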
(vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VINT32x4_MAX_DEFINED +#endif +#if !defined(VINT32x4_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); + return vec1; +} +# define VINT32x4_RSHIFT_DEFINED +#endif +#if !defined(VINT32x4_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2) +{ + union { vec_uint32 u; vec_int32 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + return vec1; +} +# define VINT32x4_LRSHIFT_DEFINED +#endif +#if !defined(VINT32x4_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2) +{ + union { vec_uint32 u; vec_int32 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + return vec1; +} +# define VINT32x4_LSHIFT_DEFINED +#endif +#if !defined(VUINT32x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_splat(vec_uint32 x) +{ + vuint32x4 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + return vec; +} +# define VUINT32x4_SPLAT_DEFINED +#endif +#if !defined(VUINT32x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_load_aligned(const vec_uint32 x[4]) +{ + vuint32x4 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VUINT32x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT32x4_LOAD_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_load(const vec_uint32 x[4]) +{ + vuint32x4 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VUINT32x4_LOAD_DEFINED +#endif +#if !defined(VUINT32x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 x[4]) +{ + memcpy(x, vec.generic, 16); +} +# define VUINT32x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT32x4_STORE_DEFINED) +VEC_FUNC_IMPL void vuint32x4_store(vuint32x4 vec, vec_uint32 x[4]) +{ + memcpy(x, vec.generic, 16); +} +# define VUINT32x4_STORE_DEFINED +#endif +#if !defined(VUINT32x4_ADD_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_add(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + return vec1; +} +# define VUINT32x4_ADD_DEFINED +#endif +#if !defined(VUINT32x4_SUB_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_sub(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + return vec1; +} +# define VUINT32x4_SUB_DEFINED +#endif +#if !defined(VUINT32x4_MUL_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_mul(vuint32x4 vec1, vuint32x4 
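
The vint32x4 shift hunks above have to work around C99: right-shifting a negative value is implementation-defined and left-shifting one is undefined, which is why lrshift and lshift go through a union with an unsigned member. The plain rshift hunk, however, shifts the complement of each lane ((~x) >> n) without undoing the complement; assuming a sign-extending (arithmetic) shift is the intent, the usual portable form is ~(~x >> n) for negative inputs, so that hunk is worth double-checking against the generator. A self-contained sketch of both behaviours (lrshift_model and arshift_model are hypothetical names, two's-complement conversion assumed):

    #include <assert.h>
    #include <stdint.h>

    /* logical shift of a signed lane via the same union pun as above */
    static int32_t lrshift_model(int32_t v, unsigned n)
    {
        union { uint32_t u; int32_t s; } x;
        x.s = v;
        x.u >>= n;   /* vacated bits are zero-filled */
        return x.s;
    }

    /* portable arithmetic shift: complement, shift, complement back */
    static int32_t arshift_model(int32_t v, unsigned n)
    {
        return (v < 0) ? (int32_t)~(~(uint32_t)v >> n)
                       : (int32_t)((uint32_t)v >> n);
    }

    int main(void)
    {
        assert(lrshift_model(-1, 28) == 15);   /* zero-fill */
        assert(arshift_model(-8, 1) == -4);    /* sign-extend */
        assert(arshift_model(12, 2) == 3);
        return 0;
    }
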
vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + return vec1; +} +# define VUINT32x4_MUL_DEFINED +#endif +#if !defined(VUINT32x4_DIV_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_div(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + return vec1; +} +# define VUINT32x4_DIV_DEFINED +#endif +#if !defined(VUINT32x4_MOD_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_mod(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + return vec1; +} +# define VUINT32x4_MOD_DEFINED +#endif +#if !defined(VUINT32x4_AVG_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); + return vec1; +} +# define VUINT32x4_AVG_DEFINED +#endif +#if !defined(VUINT32x4_AND_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_and(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + return vec1; +} +# define VUINT32x4_AND_DEFINED +#endif +#if !defined(VUINT32x4_OR_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_or(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + return vec1; +} +# define VUINT32x4_OR_DEFINED +#endif +#if !defined(VUINT32x4_XOR_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_xor(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + return vec1; +} +# define VUINT32x4_XOR_DEFINED +#endif +#if !defined(VUINT32x4_NOT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_not(vuint32x4 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + return vec; +} +# define VUINT32x4_NOT_DEFINED +#endif +#if !defined(VUINT32x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmplt(vuint32x4 vec1, vuint32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < 
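
The vuint32x4 avg hunk above relies on the identity (a >> 1) + (b >> 1) + ((a | b) & 1), which equals ceil((a + b) / 2) but never forms the possibly-overflowing sum a + b. A quick standalone check (uavg_model is a hypothetical name used only here):

    #include <assert.h>
    #include <stdint.h>

    /* uavg_model is a scalar copy of the per-lane unsigned avg expression above */
    static uint32_t uavg_model(uint32_t a, uint32_t b)
    {
        return (a >> 1) + (b >> 1) + ((a | b) & 1);
    }

    int main(void)
    {
        assert(uavg_model(1, 2) == 2);    /* ceil(1.5) */
        assert(uavg_model(7, 8) == 8);    /* ceil(7.5) */
        assert(uavg_model(UINT32_MAX, UINT32_MAX) == UINT32_MAX);   /* no overflow */
        return 0;
    }
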
vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x4_CMPLT_DEFINED +#endif +#if !defined(VUINT32x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpeq(vuint32x4 vec1, vuint32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x4_CMPEQ_DEFINED +#endif +#if !defined(VUINT32x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpgt(vuint32x4 vec1, vuint32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x4_CMPGT_DEFINED +#endif +#if !defined(VUINT32x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmple(vuint32x4 vec1, vuint32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x4_CMPLE_DEFINED +#endif +#if !defined(VUINT32x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpge(vuint32x4 vec1, vuint32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x4_CMPGE_DEFINED +#endif +#if !defined(VUINT32x4_MIN_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_min(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VUINT32x4_MIN_DEFINED +#endif +#if !defined(VUINT32x4_MAX_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_max(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? 
(vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VUINT32x4_MAX_DEFINED +#endif +#if !defined(VUINT32x4_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + return vec1; +} +# define VUINT32x4_RSHIFT_DEFINED +#endif +#if !defined(VUINT32x4_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + return vec1; +} +# define VUINT32x4_LRSHIFT_DEFINED +#endif +#if !defined(VUINT32x4_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + vec1.generic[2] <<= vec2.generic[0]; + vec1.generic[3] <<= vec2.generic[0]; + return vec1; +} +# define VUINT32x4_LSHIFT_DEFINED +#endif +#if !defined(VINT32x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_splat(vec_int32 x) +{ + vint32x8 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + return vec; +} +# define VINT32x8_SPLAT_DEFINED +#endif +#if !defined(VINT32x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_load_aligned(const vec_int32 x[8]) +{ + vint32x8 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VINT32x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT32x8_LOAD_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_load(const vec_int32 x[8]) +{ + vint32x8 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VINT32x8_LOAD_DEFINED +#endif +#if !defined(VINT32x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint32x8_store_aligned(vint32x8 vec, vec_int32 x[8]) +{ + memcpy(x, vec.generic, 32); +} +# define VINT32x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT32x8_STORE_DEFINED) +VEC_FUNC_IMPL void vint32x8_store(vint32x8 vec, vec_int32 x[8]) +{ + memcpy(x, vec.generic, 32); +} +# define VINT32x8_STORE_DEFINED +#endif +#if !defined(VINT32x8_ADD_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_add(vint32x8 vec1, vint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + return vec1; +} +# define VINT32x8_ADD_DEFINED +#endif +#if !defined(VINT32x8_SUB_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_sub(vint32x8 vec1, vint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + return vec1; +} +# define VINT32x8_SUB_DEFINED +#endif +#if !defined(VINT32x8_MUL_DEFINED) +VEC_FUNC_IMPL 
vint32x8 vint32x8_mul(vint32x8 vec1, vint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + return vec1; +} +# define VINT32x8_MUL_DEFINED +#endif +#if !defined(VINT32x8_DIV_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_div(vint32x8 vec1, vint32x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + return vec1; +} +# define VINT32x8_DIV_DEFINED +#endif +#if !defined(VINT32x8_MOD_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_mod(vint32x8 vec1, vint32x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? 
(vec1.generic[7] % vec2.generic[7]) : 0); + return vec1; +} +# define VINT32x8_MOD_DEFINED +#endif +#if !defined(VINT32x8_AVG_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_avg(vint32x8 vec1, vint32x8 vec2) +{ + vec_int32 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[4] % 2); + y_d_rem = (vec2.generic[4] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[5] % 2); + y_d_rem = (vec2.generic[5] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[6] % 2); + y_d_rem = (vec2.generic[6] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[7] % 2); + y_d_rem = (vec2.generic[7] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT32x8_AVG_DEFINED +#endif +#if !defined(VINT32x8_AND_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_and(vint32x8 vec1, vint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + return vec1; +} +# define VINT32x8_AND_DEFINED +#endif +#if !defined(VINT32x8_OR_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_or(vint32x8 vec1, vint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | 
vec2.generic[7]); + return vec1; +} +# define VINT32x8_OR_DEFINED +#endif +#if !defined(VINT32x8_XOR_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_xor(vint32x8 vec1, vint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + return vec1; +} +# define VINT32x8_XOR_DEFINED +#endif +#if !defined(VINT32x8_NOT_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_not(vint32x8 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + return vec; +} +# define VINT32x8_NOT_DEFINED +#endif +#if !defined(VINT32x8_CMPLT_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_cmplt(vint32x8 vec1, vint32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x8_CMPLT_DEFINED +#endif +#if !defined(VINT32x8_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_cmpeq(vint32x8 vec1, vint32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x8_CMPEQ_DEFINED +#endif +#if !defined(VINT32x8_CMPGT_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_cmpgt(vint32x8 vec1, vint32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 
0xFF : 0, 4); + return vec1; +} +# define VINT32x8_CMPGT_DEFINED +#endif +#if !defined(VINT32x8_CMPLE_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_cmple(vint32x8 vec1, vint32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x8_CMPLE_DEFINED +#endif +#if !defined(VINT32x8_CMPGE_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_cmpge(vint32x8 vec1, vint32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x8_CMPGE_DEFINED +#endif +#if !defined(VINT32x8_MIN_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_min(vint32x8 vec1, vint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VINT32x8_MIN_DEFINED +#endif +#if !defined(VINT32x8_MAX_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_max(vint32x8 vec1, vint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? 
(vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VINT32x8_MAX_DEFINED +#endif +#if !defined(VINT32x8_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_rshift(vint32x8 vec1, vuint32x8 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); +vec1.generic[4] = ((~vec1.generic[4]) >> vec2.generic[4]); +vec1.generic[5] = ((~vec1.generic[5]) >> vec2.generic[5]); +vec1.generic[6] = ((~vec1.generic[6]) >> vec2.generic[6]); +vec1.generic[7] = ((~vec1.generic[7]) >> vec2.generic[7]); + return vec1; +} +# define VINT32x8_RSHIFT_DEFINED +#endif +#if !defined(VINT32x8_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_lrshift(vint32x8 vec1, vuint32x8 vec2) +{ + union { vec_uint32 u; vec_int32 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u >>= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u >>= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u >>= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u >>= vec2.generic[7]; + vec1.generic[7] = x.s; + return vec1; +} +# define VINT32x8_LRSHIFT_DEFINED +#endif +#if !defined(VINT32x8_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x8 vint32x8_lshift(vint32x8 vec1, vuint32x8 vec2) +{ + union { vec_uint32 u; vec_int32 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u <<= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u <<= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u <<= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u <<= vec2.generic[7]; + vec1.generic[7] = x.s; + return vec1; +} +# define VINT32x8_LSHIFT_DEFINED +#endif +#if !defined(VUINT32x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_splat(vec_uint32 x) +{ + vuint32x8 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + return vec; +} +# define VUINT32x8_SPLAT_DEFINED +#endif +#if !defined(VUINT32x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_load_aligned(const vec_uint32 x[8]) +{ + vuint32x8 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VUINT32x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT32x8_LOAD_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_load(const vec_uint32 x[8]) +{ + vuint32x8 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VUINT32x8_LOAD_DEFINED +#endif +#if !defined(VUINT32x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint32x8_store_aligned(vuint32x8 vec, vec_uint32 x[8]) +{ + memcpy(x, vec.generic, 32); +} +# define VUINT32x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT32x8_STORE_DEFINED) +VEC_FUNC_IMPL void vuint32x8_store(vuint32x8 vec, vec_uint32 x[8]) +{ + memcpy(x, vec.generic, 
32); +} +# define VUINT32x8_STORE_DEFINED +#endif +#if !defined(VUINT32x8_ADD_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_add(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + return vec1; +} +# define VUINT32x8_ADD_DEFINED +#endif +#if !defined(VUINT32x8_SUB_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_sub(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + return vec1; +} +# define VUINT32x8_SUB_DEFINED +#endif +#if !defined(VUINT32x8_MUL_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_mul(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + return vec1; +} +# define VUINT32x8_MUL_DEFINED +#endif +#if !defined(VUINT32x8_DIV_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_div(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + return vec1; +} +# define VUINT32x8_DIV_DEFINED +#endif +#if !defined(VUINT32x8_MOD_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_mod(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? 
(vec1.generic[7] % vec2.generic[7]) : 0); + return vec1; +} +# define VUINT32x8_MOD_DEFINED +#endif +#if !defined(VUINT32x8_AVG_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_avg(vuint32x8 vec1, vuint32x8 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); +vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1); +vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1); +vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1); +vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1); + return vec1; +} +# define VUINT32x8_AVG_DEFINED +#endif +#if !defined(VUINT32x8_AND_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_and(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + return vec1; +} +# define VUINT32x8_AND_DEFINED +#endif +#if !defined(VUINT32x8_OR_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_or(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + return vec1; +} +# define VUINT32x8_OR_DEFINED +#endif +#if !defined(VUINT32x8_XOR_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_xor(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + return vec1; +} +# define VUINT32x8_XOR_DEFINED +#endif +#if !defined(VUINT32x8_NOT_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_not(vuint32x8 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + return vec; +} +# define VUINT32x8_NOT_DEFINED +#endif +#if !defined(VUINT32x8_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint32x8 
vuint32x8_cmplt(vuint32x8 vec1, vuint32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x8_CMPLT_DEFINED +#endif +#if !defined(VUINT32x8_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpeq(vuint32x8 vec1, vuint32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x8_CMPEQ_DEFINED +#endif +#if !defined(VUINT32x8_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpgt(vuint32x8 vec1, vuint32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x8_CMPGT_DEFINED +#endif +#if !defined(VUINT32x8_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmple(vuint32x8 vec1, vuint32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x8_CMPLE_DEFINED +#endif +#if !defined(VUINT32x8_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpge(vuint32x8 vec1, vuint32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 
0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x8_CMPGE_DEFINED +#endif +#if !defined(VUINT32x8_MIN_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_min(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VUINT32x8_MIN_DEFINED +#endif +#if !defined(VUINT32x8_MAX_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_max(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? 
(vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VUINT32x8_MAX_DEFINED +#endif +#if !defined(VUINT32x8_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_rshift(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + vec1.generic[4] >>= vec2.generic[0]; + vec1.generic[5] >>= vec2.generic[0]; + vec1.generic[6] >>= vec2.generic[0]; + vec1.generic[7] >>= vec2.generic[0]; + return vec1; +} +# define VUINT32x8_RSHIFT_DEFINED +#endif +#if !defined(VUINT32x8_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_lrshift(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + vec1.generic[4] >>= vec2.generic[0]; + vec1.generic[5] >>= vec2.generic[0]; + vec1.generic[6] >>= vec2.generic[0]; + vec1.generic[7] >>= vec2.generic[0]; + return vec1; +} +# define VUINT32x8_LRSHIFT_DEFINED +#endif +#if !defined(VUINT32x8_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x8 vuint32x8_lshift(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + vec1.generic[2] <<= vec2.generic[0]; + vec1.generic[3] <<= vec2.generic[0]; + vec1.generic[4] <<= vec2.generic[0]; + vec1.generic[5] <<= vec2.generic[0]; + vec1.generic[6] <<= vec2.generic[0]; + vec1.generic[7] <<= vec2.generic[0]; + return vec1; +} +# define VUINT32x8_LSHIFT_DEFINED +#endif +#if !defined(VINT32x16_SPLAT_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_splat(vec_int32 x) +{ + vint32x16 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + vec.generic[8] = x; + vec.generic[9] = x; + vec.generic[10] = x; + vec.generic[11] = x; + vec.generic[12] = x; + vec.generic[13] = x; + vec.generic[14] = x; + vec.generic[15] = x; + return vec; +} +# define VINT32x16_SPLAT_DEFINED +#endif +#if !defined(VINT32x16_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_load_aligned(const vec_int32 x[16]) +{ + vint32x16 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VINT32x16_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT32x16_LOAD_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_load(const vec_int32 x[16]) +{ + vint32x16 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VINT32x16_LOAD_DEFINED +#endif +#if !defined(VINT32x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint32x16_store_aligned(vint32x16 vec, vec_int32 x[16]) +{ + memcpy(x, vec.generic, 64); +} +# define VINT32x16_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT32x16_STORE_DEFINED) +VEC_FUNC_IMPL void vint32x16_store(vint32x16 vec, vec_int32 x[16]) +{ + memcpy(x, vec.generic, 64); +} +# define VINT32x16_STORE_DEFINED +#endif +#if !defined(VINT32x16_ADD_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_add(vint32x16 vec1, vint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] + 
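
One thing worth flagging in the unsigned shift hunks (vuint32x2, vuint32x4 and vuint32x8 above): every lane is shifted by vec2.generic[0], whereas the signed variants shift each lane by its own vec2.generic[i]. Assuming the per-lane behaviour of the signed versions is also what the unsigned ones intend, a corrected lane loop would look like the sketch below; the loop form and the LANES constant are illustrative only, since the generator emits unrolled assignments:

    #include <stdint.h>

    #define LANES 4

    /* per-lane unsigned right shift, each lane using its own amount */
    static void urshift_lanes(uint32_t vec1[LANES], const uint32_t vec2[LANES])
    {
        int i;
        for (i = 0; i < LANES; i++)
            vec1[i] >>= vec2[i];   /* not vec2[0] for every lane */
    }

    int main(void)
    {
        uint32_t v[LANES] = { 16, 16, 16, 16 }, n[LANES] = { 0, 1, 2, 3 };
        urshift_lanes(v, n);
        return !(v[0] == 16 && v[1] == 8 && v[2] == 4 && v[3] == 2);
    }
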
vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + return vec1; +} +# define VINT32x16_ADD_DEFINED +#endif +#if !defined(VINT32x16_SUB_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_sub(vint32x16 vec1, vint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + return vec1; +} +# define VINT32x16_SUB_DEFINED +#endif +#if !defined(VINT32x16_MUL_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_mul(vint32x16 vec1, vint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + return vec1; +} +# define VINT32x16_MUL_DEFINED +#endif +#if !defined(VINT32x16_DIV_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_div(vint32x16 vec1, vint32x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? 
(vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0); + return vec1; +} +# define VINT32x16_DIV_DEFINED +#endif +#if !defined(VINT32x16_MOD_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_mod(vint32x16 vec1, vint32x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? 
(vec1.generic[15] % vec2.generic[15]) : 0); + return vec1; +} +# define VINT32x16_MOD_DEFINED +#endif +#if !defined(VINT32x16_AVG_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_avg(vint32x16 vec1, vint32x16 vec2) +{ + vec_int32 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[4] % 2); + y_d_rem = (vec2.generic[4] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[5] % 2); + y_d_rem = (vec2.generic[5] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[6] % 2); + y_d_rem = (vec2.generic[6] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[7] % 2); + y_d_rem = (vec2.generic[7] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[8] % 2); + y_d_rem = (vec2.generic[8] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[8] = ((vec1.generic[8] / 2) + (vec2.generic[8] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[9] % 2); + y_d_rem = (vec2.generic[9] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[9] = ((vec1.generic[9] / 2) + (vec2.generic[9] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[10] % 2); + y_d_rem = (vec2.generic[10] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[10] = ((vec1.generic[10] / 2) + (vec2.generic[10] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[11] % 2); + y_d_rem = (vec2.generic[11] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[11] = ((vec1.generic[11] / 2) + (vec2.generic[11] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[12] % 2); + y_d_rem = (vec2.generic[12] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[12] = 
((vec1.generic[12] / 2) + (vec2.generic[12] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[13] % 2); + y_d_rem = (vec2.generic[13] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[13] = ((vec1.generic[13] / 2) + (vec2.generic[13] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[14] % 2); + y_d_rem = (vec2.generic[14] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[14] = ((vec1.generic[14] / 2) + (vec2.generic[14] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[15] % 2); + y_d_rem = (vec2.generic[15] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[15] = ((vec1.generic[15] / 2) + (vec2.generic[15] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT32x16_AVG_DEFINED +#endif +#if !defined(VINT32x16_AND_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_and(vint32x16 vec1, vint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]); + return vec1; +} +# define VINT32x16_AND_DEFINED +#endif +#if !defined(VINT32x16_OR_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_or(vint32x16 vec1, vint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]); + return vec1; +} +# define VINT32x16_OR_DEFINED +#endif +#if !defined(VINT32x16_XOR_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_xor(vint32x16 vec1, vint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = 
(vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]); + return vec1; +} +# define VINT32x16_XOR_DEFINED +#endif +#if !defined(VINT32x16_NOT_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_not(vint32x16 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + vec.generic[8] = ~vec.generic[8]; + vec.generic[9] = ~vec.generic[9]; + vec.generic[10] = ~vec.generic[10]; + vec.generic[11] = ~vec.generic[11]; + vec.generic[12] = ~vec.generic[12]; + vec.generic[13] = ~vec.generic[13]; + vec.generic[14] = ~vec.generic[14]; + vec.generic[15] = ~vec.generic[15]; + return vec; +} +# define VINT32x16_NOT_DEFINED +#endif +#if !defined(VINT32x16_CMPLT_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_cmplt(vint32x16 vec1, vint32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x16_CMPLT_DEFINED +#endif +#if !defined(VINT32x16_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_cmpeq(vint32x16 vec1, vint32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 
0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x16_CMPEQ_DEFINED +#endif +#if !defined(VINT32x16_CMPGT_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_cmpgt(vint32x16 vec1, vint32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x16_CMPGT_DEFINED +#endif +#if !defined(VINT32x16_CMPLE_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_cmple(vint32x16 vec1, vint32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 
0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x16_CMPLE_DEFINED +#endif +#if !defined(VINT32x16_CMPGE_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_cmpge(vint32x16 vec1, vint32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VINT32x16_CMPGE_DEFINED +#endif +#if !defined(VINT32x16_MIN_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_min(vint32x16 vec1, vint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? 
(vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VINT32x16_MIN_DEFINED +#endif +#if !defined(VINT32x16_MAX_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_max(vint32x16 vec1, vint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VINT32x16_MAX_DEFINED +#endif +#if !defined(VINT32x16_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_rshift(vint32x16 vec1, vuint32x16 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); +vec1.generic[4] = ((~vec1.generic[4]) >> vec2.generic[4]); +vec1.generic[5] = ((~vec1.generic[5]) >> vec2.generic[5]); +vec1.generic[6] = ((~vec1.generic[6]) >> vec2.generic[6]); +vec1.generic[7] = ((~vec1.generic[7]) >> vec2.generic[7]); +vec1.generic[8] = ((~vec1.generic[8]) >> vec2.generic[8]); +vec1.generic[9] = ((~vec1.generic[9]) >> vec2.generic[9]); +vec1.generic[10] = ((~vec1.generic[10]) >> vec2.generic[10]); +vec1.generic[11] = ((~vec1.generic[11]) >> vec2.generic[11]); +vec1.generic[12] = ((~vec1.generic[12]) >> vec2.generic[12]); +vec1.generic[13] = ((~vec1.generic[13]) >> vec2.generic[13]); +vec1.generic[14] = ((~vec1.generic[14]) >> vec2.generic[14]); +vec1.generic[15] = ((~vec1.generic[15]) >> vec2.generic[15]); + return vec1; +} +# define VINT32x16_RSHIFT_DEFINED +#endif +#if !defined(VINT32x16_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_lrshift(vint32x16 vec1, vuint32x16 vec2) +{ + union { vec_uint32 u; vec_int32 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u >>= vec2.generic[4]; + vec1.generic[4] = 
x.s; + x.s = vec1.generic[5]; + x.u >>= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u >>= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u >>= vec2.generic[7]; + vec1.generic[7] = x.s; + x.s = vec1.generic[8]; + x.u >>= vec2.generic[8]; + vec1.generic[8] = x.s; + x.s = vec1.generic[9]; + x.u >>= vec2.generic[9]; + vec1.generic[9] = x.s; + x.s = vec1.generic[10]; + x.u >>= vec2.generic[10]; + vec1.generic[10] = x.s; + x.s = vec1.generic[11]; + x.u >>= vec2.generic[11]; + vec1.generic[11] = x.s; + x.s = vec1.generic[12]; + x.u >>= vec2.generic[12]; + vec1.generic[12] = x.s; + x.s = vec1.generic[13]; + x.u >>= vec2.generic[13]; + vec1.generic[13] = x.s; + x.s = vec1.generic[14]; + x.u >>= vec2.generic[14]; + vec1.generic[14] = x.s; + x.s = vec1.generic[15]; + x.u >>= vec2.generic[15]; + vec1.generic[15] = x.s; + return vec1; +} +# define VINT32x16_LRSHIFT_DEFINED +#endif +#if !defined(VINT32x16_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x16 vint32x16_lshift(vint32x16 vec1, vuint32x16 vec2) +{ + union { vec_uint32 u; vec_int32 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u <<= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u <<= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u <<= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u <<= vec2.generic[7]; + vec1.generic[7] = x.s; + x.s = vec1.generic[8]; + x.u <<= vec2.generic[8]; + vec1.generic[8] = x.s; + x.s = vec1.generic[9]; + x.u <<= vec2.generic[9]; + vec1.generic[9] = x.s; + x.s = vec1.generic[10]; + x.u <<= vec2.generic[10]; + vec1.generic[10] = x.s; + x.s = vec1.generic[11]; + x.u <<= vec2.generic[11]; + vec1.generic[11] = x.s; + x.s = vec1.generic[12]; + x.u <<= vec2.generic[12]; + vec1.generic[12] = x.s; + x.s = vec1.generic[13]; + x.u <<= vec2.generic[13]; + vec1.generic[13] = x.s; + x.s = vec1.generic[14]; + x.u <<= vec2.generic[14]; + vec1.generic[14] = x.s; + x.s = vec1.generic[15]; + x.u <<= vec2.generic[15]; + vec1.generic[15] = x.s; + return vec1; +} +# define VINT32x16_LSHIFT_DEFINED +#endif +#if !defined(VUINT32x16_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_splat(vec_uint32 x) +{ + vuint32x16 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + vec.generic[8] = x; + vec.generic[9] = x; + vec.generic[10] = x; + vec.generic[11] = x; + vec.generic[12] = x; + vec.generic[13] = x; + vec.generic[14] = x; + vec.generic[15] = x; + return vec; +} +# define VUINT32x16_SPLAT_DEFINED +#endif +#if !defined(VUINT32x16_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_load_aligned(const vec_uint32 x[16]) +{ + vuint32x16 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VUINT32x16_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT32x16_LOAD_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_load(const vec_uint32 x[16]) +{ + vuint32x16 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VUINT32x16_LOAD_DEFINED +#endif +#if !defined(VUINT32x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint32x16_store_aligned(vuint32x16 vec, vec_uint32 x[16]) +{ + memcpy(x, vec.generic, 64); 
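+	/* Illustrative sketch: in this generic fallback the aligned and unaligned
+	 * load/store variants are the same 64-byte memcpy (16 lanes x 4 bytes);
+	 * alignment only becomes meaningful once an intrinsic-backed header takes
+	 * over. A minimal caller, with a hypothetical buffer name:
+	 *
+	 *     vec_uint32 buf[16];
+	 *     vuint32x16 v = vuint32x16_load(buf);
+	 *     vuint32x16_store(v, buf);
+	 */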
+} +# define VUINT32x16_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT32x16_STORE_DEFINED) +VEC_FUNC_IMPL void vuint32x16_store(vuint32x16 vec, vec_uint32 x[16]) +{ + memcpy(x, vec.generic, 64); +} +# define VUINT32x16_STORE_DEFINED +#endif +#if !defined(VUINT32x16_ADD_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_add(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + return vec1; +} +# define VUINT32x16_ADD_DEFINED +#endif +#if !defined(VUINT32x16_SUB_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_sub(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + return vec1; +} +# define VUINT32x16_SUB_DEFINED +#endif +#if !defined(VUINT32x16_MUL_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_mul(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + return vec1; +} +# define VUINT32x16_MUL_DEFINED 
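+/* Illustrative note on the div/mod fallbacks that follow: a zero divisor lane
+ * yields 0 for that lane instead of invoking undefined behaviour, so a caller
+ * sketch like this (hypothetical names) stays well-defined:
+ *
+ *     vuint32x16 q = vuint32x16_div(nums, denoms); // lanes with denom 0 -> 0
+ */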
+#endif +#if !defined(VUINT32x16_DIV_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_div(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0); + return vec1; +} +# define VUINT32x16_DIV_DEFINED +#endif +#if !defined(VUINT32x16_MOD_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_mod(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? 
(vec1.generic[15] % vec2.generic[15]) : 0); + return vec1; +} +# define VUINT32x16_MOD_DEFINED +#endif +#if !defined(VUINT32x16_AVG_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_avg(vuint32x16 vec1, vuint32x16 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); +vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1); +vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1); +vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1); +vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1); +vec1.generic[8] = (vec1.generic[8] >> 1) + (vec2.generic[8] >> 1) + ((vec1.generic[8] | vec2.generic[8]) & 1); +vec1.generic[9] = (vec1.generic[9] >> 1) + (vec2.generic[9] >> 1) + ((vec1.generic[9] | vec2.generic[9]) & 1); +vec1.generic[10] = (vec1.generic[10] >> 1) + (vec2.generic[10] >> 1) + ((vec1.generic[10] | vec2.generic[10]) & 1); +vec1.generic[11] = (vec1.generic[11] >> 1) + (vec2.generic[11] >> 1) + ((vec1.generic[11] | vec2.generic[11]) & 1); +vec1.generic[12] = (vec1.generic[12] >> 1) + (vec2.generic[12] >> 1) + ((vec1.generic[12] | vec2.generic[12]) & 1); +vec1.generic[13] = (vec1.generic[13] >> 1) + (vec2.generic[13] >> 1) + ((vec1.generic[13] | vec2.generic[13]) & 1); +vec1.generic[14] = (vec1.generic[14] >> 1) + (vec2.generic[14] >> 1) + ((vec1.generic[14] | vec2.generic[14]) & 1); +vec1.generic[15] = (vec1.generic[15] >> 1) + (vec2.generic[15] >> 1) + ((vec1.generic[15] | vec2.generic[15]) & 1); + return vec1; +} +# define VUINT32x16_AVG_DEFINED +#endif +#if !defined(VUINT32x16_AND_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_and(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]); + return vec1; +} +# define VUINT32x16_AND_DEFINED +#endif +#if !defined(VUINT32x16_OR_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_or(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + 
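+	/* Illustrative note on the comparisons below: each one memsets the whole
+	 * 4-byte lane to all-ones or all-zeros, so the result doubles as a select
+	 * mask for the bitwise ops, e.g. (hypothetical names):
+	 *
+	 *     vuint32x16 m = vuint32x16_cmplt(a, b);   // all-ones where a < b
+	 *     vuint32x16 r = vuint32x16_or(vuint32x16_and(m, x),
+	 *                                  vuint32x16_and(vuint32x16_not(m), y));
+	 */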
vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]); + return vec1; +} +# define VUINT32x16_OR_DEFINED +#endif +#if !defined(VUINT32x16_XOR_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_xor(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]); + return vec1; +} +# define VUINT32x16_XOR_DEFINED +#endif +#if !defined(VUINT32x16_NOT_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_not(vuint32x16 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + vec.generic[8] = ~vec.generic[8]; + vec.generic[9] = ~vec.generic[9]; + vec.generic[10] = ~vec.generic[10]; + vec.generic[11] = ~vec.generic[11]; + vec.generic[12] = ~vec.generic[12]; + vec.generic[13] = ~vec.generic[13]; + vec.generic[14] = ~vec.generic[14]; + vec.generic[15] = ~vec.generic[15]; + return vec; +} +# define VUINT32x16_NOT_DEFINED +#endif +#if !defined(VUINT32x16_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmplt(vuint32x16 vec1, vuint32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 
0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x16_CMPLT_DEFINED +#endif +#if !defined(VUINT32x16_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpeq(vuint32x16 vec1, vuint32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x16_CMPEQ_DEFINED +#endif +#if !defined(VUINT32x16_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpgt(vuint32x16 vec1, vuint32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 
0xFF : 0, 4); + return vec1; +} +# define VUINT32x16_CMPGT_DEFINED +#endif +#if !defined(VUINT32x16_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmple(vuint32x16 vec1, vuint32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x16_CMPLE_DEFINED +#endif +#if !defined(VUINT32x16_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpge(vuint32x16 vec1, vuint32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VUINT32x16_CMPGE_DEFINED +#endif +#if !defined(VUINT32x16_MIN_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_min(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? 
(vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VUINT32x16_MIN_DEFINED +#endif +#if !defined(VUINT32x16_MAX_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_max(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? 
(vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VUINT32x16_MAX_DEFINED +#endif +#if !defined(VUINT32x16_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_rshift(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + vec1.generic[4] >>= vec2.generic[0]; + vec1.generic[5] >>= vec2.generic[0]; + vec1.generic[6] >>= vec2.generic[0]; + vec1.generic[7] >>= vec2.generic[0]; + vec1.generic[8] >>= vec2.generic[0]; + vec1.generic[9] >>= vec2.generic[0]; + vec1.generic[10] >>= vec2.generic[0]; + vec1.generic[11] >>= vec2.generic[0]; + vec1.generic[12] >>= vec2.generic[0]; + vec1.generic[13] >>= vec2.generic[0]; + vec1.generic[14] >>= vec2.generic[0]; + vec1.generic[15] >>= vec2.generic[0]; + return vec1; +} +# define VUINT32x16_RSHIFT_DEFINED +#endif +#if !defined(VUINT32x16_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_lrshift(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + vec1.generic[4] >>= vec2.generic[0]; + vec1.generic[5] >>= vec2.generic[0]; + vec1.generic[6] >>= vec2.generic[0]; + vec1.generic[7] >>= vec2.generic[0]; + vec1.generic[8] >>= vec2.generic[0]; + vec1.generic[9] >>= vec2.generic[0]; + vec1.generic[10] >>= vec2.generic[0]; + vec1.generic[11] >>= vec2.generic[0]; + vec1.generic[12] >>= vec2.generic[0]; + vec1.generic[13] >>= vec2.generic[0]; + vec1.generic[14] >>= vec2.generic[0]; + vec1.generic[15] >>= vec2.generic[0]; + return vec1; +} +# define VUINT32x16_LRSHIFT_DEFINED +#endif +#if !defined(VUINT32x16_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x16 vuint32x16_lshift(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + vec1.generic[2] <<= vec2.generic[0]; + vec1.generic[3] <<= vec2.generic[0]; + vec1.generic[4] <<= vec2.generic[0]; + vec1.generic[5] <<= vec2.generic[0]; + vec1.generic[6] <<= vec2.generic[0]; + vec1.generic[7] <<= vec2.generic[0]; + vec1.generic[8] <<= vec2.generic[0]; + vec1.generic[9] <<= vec2.generic[0]; + vec1.generic[10] <<= vec2.generic[0]; + vec1.generic[11] <<= vec2.generic[0]; + vec1.generic[12] <<= vec2.generic[0]; + vec1.generic[13] <<= vec2.generic[0]; + vec1.generic[14] <<= vec2.generic[0]; + vec1.generic[15] <<= vec2.generic[0]; + return vec1; +} +# define VUINT32x16_LSHIFT_DEFINED +#endif +#if !defined(VINT64x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_splat(vec_int64 x) +{ + vint64x2 vec; + vec.generic[0] = x; + vec.generic[1] = x; + return vec; +} # define VINT64x2_SPLAT_DEFINED #endif -#ifndef VINT64x2_LOAD_ALIGNED_DEFINED -VEC_GENERIC_LOAD_ALIGNED(/* nothing */, 64, 2) +#if !defined(VINT64x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_load_aligned(const vec_int64 x[2]) +{ + vint64x2 vec; + memcpy(vec.generic, x, 16); + return vec; +} # define VINT64x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT64x2_LOAD_DEFINED -VEC_GENERIC_LOAD(/* nothing */, 64, 2) +#if !defined(VINT64x2_LOAD_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_load(const vec_int64 x[2]) +{ + vint64x2 vec; + memcpy(vec.generic, x, 16); + return vec; +} # define VINT64x2_LOAD_DEFINED #endif -#ifndef VINT64x2_STORE_ALIGNED_DEFINED -VEC_GENERIC_STORE_ALIGNED(/* nothing */, 64, 2) +#if !defined(VINT64x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint64x2_store_aligned(vint64x2 vec, vec_int64 x[2]) +{ + 
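+	/* Illustrative note: same generic pattern as the wider types above, only a
+	 * 16-byte copy for the two 64-bit lanes; these expanded functions take the
+	 * place of the former VEC_GENERIC_* macro invocations. */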
memcpy(x, vec.generic, 16); +} # define VINT64x2_STORE_ALIGNED_DEFINED #endif -#ifndef VINT64x2_STORE_DEFINED -VEC_GENERIC_STORE(/* nothing */, 64, 2) +#if !defined(VINT64x2_STORE_DEFINED) +VEC_FUNC_IMPL void vint64x2_store(vint64x2 vec, vec_int64 x[2]) +{ + memcpy(x, vec.generic, 16); +} # define VINT64x2_STORE_DEFINED #endif -#ifndef VINT64x2_ADD_DEFINED -VEC_GENERIC_ADD(/* nothing */, 64, 2) +#if !defined(VINT64x2_ADD_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_add(vint64x2 vec1, vint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + return vec1; +} # define VINT64x2_ADD_DEFINED #endif -#ifndef VINT64x2_SUB_DEFINED -VEC_GENERIC_SUB(/* nothing */, 64, 2) +#if !defined(VINT64x2_SUB_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_sub(vint64x2 vec1, vint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + return vec1; +} # define VINT64x2_SUB_DEFINED #endif -#ifndef VINT64x2_MUL_DEFINED -VEC_GENERIC_MUL(/* nothing */, 64, 2) +#if !defined(VINT64x2_MUL_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_mul(vint64x2 vec1, vint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + return vec1; +} # define VINT64x2_MUL_DEFINED #endif -#ifndef VINT64x2_DIV_DEFINED -VEC_GENERIC_DIV(/* nothing */, 64, 2) +#if !defined(VINT64x2_DIV_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_div(vint64x2 vec1, vint64x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + return vec1; +} # define VINT64x2_DIV_DEFINED #endif -#ifndef VINT64x2_MOD_DEFINED -VEC_GENERIC_MOD(/* nothing */, 64, 2) +#if !defined(VINT64x2_MOD_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_mod(vint64x2 vec1, vint64x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? 
(vec1.generic[1] % vec2.generic[1]) : 0); + return vec1; +} # define VINT64x2_MOD_DEFINED #endif -#ifndef VINT64x2_AVG_DEFINED -VEC_GENERIC_AVG(/* nothing */, 64, 2) +#if !defined(VINT64x2_AVG_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_avg(vint64x2 vec1, vint64x2 vec2) +{ + vec_int64 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} # define VINT64x2_AVG_DEFINED #endif -#ifndef VINT64x2_AND_DEFINED -VEC_GENERIC_AND(/* nothing */, 64, 2) +#if !defined(VINT64x2_AND_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_and(vint64x2 vec1, vint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + return vec1; +} # define VINT64x2_AND_DEFINED #endif -#ifndef VINT64x2_OR_DEFINED -VEC_GENERIC_OR(/* nothing */, 64, 2) +#if !defined(VINT64x2_OR_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_or(vint64x2 vec1, vint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + return vec1; +} # define VINT64x2_OR_DEFINED #endif -#ifndef VINT64x2_XOR_DEFINED -VEC_GENERIC_XOR(/* nothing */, 64, 2) +#if !defined(VINT64x2_XOR_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_xor(vint64x2 vec1, vint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + return vec1; +} # define VINT64x2_XOR_DEFINED #endif -#ifndef VINT64x2_NOT_DEFINED -VEC_GENERIC_NOT(/* nothing */, 64, 2) +#if !defined(VINT64x2_NOT_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_not(vint64x2 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + return vec; +} # define VINT64x2_NOT_DEFINED #endif -#ifndef VINT64x2_CMPLT_DEFINED -VEC_GENERIC_CMPLT(/* nothing */, 64, 2) +#if !defined(VINT64x2_CMPLT_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_cmplt(vint64x2 vec1, vint64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8); + return vec1; +} # define VINT64x2_CMPLT_DEFINED #endif -#ifndef VINT64x2_CMPEQ_DEFINED -VEC_GENERIC_CMPEQ(/* nothing */, 64, 2) +#if !defined(VINT64x2_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_cmpeq(vint64x2 vec1, vint64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8); + return vec1; +} # define VINT64x2_CMPEQ_DEFINED #endif -#ifndef VINT64x2_CMPGT_DEFINED -VEC_GENERIC_CMPGT(/* nothing */, 64, 2) +#if !defined(VINT64x2_CMPGT_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_cmpgt(vint64x2 vec1, vint64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 
0xFF : 0, 8); + return vec1; +} # define VINT64x2_CMPGT_DEFINED #endif -#ifndef VINT64x2_CMPLE_DEFINED -VEC_GENERIC_CMPLE(/* nothing */, 64, 2) +#if !defined(VINT64x2_CMPLE_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_cmple(vint64x2 vec1, vint64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8); + return vec1; +} # define VINT64x2_CMPLE_DEFINED #endif -#ifndef VINT64x2_CMPGE_DEFINED -VEC_GENERIC_CMPGE(/* nothing */, 64, 2) +#if !defined(VINT64x2_CMPGE_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_cmpge(vint64x2 vec1, vint64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8); + return vec1; +} # define VINT64x2_CMPGE_DEFINED #endif -#ifndef VINT64x2_MIN_DEFINED -VEC_GENERIC_MIN(/* nothing */, 64, 2) +#if !defined(VINT64x2_MIN_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_min(vint64x2 vec1, vint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VINT64x2_MIN_DEFINED #endif -#ifndef VINT64x2_MAX_DEFINED -VEC_GENERIC_MAX(/* nothing */, 64, 2) +#if !defined(VINT64x2_MAX_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_max(vint64x2 vec1, vint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VINT64x2_MAX_DEFINED #endif -#ifndef VINT64x2_RSHIFT_DEFINED -VEC_GENERIC_RSHIFT(/* nothing */, 64, 2) +#if !defined(VINT64x2_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_rshift(vint64x2 vec1, vuint64x2 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); + return vec1; +} # define VINT64x2_RSHIFT_DEFINED #endif -#ifndef VINT64x2_LRSHIFT_DEFINED -VEC_GENERIC_LRSHIFT(/* nothing */, 64, 2) +#if !defined(VINT64x2_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_lrshift(vint64x2 vec1, vuint64x2 vec2) +{ + union { vec_uint64 u; vec_int64 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + return vec1; +} # define VINT64x2_LRSHIFT_DEFINED #endif -#ifndef VINT64x2_LSHIFT_DEFINED -VEC_GENERIC_LSHIFT(/* nothing */, 64, 2) +#if !defined(VINT64x2_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint64x2 vint64x2_lshift(vint64x2 vec1, vuint64x2 vec2) +{ + union { vec_uint64 u; vec_int64 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + return vec1; +} # define VINT64x2_LSHIFT_DEFINED #endif - - -/* vint64x2 */ - -#ifndef VUINT64x2_SPLAT_DEFINED -VEC_GENERIC_SPLAT(u, 64, 2) +#if !defined(VUINT64x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_splat(vec_uint64 x) +{ + vuint64x2 vec; + vec.generic[0] = x; + vec.generic[1] = x; + return vec; +} # define VUINT64x2_SPLAT_DEFINED #endif -#ifndef VUINT64x2_LOAD_ALIGNED_DEFINED -VEC_GENERIC_LOAD_ALIGNED(u, 64, 2) +#if !defined(VUINT64x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_load_aligned(const vec_uint64 x[2]) +{ + vuint64x2 vec; + memcpy(vec.generic, x, 16); + return vec; +} # define 
VUINT64x2_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT64x2_LOAD_DEFINED -VEC_GENERIC_LOAD(u, 64, 2) +#if !defined(VUINT64x2_LOAD_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_load(const vec_uint64 x[2]) +{ + vuint64x2 vec; + memcpy(vec.generic, x, 16); + return vec; +} # define VUINT64x2_LOAD_DEFINED #endif -#ifndef VUINT64x2_STORE_ALIGNED_DEFINED -VEC_GENERIC_STORE_ALIGNED(u, 64, 2) +#if !defined(VUINT64x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint64x2_store_aligned(vuint64x2 vec, vec_uint64 x[2]) +{ + memcpy(x, vec.generic, 16); +} # define VUINT64x2_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT64x2_STORE_DEFINED -VEC_GENERIC_STORE(u, 64, 2) +#if !defined(VUINT64x2_STORE_DEFINED) +VEC_FUNC_IMPL void vuint64x2_store(vuint64x2 vec, vec_uint64 x[2]) +{ + memcpy(x, vec.generic, 16); +} # define VUINT64x2_STORE_DEFINED #endif -#ifndef VUINT64x2_ADD_DEFINED -VEC_GENERIC_ADD(u, 64, 2) +#if !defined(VUINT64x2_ADD_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_add(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + return vec1; +} # define VUINT64x2_ADD_DEFINED #endif -#ifndef VUINT64x2_SUB_DEFINED -VEC_GENERIC_SUB(u, 64, 2) +#if !defined(VUINT64x2_SUB_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_sub(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + return vec1; +} # define VUINT64x2_SUB_DEFINED #endif -#ifndef VUINT64x2_MUL_DEFINED -VEC_GENERIC_MUL(u, 64, 2) +#if !defined(VUINT64x2_MUL_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_mul(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + return vec1; +} # define VUINT64x2_MUL_DEFINED #endif -#ifndef VUINT64x2_DIV_DEFINED -VEC_GENERIC_DIV(u, 64, 2) +#if !defined(VUINT64x2_DIV_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_div(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + return vec1; +} # define VUINT64x2_DIV_DEFINED #endif -#ifndef VUINT64x2_MOD_DEFINED -VEC_GENERIC_MOD(u, 64, 2) +#if !defined(VUINT64x2_MOD_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_mod(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? 
(vec1.generic[1] % vec2.generic[1]) : 0); + return vec1; +} # define VUINT64x2_MOD_DEFINED #endif -#ifndef VUINT64x2_AVG_DEFINED -VEC_GENERIC_AVG(u, 64, 2) +#if !defined(VUINT64x2_AVG_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_avg(vuint64x2 vec1, vuint64x2 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); + return vec1; +} # define VUINT64x2_AVG_DEFINED #endif -#ifndef VUINT64x2_AND_DEFINED -VEC_GENERIC_AND(u, 64, 2) +#if !defined(VUINT64x2_AND_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_and(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + return vec1; +} # define VUINT64x2_AND_DEFINED #endif -#ifndef VUINT64x2_OR_DEFINED -VEC_GENERIC_OR(u, 64, 2) +#if !defined(VUINT64x2_OR_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_or(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + return vec1; +} # define VUINT64x2_OR_DEFINED #endif -#ifndef VUINT64x2_XOR_DEFINED -VEC_GENERIC_XOR(u, 64, 2) +#if !defined(VUINT64x2_XOR_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_xor(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + return vec1; +} # define VUINT64x2_XOR_DEFINED #endif -#ifndef VUINT64x2_NOT_DEFINED -VEC_GENERIC_NOT(u, 64, 2) +#if !defined(VUINT64x2_NOT_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_not(vuint64x2 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + return vec; +} # define VUINT64x2_NOT_DEFINED #endif -#ifndef VUINT64x2_CMPLT_DEFINED -VEC_GENERIC_CMPLT(u, 64, 2) +#if !defined(VUINT64x2_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmplt(vuint64x2 vec1, vuint64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8); + return vec1; +} # define VUINT64x2_CMPLT_DEFINED #endif -#ifndef VUINT64x2_CMPEQ_DEFINED -VEC_GENERIC_CMPEQ(u, 64, 2) +#if !defined(VUINT64x2_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpeq(vuint64x2 vec1, vuint64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8); + return vec1; +} # define VUINT64x2_CMPEQ_DEFINED #endif -#ifndef VUINT64x2_CMPGT_DEFINED -VEC_GENERIC_CMPGT(u, 64, 2) +#if !defined(VUINT64x2_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpgt(vuint64x2 vec1, vuint64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8); + return vec1; +} # define VUINT64x2_CMPGT_DEFINED #endif -#ifndef VUINT64x2_CMPLE_DEFINED -VEC_GENERIC_CMPLE(u, 64, 2) +#if !defined(VUINT64x2_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmple(vuint64x2 vec1, vuint64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 
0xFF : 0, 8); + return vec1; +} # define VUINT64x2_CMPLE_DEFINED #endif -#ifndef VUINT64x2_CMPGE_DEFINED -VEC_GENERIC_CMPGE(u, 64, 2) +#if !defined(VUINT64x2_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpge(vuint64x2 vec1, vuint64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8); + return vec1; +} # define VUINT64x2_CMPGE_DEFINED #endif -#ifndef VUINT64x2_MIN_DEFINED -VEC_GENERIC_MIN(u, 64, 2) +#if !defined(VUINT64x2_MIN_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_min(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VUINT64x2_MIN_DEFINED #endif -#ifndef VUINT64x2_MAX_DEFINED -VEC_GENERIC_MAX(u, 64, 2) +#if !defined(VUINT64x2_MAX_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_max(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} # define VUINT64x2_MAX_DEFINED #endif -#ifndef VUINT64x2_RSHIFT_DEFINED -VEC_GENERIC_RSHIFT(u, 64, 2) +#if !defined(VUINT64x2_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_rshift(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + return vec1; +} # define VUINT64x2_RSHIFT_DEFINED #endif -#ifndef VUINT64x2_LRSHIFT_DEFINED -VEC_GENERIC_LRSHIFT(u, 64, 2) +#if !defined(VUINT64x2_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_lrshift(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + return vec1; +} # define VUINT64x2_LRSHIFT_DEFINED #endif -#ifndef VUINT64x2_LSHIFT_DEFINED -VEC_GENERIC_LSHIFT(u, 64, 2) +#if !defined(VUINT64x2_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint64x2 vuint64x2_lshift(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + return vec1; +} # define VUINT64x2_LSHIFT_DEFINED #endif -#endif /* VEC_IMPL_GENERIC_H_ */ - +#if !defined(VINT64x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_splat(vec_int64 x) +{ + vint64x4 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + return vec; +} +# define VINT64x4_SPLAT_DEFINED +#endif +#if !defined(VINT64x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_load_aligned(const vec_int64 x[4]) +{ + vint64x4 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VINT64x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT64x4_LOAD_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_load(const vec_int64 x[4]) +{ + vint64x4 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VINT64x4_LOAD_DEFINED +#endif +#if !defined(VINT64x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint64x4_store_aligned(vint64x4 vec, vec_int64 x[4]) +{ + memcpy(x, vec.generic, 32); +} +# define VINT64x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT64x4_STORE_DEFINED) +VEC_FUNC_IMPL void vint64x4_store(vint64x4 vec, vec_int64 x[4]) +{ + memcpy(x, vec.generic, 32); +} +# define VINT64x4_STORE_DEFINED +#endif +#if !defined(VINT64x4_ADD_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_add(vint64x4 vec1, vint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = 
(vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + return vec1; +} +# define VINT64x4_ADD_DEFINED +#endif +#if !defined(VINT64x4_SUB_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_sub(vint64x4 vec1, vint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + return vec1; +} +# define VINT64x4_SUB_DEFINED +#endif +#if !defined(VINT64x4_MUL_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_mul(vint64x4 vec1, vint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + return vec1; +} +# define VINT64x4_MUL_DEFINED +#endif +#if !defined(VINT64x4_DIV_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_div(vint64x4 vec1, vint64x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + return vec1; +} +# define VINT64x4_DIV_DEFINED +#endif +#if !defined(VINT64x4_MOD_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_mod(vint64x4 vec1, vint64x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? 
(vec1.generic[3] % vec2.generic[3]) : 0); + return vec1; +} +# define VINT64x4_MOD_DEFINED +#endif +#if !defined(VINT64x4_AVG_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_avg(vint64x4 vec1, vint64x4 vec2) +{ + vec_int64 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT64x4_AVG_DEFINED +#endif +#if !defined(VINT64x4_AND_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_and(vint64x4 vec1, vint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + return vec1; +} +# define VINT64x4_AND_DEFINED +#endif +#if !defined(VINT64x4_OR_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_or(vint64x4 vec1, vint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + return vec1; +} +# define VINT64x4_OR_DEFINED +#endif +#if !defined(VINT64x4_XOR_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_xor(vint64x4 vec1, vint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + return vec1; +} +# define VINT64x4_XOR_DEFINED +#endif +#if !defined(VINT64x4_NOT_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_not(vint64x4 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + return vec; +} +# define VINT64x4_NOT_DEFINED +#endif +#if !defined(VINT64x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_cmplt(vint64x4 vec1, vint64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VINT64x4_CMPLT_DEFINED +#endif +#if !defined(VINT64x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_cmpeq(vint64x4 vec1, vint64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 
0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VINT64x4_CMPEQ_DEFINED +#endif +#if !defined(VINT64x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_cmpgt(vint64x4 vec1, vint64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VINT64x4_CMPGT_DEFINED +#endif +#if !defined(VINT64x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_cmple(vint64x4 vec1, vint64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VINT64x4_CMPLE_DEFINED +#endif +#if !defined(VINT64x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_cmpge(vint64x4 vec1, vint64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VINT64x4_CMPGE_DEFINED +#endif +#if !defined(VINT64x4_MIN_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_min(vint64x4 vec1, vint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VINT64x4_MIN_DEFINED +#endif +#if !defined(VINT64x4_MAX_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_max(vint64x4 vec1, vint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? 
(vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VINT64x4_MAX_DEFINED +#endif +#if !defined(VINT64x4_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_rshift(vint64x4 vec1, vuint64x4 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); + return vec1; +} +# define VINT64x4_RSHIFT_DEFINED +#endif +#if !defined(VINT64x4_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_lrshift(vint64x4 vec1, vuint64x4 vec2) +{ + union { vec_uint64 u; vec_int64 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + return vec1; +} +# define VINT64x4_LRSHIFT_DEFINED +#endif +#if !defined(VINT64x4_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint64x4 vint64x4_lshift(vint64x4 vec1, vuint64x4 vec2) +{ + union { vec_uint64 u; vec_int64 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + return vec1; +} +# define VINT64x4_LSHIFT_DEFINED +#endif +#if !defined(VUINT64x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_splat(vec_uint64 x) +{ + vuint64x4 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + return vec; +} +# define VUINT64x4_SPLAT_DEFINED +#endif +#if !defined(VUINT64x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_load_aligned(const vec_uint64 x[4]) +{ + vuint64x4 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VUINT64x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT64x4_LOAD_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_load(const vec_uint64 x[4]) +{ + vuint64x4 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VUINT64x4_LOAD_DEFINED +#endif +#if !defined(VUINT64x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint64x4_store_aligned(vuint64x4 vec, vec_uint64 x[4]) +{ + memcpy(x, vec.generic, 32); +} +# define VUINT64x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT64x4_STORE_DEFINED) +VEC_FUNC_IMPL void vuint64x4_store(vuint64x4 vec, vec_uint64 x[4]) +{ + memcpy(x, vec.generic, 32); +} +# define VUINT64x4_STORE_DEFINED +#endif +#if !defined(VUINT64x4_ADD_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_add(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + return vec1; +} +# define VUINT64x4_ADD_DEFINED +#endif +#if !defined(VUINT64x4_SUB_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_sub(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + return vec1; +} +# define VUINT64x4_SUB_DEFINED +#endif +#if !defined(VUINT64x4_MUL_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_mul(vuint64x4 vec1, vuint64x4 
vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + return vec1; +} +# define VUINT64x4_MUL_DEFINED +#endif +#if !defined(VUINT64x4_DIV_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_div(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + return vec1; +} +# define VUINT64x4_DIV_DEFINED +#endif +#if !defined(VUINT64x4_MOD_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_mod(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + return vec1; +} +# define VUINT64x4_MOD_DEFINED +#endif +#if !defined(VUINT64x4_AVG_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_avg(vuint64x4 vec1, vuint64x4 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); + return vec1; +} +# define VUINT64x4_AVG_DEFINED +#endif +#if !defined(VUINT64x4_AND_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_and(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + return vec1; +} +# define VUINT64x4_AND_DEFINED +#endif +#if !defined(VUINT64x4_OR_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_or(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + return vec1; +} +# define VUINT64x4_OR_DEFINED +#endif +#if !defined(VUINT64x4_XOR_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_xor(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + return vec1; +} +# define VUINT64x4_XOR_DEFINED +#endif +#if !defined(VUINT64x4_NOT_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_not(vuint64x4 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + return vec; +} +# define VUINT64x4_NOT_DEFINED +#endif +#if !defined(VUINT64x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmplt(vuint64x4 vec1, vuint64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < 
vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VUINT64x4_CMPLT_DEFINED +#endif +#if !defined(VUINT64x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpeq(vuint64x4 vec1, vuint64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VUINT64x4_CMPEQ_DEFINED +#endif +#if !defined(VUINT64x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpgt(vuint64x4 vec1, vuint64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VUINT64x4_CMPGT_DEFINED +#endif +#if !defined(VUINT64x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmple(vuint64x4 vec1, vuint64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VUINT64x4_CMPLE_DEFINED +#endif +#if !defined(VUINT64x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpge(vuint64x4 vec1, vuint64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VUINT64x4_CMPGE_DEFINED +#endif +#if !defined(VUINT64x4_MIN_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_min(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VUINT64x4_MIN_DEFINED +#endif +#if !defined(VUINT64x4_MAX_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_max(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? 
(vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VUINT64x4_MAX_DEFINED +#endif +#if !defined(VUINT64x4_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_rshift(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + return vec1; +} +# define VUINT64x4_RSHIFT_DEFINED +#endif +#if !defined(VUINT64x4_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_lrshift(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + return vec1; +} +# define VUINT64x4_LRSHIFT_DEFINED +#endif +#if !defined(VUINT64x4_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint64x4 vuint64x4_lshift(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + vec1.generic[2] <<= vec2.generic[0]; + vec1.generic[3] <<= vec2.generic[0]; + return vec1; +} +# define VUINT64x4_LSHIFT_DEFINED +#endif +#if !defined(VINT64x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_splat(vec_int64 x) +{ + vint64x8 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + return vec; +} +# define VINT64x8_SPLAT_DEFINED +#endif +#if !defined(VINT64x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_load_aligned(const vec_int64 x[8]) +{ + vint64x8 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VINT64x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT64x8_LOAD_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_load(const vec_int64 x[8]) +{ + vint64x8 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VINT64x8_LOAD_DEFINED +#endif +#if !defined(VINT64x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint64x8_store_aligned(vint64x8 vec, vec_int64 x[8]) +{ + memcpy(x, vec.generic, 64); +} +# define VINT64x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT64x8_STORE_DEFINED) +VEC_FUNC_IMPL void vint64x8_store(vint64x8 vec, vec_int64 x[8]) +{ + memcpy(x, vec.generic, 64); +} +# define VINT64x8_STORE_DEFINED +#endif +#if !defined(VINT64x8_ADD_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_add(vint64x8 vec1, vint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + return vec1; +} +# define VINT64x8_ADD_DEFINED +#endif +#if !defined(VINT64x8_SUB_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_sub(vint64x8 vec1, vint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + return vec1; +} +# define VINT64x8_SUB_DEFINED +#endif +#if !defined(VINT64x8_MUL_DEFINED) +VEC_FUNC_IMPL 
vint64x8 vint64x8_mul(vint64x8 vec1, vint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + return vec1; +} +# define VINT64x8_MUL_DEFINED +#endif +#if !defined(VINT64x8_DIV_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_div(vint64x8 vec1, vint64x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + return vec1; +} +# define VINT64x8_DIV_DEFINED +#endif +#if !defined(VINT64x8_MOD_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_mod(vint64x8 vec1, vint64x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? 
(vec1.generic[7] % vec2.generic[7]) : 0); + return vec1; +} +# define VINT64x8_MOD_DEFINED +#endif +#if !defined(VINT64x8_AVG_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_avg(vint64x8 vec1, vint64x8 vec2) +{ + vec_int64 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem; + x_d_rem = (vec1.generic[0] % 2); + y_d_rem = (vec2.generic[0] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[1] % 2); + y_d_rem = (vec2.generic[1] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[2] % 2); + y_d_rem = (vec2.generic[2] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[3] % 2); + y_d_rem = (vec2.generic[3] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[4] % 2); + y_d_rem = (vec2.generic[4] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[5] % 2); + y_d_rem = (vec2.generic[5] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[6] % 2); + y_d_rem = (vec2.generic[6] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + x_d_rem = (vec1.generic[7] % 2); + y_d_rem = (vec2.generic[7] % 2); + rem_d_quot = ((x_d_rem + y_d_rem) / 2); + rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return vec1; +} +# define VINT64x8_AVG_DEFINED +#endif +#if !defined(VINT64x8_AND_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_and(vint64x8 vec1, vint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + return vec1; +} +# define VINT64x8_AND_DEFINED +#endif +#if !defined(VINT64x8_OR_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_or(vint64x8 vec1, vint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | 
vec2.generic[7]); + return vec1; +} +# define VINT64x8_OR_DEFINED +#endif +#if !defined(VINT64x8_XOR_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_xor(vint64x8 vec1, vint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + return vec1; +} +# define VINT64x8_XOR_DEFINED +#endif +#if !defined(VINT64x8_NOT_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_not(vint64x8 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + return vec; +} +# define VINT64x8_NOT_DEFINED +#endif +#if !defined(VINT64x8_CMPLT_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_cmplt(vint64x8 vec1, vint64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VINT64x8_CMPLT_DEFINED +#endif +#if !defined(VINT64x8_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_cmpeq(vint64x8 vec1, vint64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VINT64x8_CMPEQ_DEFINED +#endif +#if !defined(VINT64x8_CMPGT_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_cmpgt(vint64x8 vec1, vint64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 
0xFF : 0, 8); + return vec1; +} +# define VINT64x8_CMPGT_DEFINED +#endif +#if !defined(VINT64x8_CMPLE_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_cmple(vint64x8 vec1, vint64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VINT64x8_CMPLE_DEFINED +#endif +#if !defined(VINT64x8_CMPGE_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_cmpge(vint64x8 vec1, vint64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VINT64x8_CMPGE_DEFINED +#endif +#if !defined(VINT64x8_MIN_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_min(vint64x8 vec1, vint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VINT64x8_MIN_DEFINED +#endif +#if !defined(VINT64x8_MAX_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_max(vint64x8 vec1, vint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? 
(vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VINT64x8_MAX_DEFINED +#endif +#if !defined(VINT64x8_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_rshift(vint64x8 vec1, vuint64x8 vec2) +{ +vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]); +vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]); +vec1.generic[2] = ((~vec1.generic[2]) >> vec2.generic[2]); +vec1.generic[3] = ((~vec1.generic[3]) >> vec2.generic[3]); +vec1.generic[4] = ((~vec1.generic[4]) >> vec2.generic[4]); +vec1.generic[5] = ((~vec1.generic[5]) >> vec2.generic[5]); +vec1.generic[6] = ((~vec1.generic[6]) >> vec2.generic[6]); +vec1.generic[7] = ((~vec1.generic[7]) >> vec2.generic[7]); + return vec1; +} +# define VINT64x8_RSHIFT_DEFINED +#endif +#if !defined(VINT64x8_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_lrshift(vint64x8 vec1, vuint64x8 vec2) +{ + union { vec_uint64 u; vec_int64 s; } x; + + x.s = vec1.generic[0]; + x.u >>= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u >>= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u >>= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u >>= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u >>= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u >>= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u >>= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u >>= vec2.generic[7]; + vec1.generic[7] = x.s; + return vec1; +} +# define VINT64x8_LRSHIFT_DEFINED +#endif +#if !defined(VINT64x8_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint64x8 vint64x8_lshift(vint64x8 vec1, vuint64x8 vec2) +{ + union { vec_uint64 u; vec_int64 s; } x; + + x.s = vec1.generic[0]; + x.u <<= vec2.generic[0]; + vec1.generic[0] = x.s; + x.s = vec1.generic[1]; + x.u <<= vec2.generic[1]; + vec1.generic[1] = x.s; + x.s = vec1.generic[2]; + x.u <<= vec2.generic[2]; + vec1.generic[2] = x.s; + x.s = vec1.generic[3]; + x.u <<= vec2.generic[3]; + vec1.generic[3] = x.s; + x.s = vec1.generic[4]; + x.u <<= vec2.generic[4]; + vec1.generic[4] = x.s; + x.s = vec1.generic[5]; + x.u <<= vec2.generic[5]; + vec1.generic[5] = x.s; + x.s = vec1.generic[6]; + x.u <<= vec2.generic[6]; + vec1.generic[6] = x.s; + x.s = vec1.generic[7]; + x.u <<= vec2.generic[7]; + vec1.generic[7] = x.s; + return vec1; +} +# define VINT64x8_LSHIFT_DEFINED +#endif +#if !defined(VUINT64x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_splat(vec_uint64 x) +{ + vuint64x8 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + return vec; +} +# define VUINT64x8_SPLAT_DEFINED +#endif +#if !defined(VUINT64x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_load_aligned(const vec_uint64 x[8]) +{ + vuint64x8 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VUINT64x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT64x8_LOAD_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_load(const vec_uint64 x[8]) +{ + vuint64x8 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VUINT64x8_LOAD_DEFINED +#endif +#if !defined(VUINT64x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint64x8_store_aligned(vuint64x8 vec, vec_uint64 x[8]) +{ + memcpy(x, vec.generic, 64); +} +# define VUINT64x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT64x8_STORE_DEFINED) +VEC_FUNC_IMPL void vuint64x8_store(vuint64x8 vec, vec_uint64 x[8]) +{ + memcpy(x, vec.generic, 
64); +} +# define VUINT64x8_STORE_DEFINED +#endif +#if !defined(VUINT64x8_ADD_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_add(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + return vec1; +} +# define VUINT64x8_ADD_DEFINED +#endif +#if !defined(VUINT64x8_SUB_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_sub(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + return vec1; +} +# define VUINT64x8_SUB_DEFINED +#endif +#if !defined(VUINT64x8_MUL_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_mul(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + return vec1; +} +# define VUINT64x8_MUL_DEFINED +#endif +#if !defined(VUINT64x8_DIV_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_div(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + return vec1; +} +# define VUINT64x8_DIV_DEFINED +#endif +#if !defined(VUINT64x8_MOD_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_mod(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? 
(vec1.generic[7] % vec2.generic[7]) : 0); + return vec1; +} +# define VUINT64x8_MOD_DEFINED +#endif +#if !defined(VUINT64x8_AVG_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_avg(vuint64x8 vec1, vuint64x8 vec2) +{ +vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1); +vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1); +vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1); +vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1); +vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1); +vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1); +vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1); +vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1); + return vec1; +} +# define VUINT64x8_AVG_DEFINED +#endif +#if !defined(VUINT64x8_AND_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_and(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]); + return vec1; +} +# define VUINT64x8_AND_DEFINED +#endif +#if !defined(VUINT64x8_OR_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_or(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]); + return vec1; +} +# define VUINT64x8_OR_DEFINED +#endif +#if !defined(VUINT64x8_XOR_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_xor(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]); + return vec1; +} +# define VUINT64x8_XOR_DEFINED +#endif +#if !defined(VUINT64x8_NOT_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_not(vuint64x8 vec) +{ + vec.generic[0] = ~vec.generic[0]; + vec.generic[1] = ~vec.generic[1]; + vec.generic[2] = ~vec.generic[2]; + vec.generic[3] = ~vec.generic[3]; + vec.generic[4] = ~vec.generic[4]; + vec.generic[5] = ~vec.generic[5]; + vec.generic[6] = ~vec.generic[6]; + vec.generic[7] = ~vec.generic[7]; + return vec; +} +# define VUINT64x8_NOT_DEFINED +#endif +#if !defined(VUINT64x8_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint64x8 
vuint64x8_cmplt(vuint64x8 vec1, vuint64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VUINT64x8_CMPLT_DEFINED +#endif +#if !defined(VUINT64x8_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpeq(vuint64x8 vec1, vuint64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VUINT64x8_CMPEQ_DEFINED +#endif +#if !defined(VUINT64x8_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpgt(vuint64x8 vec1, vuint64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VUINT64x8_CMPGT_DEFINED +#endif +#if !defined(VUINT64x8_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmple(vuint64x8 vec1, vuint64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VUINT64x8_CMPLE_DEFINED +#endif +#if !defined(VUINT64x8_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpge(vuint64x8 vec1, vuint64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 
0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VUINT64x8_CMPGE_DEFINED +#endif +#if !defined(VUINT64x8_MIN_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_min(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VUINT64x8_MIN_DEFINED +#endif +#if !defined(VUINT64x8_MAX_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_max(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? 
(vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VUINT64x8_MAX_DEFINED +#endif +#if !defined(VUINT64x8_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_rshift(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + vec1.generic[4] >>= vec2.generic[0]; + vec1.generic[5] >>= vec2.generic[0]; + vec1.generic[6] >>= vec2.generic[0]; + vec1.generic[7] >>= vec2.generic[0]; + return vec1; +} +# define VUINT64x8_RSHIFT_DEFINED +#endif +#if !defined(VUINT64x8_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_lrshift(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] >>= vec2.generic[0]; + vec1.generic[1] >>= vec2.generic[0]; + vec1.generic[2] >>= vec2.generic[0]; + vec1.generic[3] >>= vec2.generic[0]; + vec1.generic[4] >>= vec2.generic[0]; + vec1.generic[5] >>= vec2.generic[0]; + vec1.generic[6] >>= vec2.generic[0]; + vec1.generic[7] >>= vec2.generic[0]; + return vec1; +} +# define VUINT64x8_LRSHIFT_DEFINED +#endif +#if !defined(VUINT64x8_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint64x8 vuint64x8_lshift(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.generic[0] <<= vec2.generic[0]; + vec1.generic[1] <<= vec2.generic[0]; + vec1.generic[2] <<= vec2.generic[0]; + vec1.generic[3] <<= vec2.generic[0]; + vec1.generic[4] <<= vec2.generic[0]; + vec1.generic[5] <<= vec2.generic[0]; + vec1.generic[6] <<= vec2.generic[0]; + vec1.generic[7] <<= vec2.generic[0]; + return vec1; +} +# define VUINT64x8_LSHIFT_DEFINED +#endif +#if !defined(VF32x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_splat(vec_f32 x) +{ + vf32x2 vec; + vec.generic[0] = x; + vec.generic[1] = x; + return vec; +} +# define VF32x2_SPLAT_DEFINED +#endif +#if !defined(VF32x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_load_aligned(const vec_f32 x[2]) +{ + vf32x2 vec; + memcpy(vec.generic, x, 8); + return vec; +} +# define VF32x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x2_LOAD_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_load(const vec_f32 x[2]) +{ + vf32x2 vec; + memcpy(vec.generic, x, 8); + return vec; +} +# define VF32x2_LOAD_DEFINED +#endif +#if !defined(VF32x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf32x2_store_aligned(vf32x2 vec, vec_f32 x[2]) +{ + memcpy(x, vec.generic, 8); +} +# define VF32x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x2_STORE_DEFINED) +VEC_FUNC_IMPL void vf32x2_store(vf32x2 vec, vec_f32 x[2]) +{ + memcpy(x, vec.generic, 8); +} +# define VF32x2_STORE_DEFINED +#endif +#if !defined(VF32x2_ADD_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_add(vf32x2 vec1, vf32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + return vec1; +} +# define VF32x2_ADD_DEFINED +#endif +#if !defined(VF32x2_SUB_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_sub(vf32x2 vec1, vf32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + return vec1; +} +# define VF32x2_SUB_DEFINED +#endif +#if !defined(VF32x2_MUL_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_mul(vf32x2 vec1, vf32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + return vec1; +} +# define VF32x2_MUL_DEFINED +#endif +#if !defined(VF32x2_DIV_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_div(vf32x2 vec1, vf32x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? 
(vec1.generic[1] / vec2.generic[1]) : 0); + return vec1; +} +# define VF32x2_DIV_DEFINED +#endif +#if !defined(VF32x2_MOD_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_mod(vf32x2 vec1, vf32x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0); + return vec1; +} +# define VF32x2_MOD_DEFINED +#endif +#if !defined(VF32x2_AVG_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_avg(vf32x2 vec1, vf32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2; + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2; + return vec1; +} +# define VF32x2_AVG_DEFINED +#endif +#if !defined(VF32x2_CMPLT_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_cmplt(vf32x2 vec1, vf32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x2_CMPLT_DEFINED +#endif +#if !defined(VF32x2_CMPEQ_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_cmpeq(vf32x2 vec1, vf32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x2_CMPEQ_DEFINED +#endif +#if !defined(VF32x2_CMPGT_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_cmpgt(vf32x2 vec1, vf32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x2_CMPGT_DEFINED +#endif +#if !defined(VF32x2_CMPLE_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_cmple(vf32x2 vec1, vf32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x2_CMPLE_DEFINED +#endif +#if !defined(VF32x2_CMPGE_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_cmpge(vf32x2 vec1, vf32x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x2_CMPGE_DEFINED +#endif +#if !defined(VF32x2_MIN_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_min(vf32x2 vec1, vf32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} +# define VF32x2_MIN_DEFINED +#endif +#if !defined(VF32x2_MAX_DEFINED) +VEC_FUNC_IMPL vf32x2 vf32x2_max(vf32x2 vec1, vf32x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? 
(vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} +# define VF32x2_MAX_DEFINED +#endif +#if !defined(VF32x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_splat(vec_f32 x) +{ + vf32x4 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + return vec; +} +# define VF32x4_SPLAT_DEFINED +#endif +#if !defined(VF32x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_load_aligned(const vec_f32 x[4]) +{ + vf32x4 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VF32x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x4_LOAD_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_load(const vec_f32 x[4]) +{ + vf32x4 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VF32x4_LOAD_DEFINED +#endif +#if !defined(VF32x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf32x4_store_aligned(vf32x4 vec, vec_f32 x[4]) +{ + memcpy(x, vec.generic, 16); +} +# define VF32x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x4_STORE_DEFINED) +VEC_FUNC_IMPL void vf32x4_store(vf32x4 vec, vec_f32 x[4]) +{ + memcpy(x, vec.generic, 16); +} +# define VF32x4_STORE_DEFINED +#endif +#if !defined(VF32x4_ADD_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_add(vf32x4 vec1, vf32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + return vec1; +} +# define VF32x4_ADD_DEFINED +#endif +#if !defined(VF32x4_SUB_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_sub(vf32x4 vec1, vf32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + return vec1; +} +# define VF32x4_SUB_DEFINED +#endif +#if !defined(VF32x4_MUL_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_mul(vf32x4 vec1, vf32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + return vec1; +} +# define VF32x4_MUL_DEFINED +#endif +#if !defined(VF32x4_DIV_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_div(vf32x4 vec1, vf32x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + return vec1; +} +# define VF32x4_DIV_DEFINED +#endif +#if !defined(VF32x4_MOD_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_mod(vf32x4 vec1, vf32x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? fmod(vec1.generic[2], vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? 
fmod(vec1.generic[3], vec2.generic[3]) : 0); + return vec1; +} +# define VF32x4_MOD_DEFINED +#endif +#if !defined(VF32x4_AVG_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_avg(vf32x4 vec1, vf32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2; + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2; + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]) / 2; + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]) / 2; + return vec1; +} +# define VF32x4_AVG_DEFINED +#endif +#if !defined(VF32x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmplt(vf32x4 vec1, vf32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x4_CMPLT_DEFINED +#endif +#if !defined(VF32x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpeq(vf32x4 vec1, vf32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x4_CMPEQ_DEFINED +#endif +#if !defined(VF32x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpgt(vf32x4 vec1, vf32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x4_CMPGT_DEFINED +#endif +#if !defined(VF32x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmple(vf32x4 vec1, vf32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x4_CMPLE_DEFINED +#endif +#if !defined(VF32x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpge(vf32x4 vec1, vf32x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x4_CMPGE_DEFINED +#endif +#if !defined(VF32x4_MIN_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_min(vf32x4 vec1, vf32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? 
(vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VF32x4_MIN_DEFINED +#endif +#if !defined(VF32x4_MAX_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_max(vf32x4 vec1, vf32x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VF32x4_MAX_DEFINED +#endif +#if !defined(VF32x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_splat(vec_f32 x) +{ + vf32x8 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + return vec; +} +# define VF32x8_SPLAT_DEFINED +#endif +#if !defined(VF32x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_load_aligned(const vec_f32 x[8]) +{ + vf32x8 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VF32x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x8_LOAD_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_load(const vec_f32 x[8]) +{ + vf32x8 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VF32x8_LOAD_DEFINED +#endif +#if !defined(VF32x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf32x8_store_aligned(vf32x8 vec, vec_f32 x[8]) +{ + memcpy(x, vec.generic, 32); +} +# define VF32x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x8_STORE_DEFINED) +VEC_FUNC_IMPL void vf32x8_store(vf32x8 vec, vec_f32 x[8]) +{ + memcpy(x, vec.generic, 32); +} +# define VF32x8_STORE_DEFINED +#endif +#if !defined(VF32x8_ADD_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_add(vf32x8 vec1, vf32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + return vec1; +} +# define VF32x8_ADD_DEFINED +#endif +#if !defined(VF32x8_SUB_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_sub(vf32x8 vec1, vf32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + return vec1; +} +# define VF32x8_SUB_DEFINED +#endif +#if !defined(VF32x8_MUL_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_mul(vf32x8 vec1, vf32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + return vec1; +} +# define VF32x8_MUL_DEFINED +#endif 
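The generic float fallbacks in this header follow the same element-wise pattern as the integer ones: plain arithmetic for add/sub/mul, a divisor check that yields 0 for div and fmod, a simple (a + b) / 2 average, and all-ones byte masks written with memset for the comparisons. As a minimal usage sketch of how these definitions compose (not part of the changeset itself; the "vec/vec.h" include path and the main() harness are assumptions, while vec_f32, vf32x4_load, vf32x4_add and vf32x4_store are the names declared above):

    #include <stdio.h>

    #include "vec/vec.h" /* assumed umbrella header; use whatever header exposes vf32x4 in your tree */

    int main(void)
    {
        /* two 4-lane float vectors, processed with the generic element-wise ops */
        vec_f32 a[4] = {1.0f, 2.0f, 3.0f, 4.0f};
        vec_f32 b[4] = {4.0f, 3.0f, 2.0f, 1.0f};
        vec_f32 out[4];

        vf32x4 v1 = vf32x4_load(a);      /* unaligned load of 4 floats */
        vf32x4 v2 = vf32x4_load(b);
        vf32x4 sum = vf32x4_add(v1, v2); /* element-wise add */
        vf32x4_store(sum, out);          /* write the 4 results back out */

        printf("%f %f %f %f\n", out[0], out[1], out[2], out[3]);
        return 0;
    }

Each call maps directly onto the memcpy and per-lane bodies shown in this header; a backend that defines the corresponding *_DEFINED macros beforehand would simply take over these same names.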
+#if !defined(VF32x8_DIV_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_div(vf32x8 vec1, vf32x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + return vec1; +} +# define VF32x8_DIV_DEFINED +#endif +#if !defined(VF32x8_MOD_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_mod(vf32x8 vec1, vf32x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? fmod(vec1.generic[2], vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? fmod(vec1.generic[3], vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? fmod(vec1.generic[4], vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? fmod(vec1.generic[5], vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? fmod(vec1.generic[6], vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? fmod(vec1.generic[7], vec2.generic[7]) : 0); + return vec1; +} +# define VF32x8_MOD_DEFINED +#endif +#if !defined(VF32x8_AVG_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_avg(vf32x8 vec1, vf32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2; + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2; + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]) / 2; + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]) / 2; + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]) / 2; + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]) / 2; + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]) / 2; + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]) / 2; + return vec1; +} +# define VF32x8_AVG_DEFINED +#endif +#if !defined(VF32x8_CMPLT_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_cmplt(vf32x8 vec1, vf32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x8_CMPLT_DEFINED +#endif +#if !defined(VF32x8_CMPEQ_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_cmpeq(vf32x8 vec1, vf32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 
0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x8_CMPEQ_DEFINED +#endif +#if !defined(VF32x8_CMPGT_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_cmpgt(vf32x8 vec1, vf32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x8_CMPGT_DEFINED +#endif +#if !defined(VF32x8_CMPLE_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_cmple(vf32x8 vec1, vf32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x8_CMPLE_DEFINED +#endif +#if !defined(VF32x8_CMPGE_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_cmpge(vf32x8 vec1, vf32x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x8_CMPGE_DEFINED +#endif +#if !defined(VF32x8_MIN_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_min(vf32x8 vec1, vf32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? 
(vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VF32x8_MIN_DEFINED +#endif +#if !defined(VF32x8_MAX_DEFINED) +VEC_FUNC_IMPL vf32x8 vf32x8_max(vf32x8 vec1, vf32x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VF32x8_MAX_DEFINED +#endif +#if !defined(VF32x16_SPLAT_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_splat(vec_f32 x) +{ + vf32x16 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + vec.generic[8] = x; + vec.generic[9] = x; + vec.generic[10] = x; + vec.generic[11] = x; + vec.generic[12] = x; + vec.generic[13] = x; + vec.generic[14] = x; + vec.generic[15] = x; + return vec; +} +# define VF32x16_SPLAT_DEFINED +#endif +#if !defined(VF32x16_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_load_aligned(const vec_f32 x[16]) +{ + vf32x16 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VF32x16_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x16_LOAD_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_load(const vec_f32 x[16]) +{ + vf32x16 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VF32x16_LOAD_DEFINED +#endif +#if !defined(VF32x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf32x16_store_aligned(vf32x16 vec, vec_f32 x[16]) +{ + memcpy(x, vec.generic, 64); +} +# define VF32x16_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x16_STORE_DEFINED) +VEC_FUNC_IMPL void vf32x16_store(vf32x16 vec, vec_f32 x[16]) +{ + memcpy(x, vec.generic, 64); +} +# define VF32x16_STORE_DEFINED +#endif +#if !defined(VF32x16_ADD_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_add(vf32x16 vec1, vf32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]); + return vec1; +} +# define VF32x16_ADD_DEFINED +#endif +#if !defined(VF32x16_SUB_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_sub(vf32x16 vec1, vf32x16 vec2) +{ + vec1.generic[0] = 
(vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]); + return vec1; +} +# define VF32x16_SUB_DEFINED +#endif +#if !defined(VF32x16_MUL_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_mul(vf32x16 vec1, vf32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]); + return vec1; +} +# define VF32x16_MUL_DEFINED +#endif +#if !defined(VF32x16_DIV_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_div(vf32x16 vec1, vf32x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? 
(vec1.generic[15] / vec2.generic[15]) : 0); + return vec1; +} +# define VF32x16_DIV_DEFINED +#endif +#if !defined(VF32x16_MOD_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_mod(vf32x16 vec1, vf32x16 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? fmod(vec1.generic[2], vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? fmod(vec1.generic[3], vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? fmod(vec1.generic[4], vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? fmod(vec1.generic[5], vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? fmod(vec1.generic[6], vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? fmod(vec1.generic[7], vec2.generic[7]) : 0); + vec1.generic[8] = (vec2.generic[8] ? fmod(vec1.generic[8], vec2.generic[8]) : 0); + vec1.generic[9] = (vec2.generic[9] ? fmod(vec1.generic[9], vec2.generic[9]) : 0); + vec1.generic[10] = (vec2.generic[10] ? fmod(vec1.generic[10], vec2.generic[10]) : 0); + vec1.generic[11] = (vec2.generic[11] ? fmod(vec1.generic[11], vec2.generic[11]) : 0); + vec1.generic[12] = (vec2.generic[12] ? fmod(vec1.generic[12], vec2.generic[12]) : 0); + vec1.generic[13] = (vec2.generic[13] ? fmod(vec1.generic[13], vec2.generic[13]) : 0); + vec1.generic[14] = (vec2.generic[14] ? fmod(vec1.generic[14], vec2.generic[14]) : 0); + vec1.generic[15] = (vec2.generic[15] ? fmod(vec1.generic[15], vec2.generic[15]) : 0); + return vec1; +} +# define VF32x16_MOD_DEFINED +#endif +#if !defined(VF32x16_AVG_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_avg(vf32x16 vec1, vf32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2; + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2; + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]) / 2; + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]) / 2; + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]) / 2; + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]) / 2; + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]) / 2; + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]) / 2; + vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]) / 2; + vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]) / 2; + vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]) / 2; + vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]) / 2; + vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]) / 2; + vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]) / 2; + vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]) / 2; + vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]) / 2; + return vec1; +} +# define VF32x16_AVG_DEFINED +#endif +#if !defined(VF32x16_CMPLT_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_cmplt(vf32x16 vec1, vf32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 
0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x16_CMPLT_DEFINED +#endif +#if !defined(VF32x16_CMPEQ_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_cmpeq(vf32x16 vec1, vf32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x16_CMPEQ_DEFINED +#endif +#if !defined(VF32x16_CMPGT_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_cmpgt(vf32x16 vec1, vf32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 
0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x16_CMPGT_DEFINED +#endif +#if !defined(VF32x16_CMPLE_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_cmple(vf32x16 vec1, vf32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x16_CMPLE_DEFINED +#endif +#if !defined(VF32x16_CMPGE_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_cmpge(vf32x16 vec1, vf32x16 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 4); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 4); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 4); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 4); + memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 4); + memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 4); + memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 4); + memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 4); + memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 4); + memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 4); + memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 4); + memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 4); + return vec1; +} +# define VF32x16_CMPGE_DEFINED +#endif +#if !defined(VF32x16_MIN_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_min(vf32x16 vec1, vf32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? 
(vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VF32x16_MIN_DEFINED +#endif +#if !defined(VF32x16_MAX_DEFINED) +VEC_FUNC_IMPL vf32x16 vf32x16_max(vf32x16 vec1, vf32x16 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]); + vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]); + vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]); + vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]); + vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]); + vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]); + vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]); + vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? 
(vec1.generic[15]) : (vec2.generic[15]); + return vec1; +} +# define VF32x16_MAX_DEFINED +#endif +#if !defined(VF64x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_splat(vec_f64 x) +{ + vf64x2 vec; + vec.generic[0] = x; + vec.generic[1] = x; + return vec; +} +# define VF64x2_SPLAT_DEFINED +#endif +#if !defined(VF64x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_load_aligned(const vec_f64 x[2]) +{ + vf64x2 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VF64x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF64x2_LOAD_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_load(const vec_f64 x[2]) +{ + vf64x2 vec; + memcpy(vec.generic, x, 16); + return vec; +} +# define VF64x2_LOAD_DEFINED +#endif +#if !defined(VF64x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf64x2_store_aligned(vf64x2 vec, vec_f64 x[2]) +{ + memcpy(x, vec.generic, 16); +} +# define VF64x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF64x2_STORE_DEFINED) +VEC_FUNC_IMPL void vf64x2_store(vf64x2 vec, vec_f64 x[2]) +{ + memcpy(x, vec.generic, 16); +} +# define VF64x2_STORE_DEFINED +#endif +#if !defined(VF64x2_ADD_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_add(vf64x2 vec1, vf64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + return vec1; +} +# define VF64x2_ADD_DEFINED +#endif +#if !defined(VF64x2_SUB_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_sub(vf64x2 vec1, vf64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + return vec1; +} +# define VF64x2_SUB_DEFINED +#endif +#if !defined(VF64x2_MUL_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_mul(vf64x2 vec1, vf64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + return vec1; +} +# define VF64x2_MUL_DEFINED +#endif +#if !defined(VF64x2_DIV_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_div(vf64x2 vec1, vf64x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + return vec1; +} +# define VF64x2_DIV_DEFINED +#endif +#if !defined(VF64x2_MOD_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_mod(vf64x2 vec1, vf64x2 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0); + return vec1; +} +# define VF64x2_MOD_DEFINED +#endif +#if !defined(VF64x2_AVG_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_avg(vf64x2 vec1, vf64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2; + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2; + return vec1; +} +# define VF64x2_AVG_DEFINED +#endif +#if !defined(VF64x2_CMPLT_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_cmplt(vf64x2 vec1, vf64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x2_CMPLT_DEFINED +#endif +#if !defined(VF64x2_CMPEQ_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_cmpeq(vf64x2 vec1, vf64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 
0xFF : 0, 8); + return vec1; +} +# define VF64x2_CMPEQ_DEFINED +#endif +#if !defined(VF64x2_CMPGT_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_cmpgt(vf64x2 vec1, vf64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x2_CMPGT_DEFINED +#endif +#if !defined(VF64x2_CMPLE_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_cmple(vf64x2 vec1, vf64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x2_CMPLE_DEFINED +#endif +#if !defined(VF64x2_CMPGE_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_cmpge(vf64x2 vec1, vf64x2 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x2_CMPGE_DEFINED +#endif +#if !defined(VF64x2_MIN_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_min(vf64x2 vec1, vf64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} +# define VF64x2_MIN_DEFINED +#endif +#if !defined(VF64x2_MAX_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_max(vf64x2 vec1, vf64x2 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + return vec1; +} +# define VF64x2_MAX_DEFINED +#endif +#if !defined(VF64x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_splat(vec_f64 x) +{ + vf64x4 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + return vec; +} +# define VF64x4_SPLAT_DEFINED +#endif +#if !defined(VF64x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_load_aligned(const vec_f64 x[4]) +{ + vf64x4 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VF64x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF64x4_LOAD_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_load(const vec_f64 x[4]) +{ + vf64x4 vec; + memcpy(vec.generic, x, 32); + return vec; +} +# define VF64x4_LOAD_DEFINED +#endif +#if !defined(VF64x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf64x4_store_aligned(vf64x4 vec, vec_f64 x[4]) +{ + memcpy(x, vec.generic, 32); +} +# define VF64x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF64x4_STORE_DEFINED) +VEC_FUNC_IMPL void vf64x4_store(vf64x4 vec, vec_f64 x[4]) +{ + memcpy(x, vec.generic, 32); +} +# define VF64x4_STORE_DEFINED +#endif +#if !defined(VF64x4_ADD_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_add(vf64x4 vec1, vf64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + return vec1; +} +# define VF64x4_ADD_DEFINED +#endif +#if !defined(VF64x4_SUB_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_sub(vf64x4 vec1, vf64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + return vec1; +} +# define VF64x4_SUB_DEFINED +#endif +#if !defined(VF64x4_MUL_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_mul(vf64x4 
vec1, vf64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + return vec1; +} +# define VF64x4_MUL_DEFINED +#endif +#if !defined(VF64x4_DIV_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_div(vf64x4 vec1, vf64x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + return vec1; +} +# define VF64x4_DIV_DEFINED +#endif +#if !defined(VF64x4_MOD_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_mod(vf64x4 vec1, vf64x4 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? fmod(vec1.generic[2], vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? fmod(vec1.generic[3], vec2.generic[3]) : 0); + return vec1; +} +# define VF64x4_MOD_DEFINED +#endif +#if !defined(VF64x4_AVG_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_avg(vf64x4 vec1, vf64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2; + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2; + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]) / 2; + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]) / 2; + return vec1; +} +# define VF64x4_AVG_DEFINED +#endif +#if !defined(VF64x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_cmplt(vf64x4 vec1, vf64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x4_CMPLT_DEFINED +#endif +#if !defined(VF64x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_cmpeq(vf64x4 vec1, vf64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x4_CMPEQ_DEFINED +#endif +#if !defined(VF64x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_cmpgt(vf64x4 vec1, vf64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x4_CMPGT_DEFINED +#endif +#if !defined(VF64x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_cmple(vf64x4 vec1, vf64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 
0xFF : 0, 8); + return vec1; +} +# define VF64x4_CMPLE_DEFINED +#endif +#if !defined(VF64x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_cmpge(vf64x4 vec1, vf64x4 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x4_CMPGE_DEFINED +#endif +#if !defined(VF64x4_MIN_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_min(vf64x4 vec1, vf64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VF64x4_MIN_DEFINED +#endif +#if !defined(VF64x4_MAX_DEFINED) +VEC_FUNC_IMPL vf64x4 vf64x4_max(vf64x4 vec1, vf64x4 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + return vec1; +} +# define VF64x4_MAX_DEFINED +#endif +#if !defined(VF64x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_splat(vec_f64 x) +{ + vf64x8 vec; + vec.generic[0] = x; + vec.generic[1] = x; + vec.generic[2] = x; + vec.generic[3] = x; + vec.generic[4] = x; + vec.generic[5] = x; + vec.generic[6] = x; + vec.generic[7] = x; + return vec; +} +# define VF64x8_SPLAT_DEFINED +#endif +#if !defined(VF64x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_load_aligned(const vec_f64 x[8]) +{ + vf64x8 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VF64x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF64x8_LOAD_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_load(const vec_f64 x[8]) +{ + vf64x8 vec; + memcpy(vec.generic, x, 64); + return vec; +} +# define VF64x8_LOAD_DEFINED +#endif +#if !defined(VF64x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf64x8_store_aligned(vf64x8 vec, vec_f64 x[8]) +{ + memcpy(x, vec.generic, 64); +} +# define VF64x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF64x8_STORE_DEFINED) +VEC_FUNC_IMPL void vf64x8_store(vf64x8 vec, vec_f64 x[8]) +{ + memcpy(x, vec.generic, 64); +} +# define VF64x8_STORE_DEFINED +#endif +#if !defined(VF64x8_ADD_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_add(vf64x8 vec1, vf64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]); + return vec1; +} +# define VF64x8_ADD_DEFINED +#endif +#if !defined(VF64x8_SUB_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_sub(vf64x8 vec1, vf64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]); + 
vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]); + return vec1; +} +# define VF64x8_SUB_DEFINED +#endif +#if !defined(VF64x8_MUL_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_mul(vf64x8 vec1, vf64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]); + return vec1; +} +# define VF64x8_MUL_DEFINED +#endif +#if !defined(VF64x8_DIV_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_div(vf64x8 vec1, vf64x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0); + return vec1; +} +# define VF64x8_DIV_DEFINED +#endif +#if !defined(VF64x8_MOD_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_mod(vf64x8 vec1, vf64x8 vec2) +{ + vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0); + vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0); + vec1.generic[2] = (vec2.generic[2] ? fmod(vec1.generic[2], vec2.generic[2]) : 0); + vec1.generic[3] = (vec2.generic[3] ? fmod(vec1.generic[3], vec2.generic[3]) : 0); + vec1.generic[4] = (vec2.generic[4] ? fmod(vec1.generic[4], vec2.generic[4]) : 0); + vec1.generic[5] = (vec2.generic[5] ? fmod(vec1.generic[5], vec2.generic[5]) : 0); + vec1.generic[6] = (vec2.generic[6] ? fmod(vec1.generic[6], vec2.generic[6]) : 0); + vec1.generic[7] = (vec2.generic[7] ? fmod(vec1.generic[7], vec2.generic[7]) : 0); + return vec1; +} +# define VF64x8_MOD_DEFINED +#endif +#if !defined(VF64x8_AVG_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_avg(vf64x8 vec1, vf64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2; + vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2; + vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]) / 2; + vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]) / 2; + vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]) / 2; + vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]) / 2; + vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]) / 2; + vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]) / 2; + return vec1; +} +# define VF64x8_AVG_DEFINED +#endif +#if !defined(VF64x8_CMPLT_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_cmplt(vf64x8 vec1, vf64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 
0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x8_CMPLT_DEFINED +#endif +#if !defined(VF64x8_CMPEQ_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_cmpeq(vf64x8 vec1, vf64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x8_CMPEQ_DEFINED +#endif +#if !defined(VF64x8_CMPGT_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_cmpgt(vf64x8 vec1, vf64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x8_CMPGT_DEFINED +#endif +#if !defined(VF64x8_CMPLE_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_cmple(vf64x8 vec1, vf64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x8_CMPLE_DEFINED +#endif +#if !defined(VF64x8_CMPGE_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_cmpge(vf64x8 vec1, vf64x8 vec2) +{ + memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8); + memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8); + memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 8); + memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 8); + memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 8); + memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 
0xFF : 0, 8); + memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 8); + memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 8); + return vec1; +} +# define VF64x8_CMPGE_DEFINED +#endif +#if !defined(VF64x8_MIN_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_min(vf64x8 vec1, vf64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VF64x8_MIN_DEFINED +#endif +#if !defined(VF64x8_MAX_DEFINED) +VEC_FUNC_IMPL vf64x8 vf64x8_max(vf64x8 vec1, vf64x8 vec2) +{ + vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]); + vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]); + vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]); + vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]); + vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]); + vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]); + vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]); + vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]); + return vec1; +} +# define VF64x8_MAX_DEFINED +#endif
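The generic double fallbacks above are plain scalar/memcpy code, so they can be exercised on any target with no SIMD at all. Below is a minimal usage sketch (not part of the changeset); it assumes the public header is reachable as "vec/vec.h" and that the vf64x8_* implementation functions are callable directly, which may differ from the library's intended public entry points. It loads two unaligned vec_f64 arrays, adds them lane-wise, and stores the result.

	#include <stdio.h>
	#include "vec/vec.h"   /* assumed public entry point */

	int main(void)
	{
		vec_f64 a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
		vec_f64 b[8] = {8, 7, 6, 5, 4, 3, 2, 1};
		vec_f64 sum[8];

		/* unaligned load -> lane-wise add -> unaligned store;
		 * each step maps onto the memcpy/scalar fallbacks above */
		vf64x8 v1 = vf64x8_load(a);
		vf64x8 v2 = vf64x8_load(b);
		vf64x8_store(vf64x8_add(v1, v2), sum);

		for (int i = 0; i < 8; i++)
			printf("%f\n", sum[i]);

		return 0;
	}

With these fallbacks every lane of sum comes out as 9.0. Note also that the cmp* fallbacks above build their results by memset-ing the 8 bytes of each double to all-ones or all-zeroes, matching the lane-mask convention of the integer types.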
--- a/include/vec/impl/ppc/altivec.h Tue Apr 29 16:54:13 2025 -0400 +++ b/include/vec/impl/ppc/altivec.h Wed Apr 30 18:36:38 2025 -0400 @@ -25,16 +25,10 @@ /* This file is automatically generated! Do not edit it directly! * Edit the code that generates it in utils/genaltivec.c --paper */ -#ifndef VEC_IMPL_PPC_ALTIVEC_H_ -#define VEC_IMPL_PPC_ALTIVEC_H_ - - +/* ------------------------------------------------------------------------ */ +/* PREPROCESSOR HELL INCOMING */ - -/* vuint8x16 */ - -#if defined(vec_splats) || defined(vec_splat_s8) -#ifndef VINT8x16_SPLAT_DEFINED +#if !defined(VINT8x16_SPLAT_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_splat(vec_int8 x) { vint8x16 vec; @@ -43,8 +37,7 @@ } # define VINT8x16_SPLAT_DEFINED #endif -#endif -#ifndef VINT8x16_LOAD_ALIGNED_DEFINED +#if !defined(VINT8x16_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_load_aligned(const vec_int8 x[16]) { vint8x16 vec; @@ -53,7 +46,7 @@ } # define VINT8x16_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT8x16_LOAD_DEFINED +#if !defined(VINT8x16_LOAD_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_load(const vec_int8 x[16]) { vint8x16 vec; @@ -62,21 +55,14 @@ } # define VINT8x16_LOAD_DEFINED #endif -#ifndef VINT8x16_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 arr[16]) +#if !defined(VINT8x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 x[16]) { - vec_st(vec.altivec, 0, arr); + vec_st(vec.altivec, 0, x); } # define VINT8x16_STORE_ALIGNED_DEFINED #endif -#ifndef VINT8x16_STORE_DEFINED -VEC_FUNC_IMPL void vint8x16_store(vint8x16 vec, vec_int8 arr[16]) -{ - memcpy(arr, &vec, sizeof(vec)); -} -# define VINT8x16_STORE_DEFINED -#endif -#ifndef VINT8x16_ADD_DEFINED +#if !defined(VINT8x16_ADD_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_add(vint8x16 vec1, vint8x16 vec2) { vint8x16 vec; @@ -85,7 +71,7 @@ } # define VINT8x16_ADD_DEFINED #endif -#ifndef VINT8x16_SUB_DEFINED +#if !defined(VINT8x16_SUB_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_sub(vint8x16 vec1, vint8x16 vec2) { vint8x16 vec; @@ -94,8 +80,7 @@ } # define VINT8x16_SUB_DEFINED #endif -#ifdef vec_mul -#ifndef VINT8x16_MUL_DEFINED +#if !defined(VINT8x16_MUL_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_mul(vint8x16 vec1, vint8x16 vec2) { vint8x16 vec; @@ -104,8 +89,16 @@ } # define VINT8x16_MUL_DEFINED #endif +#if !defined(VINT8x16_AVG_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_avg(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_AVG_DEFINED #endif -#ifndef VINT8x16_AND_DEFINED +#if !defined(VINT8x16_AND_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2) { vint8x16 vec; @@ -114,7 +107,7 @@ } # define VINT8x16_AND_DEFINED #endif -#ifndef VINT8x16_OR_DEFINED +#if !defined(VINT8x16_OR_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2) { vint8x16 vec; @@ -123,7 +116,7 @@ } # define VINT8x16_OR_DEFINED #endif -#ifndef VINT8x16_XOR_DEFINED +#if !defined(VINT8x16_XOR_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2) { vint8x16 vec; @@ -132,7 +125,7 @@ } # define VINT8x16_XOR_DEFINED #endif -#ifndef VINT8x16_CMPLT_DEFINED +#if !defined(VINT8x16_CMPLT_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2) { vint8x16 vec; @@ -141,7 +134,7 @@ } # define VINT8x16_CMPLT_DEFINED #endif -#ifndef VINT8x16_CMPEQ_DEFINED +#if !defined(VINT8x16_CMPEQ_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 
vec2) { vint8x16 vec; @@ -150,7 +143,7 @@ } # define VINT8x16_CMPEQ_DEFINED #endif -#ifndef VINT8x16_CMPGT_DEFINED +#if !defined(VINT8x16_CMPGT_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2) { vint8x16 vec; @@ -159,7 +152,7 @@ } # define VINT8x16_CMPGT_DEFINED #endif -#ifndef VINT8x16_MIN_DEFINED +#if !defined(VINT8x16_MIN_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2) { vint8x16 vec; @@ -168,7 +161,7 @@ } # define VINT8x16_MIN_DEFINED #endif -#ifndef VINT8x16_MAX_DEFINED +#if !defined(VINT8x16_MAX_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2) { vint8x16 vec; @@ -177,34 +170,7 @@ } # define VINT8x16_MAX_DEFINED #endif -#ifndef VINT8x16_AVG_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2) -{ - vint8x16 vec; - vec.altivec = (vector signed char)vec_avg(vec1.altivec, vec2.altivec); - return vec; -} -# define VINT8x16_AVG_DEFINED -#endif -#ifndef VINT8x16_LSHIFT_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2) -{ - vint8x16 vec; - vec.altivec = (vector signed char)vec_sl(vec1.altivec, vec2.altivec); - return vec; -} -# define VINT8x16_LSHIFT_DEFINED -#endif -#ifndef VINT8x16_LRSHIFT_DEFINED -VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2) -{ - vint8x16 vec; - vec.altivec = (vector signed char)vec_sr(vec1.altivec, vec2.altivec); - return vec; -} -# define VINT8x16_LRSHIFT_DEFINED -#endif -#ifndef VINT8x16_RSHIFT_DEFINED +#if !defined(VINT8x16_RSHIFT_DEFINED) VEC_FUNC_IMPL vint8x16 vint8x16_rshift(vint8x16 vec1, vuint8x16 vec2) { vint8x16 vec; @@ -213,12 +179,25 @@ } # define VINT8x16_RSHIFT_DEFINED #endif - - -/* vint8x16 */ - -#if defined(vec_splats) || defined(vec_splat_u8) -#ifndef VUINT8x16_SPLAT_DEFINED +#if !defined(VINT8x16_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_LRSHIFT_DEFINED +#endif +#if !defined(VINT8x16_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_sl(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x16_SPLAT_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_splat(vec_uint8 x) { vuint8x16 vec; @@ -227,8 +206,7 @@ } # define VUINT8x16_SPLAT_DEFINED #endif -#endif -#ifndef VUINT8x16_LOAD_ALIGNED_DEFINED +#if !defined(VUINT8x16_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_load_aligned(const vec_uint8 x[16]) { vuint8x16 vec; @@ -237,7 +215,7 @@ } # define VUINT8x16_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT8x16_LOAD_DEFINED +#if !defined(VUINT8x16_LOAD_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 x[16]) { vuint8x16 vec; @@ -246,21 +224,14 @@ } # define VUINT8x16_LOAD_DEFINED #endif -#ifndef VUINT8x16_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 arr[16]) +#if !defined(VUINT8x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 x[16]) { - vec_st(vec.altivec, 0, arr); + vec_st(vec.altivec, 0, x); } # define VUINT8x16_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT8x16_STORE_DEFINED -VEC_FUNC_IMPL void vuint8x16_store(vuint8x16 vec, vec_uint8 arr[16]) -{ - memcpy(arr, &vec, sizeof(vec)); -} -# define VUINT8x16_STORE_DEFINED -#endif -#ifndef VUINT8x16_ADD_DEFINED +#if 
!defined(VUINT8x16_ADD_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_add(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; @@ -269,7 +240,7 @@ } # define VUINT8x16_ADD_DEFINED #endif -#ifndef VUINT8x16_SUB_DEFINED +#if !defined(VUINT8x16_SUB_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_sub(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; @@ -278,8 +249,7 @@ } # define VUINT8x16_SUB_DEFINED #endif -#ifdef vec_mul -#ifndef VUINT8x16_MUL_DEFINED +#if !defined(VUINT8x16_MUL_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_mul(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; @@ -288,8 +258,16 @@ } # define VUINT8x16_MUL_DEFINED #endif +#if !defined(VUINT8x16_AVG_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_avg(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_AVG_DEFINED #endif -#ifndef VUINT8x16_AND_DEFINED +#if !defined(VUINT8x16_AND_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_and(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; @@ -298,7 +276,7 @@ } # define VUINT8x16_AND_DEFINED #endif -#ifndef VUINT8x16_OR_DEFINED +#if !defined(VUINT8x16_OR_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_or(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; @@ -307,7 +285,7 @@ } # define VUINT8x16_OR_DEFINED #endif -#ifndef VUINT8x16_XOR_DEFINED +#if !defined(VUINT8x16_XOR_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_xor(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; @@ -316,7 +294,7 @@ } # define VUINT8x16_XOR_DEFINED #endif -#ifndef VUINT8x16_CMPLT_DEFINED +#if !defined(VUINT8x16_CMPLT_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_cmplt(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; @@ -325,7 +303,7 @@ } # define VUINT8x16_CMPLT_DEFINED #endif -#ifndef VUINT8x16_CMPEQ_DEFINED +#if !defined(VUINT8x16_CMPEQ_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpeq(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; @@ -334,7 +312,7 @@ } # define VUINT8x16_CMPEQ_DEFINED #endif -#ifndef VUINT8x16_CMPGT_DEFINED +#if !defined(VUINT8x16_CMPGT_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpgt(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; @@ -343,7 +321,7 @@ } # define VUINT8x16_CMPGT_DEFINED #endif -#ifndef VUINT8x16_MIN_DEFINED +#if !defined(VUINT8x16_MIN_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_min(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; @@ -352,7 +330,7 @@ } # define VUINT8x16_MIN_DEFINED #endif -#ifndef VUINT8x16_MAX_DEFINED +#if !defined(VUINT8x16_MAX_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_max(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; @@ -361,16 +339,25 @@ } # define VUINT8x16_MAX_DEFINED #endif -#ifndef VUINT8x16_AVG_DEFINED -VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2) +#if !defined(VUINT8x16_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; - vec.altivec = (vector unsigned char)vec_avg(vec1.altivec, vec2.altivec); + vec.altivec = vec_sr(vec1.altivec, vec2.altivec); return vec; } -# define VUINT8x16_AVG_DEFINED +# define VUINT8x16_RSHIFT_DEFINED #endif -#ifndef VUINT8x16_LSHIFT_DEFINED +#if !defined(VUINT8x16_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_LRSHIFT_DEFINED +#endif +#if !defined(VUINT8x16_LSHIFT_DEFINED) VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2) { vuint8x16 vec; @@ -379,30 +366,7 @@ } # define 
VUINT8x16_LSHIFT_DEFINED #endif -#ifndef VUINT8x16_LRSHIFT_DEFINED -VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2) -{ - vuint8x16 vec; - vec.altivec = (vector unsigned char)vec_sr(vec1.altivec, vec2.altivec); - return vec; -} -# define VUINT8x16_LRSHIFT_DEFINED -#endif -#ifndef VUINT8x16_RSHIFT_DEFINED -VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2) -{ - vuint8x16 vec; - vec.altivec = vec_sr(vec1.altivec, vec2.altivec); - return vec; -} -# define VUINT8x16_RSHIFT_DEFINED -#endif - - -/* vuint16x8 */ - -#if defined(vec_splats) || defined(vec_splat_s16) -#ifndef VINT16x8_SPLAT_DEFINED +#if !defined(VINT16x8_SPLAT_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_splat(vec_int16 x) { vint16x8 vec; @@ -411,8 +375,7 @@ } # define VINT16x8_SPLAT_DEFINED #endif -#endif -#ifndef VINT16x8_LOAD_ALIGNED_DEFINED +#if !defined(VINT16x8_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_load_aligned(const vec_int16 x[8]) { vint16x8 vec; @@ -421,7 +384,7 @@ } # define VINT16x8_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT16x8_LOAD_DEFINED +#if !defined(VINT16x8_LOAD_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_load(const vec_int16 x[8]) { vint16x8 vec; @@ -430,21 +393,14 @@ } # define VINT16x8_LOAD_DEFINED #endif -#ifndef VINT16x8_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 arr[8]) +#if !defined(VINT16x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 x[8]) { - vec_st(vec.altivec, 0, arr); + vec_st(vec.altivec, 0, x); } # define VINT16x8_STORE_ALIGNED_DEFINED #endif -#ifndef VINT16x8_STORE_DEFINED -VEC_FUNC_IMPL void vint16x8_store(vint16x8 vec, vec_int16 arr[8]) -{ - memcpy(arr, &vec, sizeof(vec)); -} -# define VINT16x8_STORE_DEFINED -#endif -#ifndef VINT16x8_ADD_DEFINED +#if !defined(VINT16x8_ADD_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_add(vint16x8 vec1, vint16x8 vec2) { vint16x8 vec; @@ -453,7 +409,7 @@ } # define VINT16x8_ADD_DEFINED #endif -#ifndef VINT16x8_SUB_DEFINED +#if !defined(VINT16x8_SUB_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_sub(vint16x8 vec1, vint16x8 vec2) { vint16x8 vec; @@ -462,8 +418,7 @@ } # define VINT16x8_SUB_DEFINED #endif -#ifdef vec_mul -#ifndef VINT16x8_MUL_DEFINED +#if !defined(VINT16x8_MUL_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_mul(vint16x8 vec1, vint16x8 vec2) { vint16x8 vec; @@ -472,8 +427,16 @@ } # define VINT16x8_MUL_DEFINED #endif +#if !defined(VINT16x8_AVG_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_avg(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_AVG_DEFINED #endif -#ifndef VINT16x8_AND_DEFINED +#if !defined(VINT16x8_AND_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2) { vint16x8 vec; @@ -482,7 +445,7 @@ } # define VINT16x8_AND_DEFINED #endif -#ifndef VINT16x8_OR_DEFINED +#if !defined(VINT16x8_OR_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2) { vint16x8 vec; @@ -491,7 +454,7 @@ } # define VINT16x8_OR_DEFINED #endif -#ifndef VINT16x8_XOR_DEFINED +#if !defined(VINT16x8_XOR_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2) { vint16x8 vec; @@ -500,7 +463,7 @@ } # define VINT16x8_XOR_DEFINED #endif -#ifndef VINT16x8_CMPLT_DEFINED +#if !defined(VINT16x8_CMPLT_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2) { vint16x8 vec; @@ -509,7 +472,7 @@ } # define VINT16x8_CMPLT_DEFINED #endif -#ifndef VINT16x8_CMPEQ_DEFINED +#if 
!defined(VINT16x8_CMPEQ_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2) { vint16x8 vec; @@ -518,7 +481,7 @@ } # define VINT16x8_CMPEQ_DEFINED #endif -#ifndef VINT16x8_CMPGT_DEFINED +#if !defined(VINT16x8_CMPGT_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2) { vint16x8 vec; @@ -527,7 +490,7 @@ } # define VINT16x8_CMPGT_DEFINED #endif -#ifndef VINT16x8_MIN_DEFINED +#if !defined(VINT16x8_MIN_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2) { vint16x8 vec; @@ -536,7 +499,7 @@ } # define VINT16x8_MIN_DEFINED #endif -#ifndef VINT16x8_MAX_DEFINED +#if !defined(VINT16x8_MAX_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2) { vint16x8 vec; @@ -545,25 +508,16 @@ } # define VINT16x8_MAX_DEFINED #endif -#ifndef VINT16x8_AVG_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2) +#if !defined(VINT16x8_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2) { vint16x8 vec; - vec.altivec = (vector signed short)vec_avg(vec1.altivec, vec2.altivec); + vec.altivec = vec_sra(vec1.altivec, vec2.altivec); return vec; } -# define VINT16x8_AVG_DEFINED +# define VINT16x8_RSHIFT_DEFINED #endif -#ifndef VINT16x8_LSHIFT_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2) -{ - vint16x8 vec; - vec.altivec = (vector signed short)vec_sl(vec1.altivec, vec2.altivec); - return vec; -} -# define VINT16x8_LSHIFT_DEFINED -#endif -#ifndef VINT16x8_LRSHIFT_DEFINED +#if !defined(VINT16x8_LRSHIFT_DEFINED) VEC_FUNC_IMPL vint16x8 vint16x8_lrshift(vint16x8 vec1, vuint16x8 vec2) { vint16x8 vec; @@ -572,21 +526,16 @@ } # define VINT16x8_LRSHIFT_DEFINED #endif -#ifndef VINT16x8_RSHIFT_DEFINED -VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2) +#if !defined(VINT16x8_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2) { vint16x8 vec; - vec.altivec = vec_sra(vec1.altivec, vec2.altivec); + vec.altivec = (vector signed short)vec_sl(vec1.altivec, vec2.altivec); return vec; } -# define VINT16x8_RSHIFT_DEFINED +# define VINT16x8_LSHIFT_DEFINED #endif - - -/* vint16x8 */ - -#if defined(vec_splats) || defined(vec_splat_u16) -#ifndef VUINT16x8_SPLAT_DEFINED +#if !defined(VUINT16x8_SPLAT_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_splat(vec_uint16 x) { vuint16x8 vec; @@ -595,8 +544,7 @@ } # define VUINT16x8_SPLAT_DEFINED #endif -#endif -#ifndef VUINT16x8_LOAD_ALIGNED_DEFINED +#if !defined(VUINT16x8_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_load_aligned(const vec_uint16 x[8]) { vuint16x8 vec; @@ -605,7 +553,7 @@ } # define VUINT16x8_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT16x8_LOAD_DEFINED +#if !defined(VUINT16x8_LOAD_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_load(const vec_uint16 x[8]) { vuint16x8 vec; @@ -614,21 +562,14 @@ } # define VUINT16x8_LOAD_DEFINED #endif -#ifndef VUINT16x8_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 arr[8]) +#if !defined(VUINT16x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 x[8]) { - vec_st(vec.altivec, 0, arr); + vec_st(vec.altivec, 0, x); } # define VUINT16x8_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT16x8_STORE_DEFINED -VEC_FUNC_IMPL void vuint16x8_store(vuint16x8 vec, vec_uint16 arr[8]) -{ - memcpy(arr, &vec, sizeof(vec)); -} -# define VUINT16x8_STORE_DEFINED -#endif -#ifndef VUINT16x8_ADD_DEFINED +#if !defined(VUINT16x8_ADD_DEFINED) VEC_FUNC_IMPL vuint16x8 
vuint16x8_add(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 vec; @@ -637,7 +578,7 @@ } # define VUINT16x8_ADD_DEFINED #endif -#ifndef VUINT16x8_SUB_DEFINED +#if !defined(VUINT16x8_SUB_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_sub(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 vec; @@ -646,8 +587,7 @@ } # define VUINT16x8_SUB_DEFINED #endif -#ifdef vec_mul -#ifndef VUINT16x8_MUL_DEFINED +#if !defined(VUINT16x8_MUL_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_mul(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 vec; @@ -656,8 +596,16 @@ } # define VUINT16x8_MUL_DEFINED #endif +#if !defined(VUINT16x8_AVG_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_avg(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_AVG_DEFINED #endif -#ifndef VUINT16x8_AND_DEFINED +#if !defined(VUINT16x8_AND_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_and(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 vec; @@ -666,7 +614,7 @@ } # define VUINT16x8_AND_DEFINED #endif -#ifndef VUINT16x8_OR_DEFINED +#if !defined(VUINT16x8_OR_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_or(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 vec; @@ -675,7 +623,7 @@ } # define VUINT16x8_OR_DEFINED #endif -#ifndef VUINT16x8_XOR_DEFINED +#if !defined(VUINT16x8_XOR_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_xor(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 vec; @@ -684,7 +632,7 @@ } # define VUINT16x8_XOR_DEFINED #endif -#ifndef VUINT16x8_CMPLT_DEFINED +#if !defined(VUINT16x8_CMPLT_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_cmplt(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 vec; @@ -693,7 +641,7 @@ } # define VUINT16x8_CMPLT_DEFINED #endif -#ifndef VUINT16x8_CMPEQ_DEFINED +#if !defined(VUINT16x8_CMPEQ_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpeq(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 vec; @@ -702,7 +650,7 @@ } # define VUINT16x8_CMPEQ_DEFINED #endif -#ifndef VUINT16x8_CMPGT_DEFINED +#if !defined(VUINT16x8_CMPGT_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpgt(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 vec; @@ -711,7 +659,7 @@ } # define VUINT16x8_CMPGT_DEFINED #endif -#ifndef VUINT16x8_MIN_DEFINED +#if !defined(VUINT16x8_MIN_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_min(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 vec; @@ -720,7 +668,7 @@ } # define VUINT16x8_MIN_DEFINED #endif -#ifndef VUINT16x8_MAX_DEFINED +#if !defined(VUINT16x8_MAX_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_max(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 vec; @@ -729,34 +677,7 @@ } # define VUINT16x8_MAX_DEFINED #endif -#ifndef VUINT16x8_AVG_DEFINED -VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2) -{ - vuint16x8 vec; - vec.altivec = (vector unsigned short)vec_avg(vec1.altivec, vec2.altivec); - return vec; -} -# define VUINT16x8_AVG_DEFINED -#endif -#ifndef VUINT16x8_LSHIFT_DEFINED -VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2) -{ - vuint16x8 vec; - vec.altivec = (vector unsigned short)vec_sl(vec1.altivec, vec2.altivec); - return vec; -} -# define VUINT16x8_LSHIFT_DEFINED -#endif -#ifndef VUINT16x8_LRSHIFT_DEFINED -VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2) -{ - vuint16x8 vec; - vec.altivec = (vector unsigned short)vec_sr(vec1.altivec, vec2.altivec); - return vec; -} -# define VUINT16x8_LRSHIFT_DEFINED -#endif -#ifndef VUINT16x8_RSHIFT_DEFINED +#if !defined(VUINT16x8_RSHIFT_DEFINED) VEC_FUNC_IMPL vuint16x8 vuint16x8_rshift(vuint16x8 vec1, vuint16x8 vec2) { vuint16x8 vec; @@ -765,12 +686,25 @@ } # define 
VUINT16x8_RSHIFT_DEFINED #endif - - -/* vuint32x4 */ - -#if defined(vec_splats) || defined(vec_splat_s32) -#ifndef VINT32x4_SPLAT_DEFINED +#if !defined(VUINT16x8_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_LRSHIFT_DEFINED +#endif +#if !defined(VUINT16x8_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_sl(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_LSHIFT_DEFINED +#endif +#if !defined(VINT32x4_SPLAT_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_splat(vec_int32 x) { vint32x4 vec; @@ -779,8 +713,7 @@ } # define VINT32x4_SPLAT_DEFINED #endif -#endif -#ifndef VINT32x4_LOAD_ALIGNED_DEFINED +#if !defined(VINT32x4_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_load_aligned(const vec_int32 x[4]) { vint32x4 vec; @@ -789,7 +722,7 @@ } # define VINT32x4_LOAD_ALIGNED_DEFINED #endif -#ifndef VINT32x4_LOAD_DEFINED +#if !defined(VINT32x4_LOAD_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_load(const vec_int32 x[4]) { vint32x4 vec; @@ -798,21 +731,14 @@ } # define VINT32x4_LOAD_DEFINED #endif -#ifndef VINT32x4_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 arr[4]) +#if !defined(VINT32x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 x[4]) { - vec_st(vec.altivec, 0, arr); + vec_st(vec.altivec, 0, x); } # define VINT32x4_STORE_ALIGNED_DEFINED #endif -#ifndef VINT32x4_STORE_DEFINED -VEC_FUNC_IMPL void vint32x4_store(vint32x4 vec, vec_int32 arr[4]) -{ - memcpy(arr, &vec, sizeof(vec)); -} -# define VINT32x4_STORE_DEFINED -#endif -#ifndef VINT32x4_ADD_DEFINED +#if !defined(VINT32x4_ADD_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2) { vint32x4 vec; @@ -821,7 +747,7 @@ } # define VINT32x4_ADD_DEFINED #endif -#ifndef VINT32x4_SUB_DEFINED +#if !defined(VINT32x4_SUB_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_sub(vint32x4 vec1, vint32x4 vec2) { vint32x4 vec; @@ -830,8 +756,7 @@ } # define VINT32x4_SUB_DEFINED #endif -#ifdef vec_mul -#ifndef VINT32x4_MUL_DEFINED +#if !defined(VINT32x4_MUL_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_mul(vint32x4 vec1, vint32x4 vec2) { vint32x4 vec; @@ -840,8 +765,16 @@ } # define VINT32x4_MUL_DEFINED #endif +#if !defined(VINT32x4_AVG_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_avg(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_AVG_DEFINED #endif -#ifndef VINT32x4_AND_DEFINED +#if !defined(VINT32x4_AND_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2) { vint32x4 vec; @@ -850,7 +783,7 @@ } # define VINT32x4_AND_DEFINED #endif -#ifndef VINT32x4_OR_DEFINED +#if !defined(VINT32x4_OR_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2) { vint32x4 vec; @@ -859,7 +792,7 @@ } # define VINT32x4_OR_DEFINED #endif -#ifndef VINT32x4_XOR_DEFINED +#if !defined(VINT32x4_XOR_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2) { vint32x4 vec; @@ -868,7 +801,7 @@ } # define VINT32x4_XOR_DEFINED #endif -#ifndef VINT32x4_CMPLT_DEFINED +#if !defined(VINT32x4_CMPLT_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2) { vint32x4 vec; @@ -877,7 +810,7 @@ } # define VINT32x4_CMPLT_DEFINED #endif -#ifndef 
VINT32x4_CMPEQ_DEFINED +#if !defined(VINT32x4_CMPEQ_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2) { vint32x4 vec; @@ -886,7 +819,7 @@ } # define VINT32x4_CMPEQ_DEFINED #endif -#ifndef VINT32x4_CMPGT_DEFINED +#if !defined(VINT32x4_CMPGT_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2) { vint32x4 vec; @@ -895,7 +828,7 @@ } # define VINT32x4_CMPGT_DEFINED #endif -#ifndef VINT32x4_MIN_DEFINED +#if !defined(VINT32x4_MIN_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2) { vint32x4 vec; @@ -904,7 +837,7 @@ } # define VINT32x4_MIN_DEFINED #endif -#ifndef VINT32x4_MAX_DEFINED +#if !defined(VINT32x4_MAX_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2) { vint32x4 vec; @@ -913,16 +846,25 @@ } # define VINT32x4_MAX_DEFINED #endif -#ifndef VINT32x4_AVG_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2) +#if !defined(VINT32x4_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2) { vint32x4 vec; - vec.altivec = (vector signed int)vec_avg(vec1.altivec, vec2.altivec); + vec.altivec = vec_sra(vec1.altivec, vec2.altivec); return vec; } -# define VINT32x4_AVG_DEFINED +# define VINT32x4_RSHIFT_DEFINED #endif -#ifndef VINT32x4_LSHIFT_DEFINED +#if !defined(VINT32x4_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_LRSHIFT_DEFINED +#endif +#if !defined(VINT32x4_LSHIFT_DEFINED) VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2) { vint32x4 vec; @@ -931,30 +873,7 @@ } # define VINT32x4_LSHIFT_DEFINED #endif -#ifndef VINT32x4_LRSHIFT_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2) -{ - vint32x4 vec; - vec.altivec = (vector signed int)vec_sr(vec1.altivec, vec2.altivec); - return vec; -} -# define VINT32x4_LRSHIFT_DEFINED -#endif -#ifndef VINT32x4_RSHIFT_DEFINED -VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2) -{ - vint32x4 vec; - vec.altivec = vec_sra(vec1.altivec, vec2.altivec); - return vec; -} -# define VINT32x4_RSHIFT_DEFINED -#endif - - -/* vint32x4 */ - -#if defined(vec_splats) || defined(vec_splat_u32) -#ifndef VUINT32x4_SPLAT_DEFINED +#if !defined(VUINT32x4_SPLAT_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_splat(vec_uint32 x) { vuint32x4 vec; @@ -963,8 +882,7 @@ } # define VUINT32x4_SPLAT_DEFINED #endif -#endif -#ifndef VUINT32x4_LOAD_ALIGNED_DEFINED +#if !defined(VUINT32x4_LOAD_ALIGNED_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_load_aligned(const vec_uint32 x[4]) { vuint32x4 vec; @@ -973,7 +891,7 @@ } # define VUINT32x4_LOAD_ALIGNED_DEFINED #endif -#ifndef VUINT32x4_LOAD_DEFINED +#if !defined(VUINT32x4_LOAD_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_load(const vec_uint32 x[4]) { vuint32x4 vec; @@ -982,21 +900,14 @@ } # define VUINT32x4_LOAD_DEFINED #endif -#ifndef VUINT32x4_STORE_ALIGNED_DEFINED -VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 arr[4]) +#if !defined(VUINT32x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 x[4]) { - vec_st(vec.altivec, 0, arr); + vec_st(vec.altivec, 0, x); } # define VUINT32x4_STORE_ALIGNED_DEFINED #endif -#ifndef VUINT32x4_STORE_DEFINED -VEC_FUNC_IMPL void vuint32x4_store(vuint32x4 vec, vec_uint32 arr[4]) -{ - memcpy(arr, &vec, sizeof(vec)); -} -# define VUINT32x4_STORE_DEFINED -#endif -#ifndef VUINT32x4_ADD_DEFINED 
+#if !defined(VUINT32x4_ADD_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_add(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; @@ -1005,7 +916,7 @@ } # define VUINT32x4_ADD_DEFINED #endif -#ifndef VUINT32x4_SUB_DEFINED +#if !defined(VUINT32x4_SUB_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_sub(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; @@ -1014,8 +925,7 @@ } # define VUINT32x4_SUB_DEFINED #endif -#ifdef vec_mul -#ifndef VUINT32x4_MUL_DEFINED +#if !defined(VUINT32x4_MUL_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_mul(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; @@ -1024,8 +934,16 @@ } # define VUINT32x4_MUL_DEFINED #endif +#if !defined(VUINT32x4_AVG_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_avg(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_AVG_DEFINED #endif -#ifndef VUINT32x4_AND_DEFINED +#if !defined(VUINT32x4_AND_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_and(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; @@ -1034,7 +952,7 @@ } # define VUINT32x4_AND_DEFINED #endif -#ifndef VUINT32x4_OR_DEFINED +#if !defined(VUINT32x4_OR_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_or(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; @@ -1043,7 +961,7 @@ } # define VUINT32x4_OR_DEFINED #endif -#ifndef VUINT32x4_XOR_DEFINED +#if !defined(VUINT32x4_XOR_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_xor(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; @@ -1052,7 +970,7 @@ } # define VUINT32x4_XOR_DEFINED #endif -#ifndef VUINT32x4_CMPLT_DEFINED +#if !defined(VUINT32x4_CMPLT_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_cmplt(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; @@ -1061,7 +979,7 @@ } # define VUINT32x4_CMPLT_DEFINED #endif -#ifndef VUINT32x4_CMPEQ_DEFINED +#if !defined(VUINT32x4_CMPEQ_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpeq(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; @@ -1070,7 +988,7 @@ } # define VUINT32x4_CMPEQ_DEFINED #endif -#ifndef VUINT32x4_CMPGT_DEFINED +#if !defined(VUINT32x4_CMPGT_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpgt(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; @@ -1079,7 +997,7 @@ } # define VUINT32x4_CMPGT_DEFINED #endif -#ifndef VUINT32x4_MIN_DEFINED +#if !defined(VUINT32x4_MIN_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_min(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; @@ -1088,7 +1006,7 @@ } # define VUINT32x4_MIN_DEFINED #endif -#ifndef VUINT32x4_MAX_DEFINED +#if !defined(VUINT32x4_MAX_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_max(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; @@ -1097,16 +1015,25 @@ } # define VUINT32x4_MAX_DEFINED #endif -#ifndef VUINT32x4_AVG_DEFINED -VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2) +#if !defined(VUINT32x4_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; - vec.altivec = (vector unsigned int)vec_avg(vec1.altivec, vec2.altivec); + vec.altivec = vec_sr(vec1.altivec, vec2.altivec); return vec; } -# define VUINT32x4_AVG_DEFINED +# define VUINT32x4_RSHIFT_DEFINED #endif -#ifndef VUINT32x4_LSHIFT_DEFINED +#if !defined(VUINT32x4_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_LRSHIFT_DEFINED +#endif +#if !defined(VUINT32x4_LSHIFT_DEFINED) VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2) { vuint32x4 vec; @@ -1115,23 +1042,109 @@ } # 
define VUINT32x4_LSHIFT_DEFINED #endif -#ifndef VUINT32x4_LRSHIFT_DEFINED -VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2) +#if !defined(VF32x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_splat(vec_f32 x) +{ + vf32x4 vec; + vec.altivec = vec_splats(x); + return vec; +} +# define VF32x4_SPLAT_DEFINED +#endif +#if !defined(VF32x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_load_aligned(const vec_f32 x[4]) +{ + vf32x4 vec; + vec.altivec = vec_ld(0, x); + return vec; +} +# define VF32x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x4_LOAD_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_load(const vec_f32 x[4]) { - vuint32x4 vec; - vec.altivec = (vector unsigned int)vec_sr(vec1.altivec, vec2.altivec); + vf32x4 vec; + vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x)); + return vec; +} +# define VF32x4_LOAD_DEFINED +#endif +#if !defined(VF32x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf32x4_store_aligned(vf32x4 vec, vec_f32 x[4]) +{ + vec_st(vec.altivec, 0, x); +} +# define VF32x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x4_ADD_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_add(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_add(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_ADD_DEFINED +#endif +#if !defined(VF32x4_SUB_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_sub(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_sub(vec1.altivec, vec2.altivec); return vec; } -# define VUINT32x4_LRSHIFT_DEFINED +# define VF32x4_SUB_DEFINED +#endif +#if !defined(VF32x4_MUL_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_mul(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_mul(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_MUL_DEFINED #endif -#ifndef VUINT32x4_RSHIFT_DEFINED -VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2) +#if !defined(VF32x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmplt(vf32x4 vec1, vf32x4 vec2) { - vuint32x4 vec; - vec.altivec = vec_sr(vec1.altivec, vec2.altivec); + vf32x4 vec; + vec.altivec = (vector float)vec_cmplt(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_CMPLT_DEFINED +#endif +#if !defined(VF32x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpeq(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_cmpeq(vec1.altivec, vec2.altivec); return vec; } -# define VUINT32x4_RSHIFT_DEFINED +# define VF32x4_CMPEQ_DEFINED +#endif +#if !defined(VF32x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpgt(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_cmpgt(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_CMPGT_DEFINED #endif -#endif /* VEC_IMPL_PPC_ALTIVEC_H_ */ - +#if !defined(VF32x4_MIN_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_min(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_min(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_MIN_DEFINED +#endif +#if !defined(VF32x4_MAX_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_max(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_max(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_MAX_DEFINED +#endif
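With this hunk the AltiVec header also covers vf32x4 via vec_splats, vec_ld/vec_perm, vec_add and friends. A small sketch of the new float path follows; it is illustrative only and assumes GCC with -maltivec on a PowerPC target (so this header is selected) and "vec/vec.h" as the include. Since the header defines only an aligned float store (there is no unaligned vf32x4_store here), the output buffer is forced to 16-byte alignment with a GCC attribute.

	#include <stdio.h>
	#include "vec/vec.h"   /* assumed public entry point */

	int main(void)
	{
		vec_f32 in[4] = {1.0f, 2.0f, 3.0f, 4.0f};
		__attribute__((aligned(16))) vec_f32 out[4];

		vf32x4 v   = vf32x4_load(in);      /* vec_perm(vec_ld(...)) unaligned load */
		vf32x4 two = vf32x4_splat(2.0f);   /* vec_splats broadcast */

		/* lane-wise multiply, then vec_st to the 16-byte-aligned buffer */
		vf32x4_store_aligned(vf32x4_mul(v, two), out);

		for (int i = 0; i < 4; i++)
			printf("%f\n", (double)out[i]);

		return 0;
	}

On a build where this header is not selected, the same calls would presumably resolve to the generic fallbacks instead, which appears to be the point of the VF32x4_*_DEFINED guards introduced in this hunk.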
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/vec/impl/ppc/vsx.h Wed Apr 30 18:36:38 2025 -0400 @@ -0,0 +1,1418 @@ +/** + * vec - a tiny SIMD vector library in C99 + * + * Copyright (c) 2024-2025 Paper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +**/ + +/* This file is automatically generated! Do not edit it directly! + * Edit the code that generates it in utils/genvsx.c --paper */ + +/* ------------------------------------------------------------------------ */ +/* PREPROCESSOR HELL INCOMING */ + +#if !defined(VINT8x16_SPLAT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_splat(vec_int8 x) +{ + vint8x16 vec; + vec.altivec = vec_splats(x); + return vec; +} +# define VINT8x16_SPLAT_DEFINED +#endif +#if !defined(VINT8x16_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_load_aligned(const vec_int8 x[16]) +{ + vint8x16 vec; + vec.altivec = vec_ld(0, x); + return vec; +} +# define VINT8x16_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT8x16_LOAD_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_load(const vec_int8 x[16]) +{ + vint8x16 vec; + vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x)); + return vec; +} +# define VINT8x16_LOAD_DEFINED +#endif +#if !defined(VINT8x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 x[16]) +{ + vec_st(vec.altivec, 0, x); +} +# define VINT8x16_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT8x16_ADD_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_add(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_add(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_ADD_DEFINED +#endif +#if !defined(VINT8x16_SUB_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_sub(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_sub(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_SUB_DEFINED +#endif +#if !defined(VINT8x16_MUL_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_mul(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_mul(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_MUL_DEFINED +#endif +#if !defined(VINT8x16_AVG_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_avg(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_AVG_DEFINED +#endif +#if !defined(VINT8x16_AND_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2) +{ + 
vint8x16 vec; + vec.altivec = (vector signed char)vec_and(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_AND_DEFINED +#endif +#if !defined(VINT8x16_OR_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_or(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_OR_DEFINED +#endif +#if !defined(VINT8x16_XOR_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_xor(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_XOR_DEFINED +#endif +#if !defined(VINT8x16_CMPLT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_cmplt(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_CMPLT_DEFINED +#endif +#if !defined(VINT8x16_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_cmpeq(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_CMPEQ_DEFINED +#endif +#if !defined(VINT8x16_CMPGT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_cmpgt(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_CMPGT_DEFINED +#endif +#if !defined(VINT8x16_CMPLE_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_cmple(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_cmple(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_CMPLE_DEFINED +#endif +#if !defined(VINT8x16_CMPGE_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_cmpge(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_cmpge(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_CMPGE_DEFINED +#endif +#if !defined(VINT8x16_MIN_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_min(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_MIN_DEFINED +#endif +#if !defined(VINT8x16_MAX_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_max(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_MAX_DEFINED +#endif +#if !defined(VINT8x16_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_rshift(vint8x16 vec1, vuint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = vec_sra(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_RSHIFT_DEFINED +#endif +#if !defined(VINT8x16_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_LRSHIFT_DEFINED +#endif +#if !defined(VINT8x16_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2) +{ + vint8x16 vec; + vec.altivec = (vector signed char)vec_sl(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT8x16_LSHIFT_DEFINED +#endif +#if !defined(VUINT8x16_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_splat(vec_uint8 x) +{ + vuint8x16 vec; + vec.altivec = vec_splats(x); + return vec; +} +# define VUINT8x16_SPLAT_DEFINED +#endif +#if !defined(VUINT8x16_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_load_aligned(const vec_uint8 x[16]) +{ + vuint8x16 vec; + 
vec.altivec = vec_ld(0, x); + return vec; +} +# define VUINT8x16_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT8x16_LOAD_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 x[16]) +{ + vuint8x16 vec; + vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x)); + return vec; +} +# define VUINT8x16_LOAD_DEFINED +#endif +#if !defined(VUINT8x16_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 x[16]) +{ + vec_st(vec.altivec, 0, x); +} +# define VUINT8x16_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT8x16_ADD_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_add(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_add(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_ADD_DEFINED +#endif +#if !defined(VUINT8x16_SUB_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_sub(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_sub(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_SUB_DEFINED +#endif +#if !defined(VUINT8x16_MUL_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_mul(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_mul(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_MUL_DEFINED +#endif +#if !defined(VUINT8x16_AVG_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_avg(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_AVG_DEFINED +#endif +#if !defined(VUINT8x16_AND_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_and(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_and(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_AND_DEFINED +#endif +#if !defined(VUINT8x16_OR_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_or(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_or(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_OR_DEFINED +#endif +#if !defined(VUINT8x16_XOR_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_xor(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_xor(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_XOR_DEFINED +#endif +#if !defined(VUINT8x16_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmplt(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_cmplt(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_CMPLT_DEFINED +#endif +#if !defined(VUINT8x16_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpeq(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_cmpeq(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_CMPEQ_DEFINED +#endif +#if !defined(VUINT8x16_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpgt(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_cmpgt(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_CMPGT_DEFINED +#endif +#if !defined(VUINT8x16_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmple(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_cmple(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_CMPLE_DEFINED +#endif +#if !defined(VUINT8x16_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpge(vuint8x16 vec1, vuint8x16 vec2) +{ 
+ vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_cmpge(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_CMPGE_DEFINED +#endif +#if !defined(VUINT8x16_MIN_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_min(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_min(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_MIN_DEFINED +#endif +#if !defined(VUINT8x16_MAX_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_max(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_max(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_MAX_DEFINED +#endif +#if !defined(VUINT8x16_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_RSHIFT_DEFINED +#endif +#if !defined(VUINT8x16_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_LRSHIFT_DEFINED +#endif +#if !defined(VUINT8x16_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 vec; + vec.altivec = (vector unsigned char)vec_sl(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT8x16_LSHIFT_DEFINED +#endif +#if !defined(VINT16x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_splat(vec_int16 x) +{ + vint16x8 vec; + vec.altivec = vec_splats(x); + return vec; +} +# define VINT16x8_SPLAT_DEFINED +#endif +#if !defined(VINT16x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_load_aligned(const vec_int16 x[8]) +{ + vint16x8 vec; + vec.altivec = vec_ld(0, x); + return vec; +} +# define VINT16x8_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT16x8_LOAD_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_load(const vec_int16 x[8]) +{ + vint16x8 vec; + vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x)); + return vec; +} +# define VINT16x8_LOAD_DEFINED +#endif +#if !defined(VINT16x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 x[8]) +{ + vec_st(vec.altivec, 0, x); +} +# define VINT16x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT16x8_ADD_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_add(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_add(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_ADD_DEFINED +#endif +#if !defined(VINT16x8_SUB_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_sub(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_sub(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_SUB_DEFINED +#endif +#if !defined(VINT16x8_MUL_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_mul(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_mul(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_MUL_DEFINED +#endif +#if !defined(VINT16x8_AVG_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_avg(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_AVG_DEFINED +#endif +#if !defined(VINT16x8_AND_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_and(vec1.altivec, vec2.altivec); + return vec; +} +# 
define VINT16x8_AND_DEFINED +#endif +#if !defined(VINT16x8_OR_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_or(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_OR_DEFINED +#endif +#if !defined(VINT16x8_XOR_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_xor(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_XOR_DEFINED +#endif +#if !defined(VINT16x8_CMPLT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_cmplt(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_CMPLT_DEFINED +#endif +#if !defined(VINT16x8_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_cmpeq(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_CMPEQ_DEFINED +#endif +#if !defined(VINT16x8_CMPGT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_cmpgt(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_CMPGT_DEFINED +#endif +#if !defined(VINT16x8_CMPLE_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_cmple(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_cmple(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_CMPLE_DEFINED +#endif +#if !defined(VINT16x8_CMPGE_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_cmpge(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_cmpge(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_CMPGE_DEFINED +#endif +#if !defined(VINT16x8_MIN_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_min(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_MIN_DEFINED +#endif +#if !defined(VINT16x8_MAX_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_max(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_MAX_DEFINED +#endif +#if !defined(VINT16x8_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = vec_sra(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_RSHIFT_DEFINED +#endif +#if !defined(VINT16x8_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_lrshift(vint16x8 vec1, vuint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_LRSHIFT_DEFINED +#endif +#if !defined(VINT16x8_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2) +{ + vint16x8 vec; + vec.altivec = (vector signed short)vec_sl(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT16x8_LSHIFT_DEFINED +#endif +#if !defined(VUINT16x8_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_splat(vec_uint16 x) +{ + vuint16x8 vec; + vec.altivec = vec_splats(x); + return vec; +} +# define VUINT16x8_SPLAT_DEFINED +#endif +#if !defined(VUINT16x8_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_load_aligned(const vec_uint16 x[8]) +{ + vuint16x8 vec; + vec.altivec = vec_ld(0, x); + return vec; +} +# define VUINT16x8_LOAD_ALIGNED_DEFINED +#endif +#if 
!defined(VUINT16x8_LOAD_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_load(const vec_uint16 x[8]) +{ + vuint16x8 vec; + vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x)); + return vec; +} +# define VUINT16x8_LOAD_DEFINED +#endif +#if !defined(VUINT16x8_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 x[8]) +{ + vec_st(vec.altivec, 0, x); +} +# define VUINT16x8_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT16x8_ADD_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_add(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_add(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_ADD_DEFINED +#endif +#if !defined(VUINT16x8_SUB_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_sub(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_sub(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_SUB_DEFINED +#endif +#if !defined(VUINT16x8_MUL_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_mul(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_mul(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_MUL_DEFINED +#endif +#if !defined(VUINT16x8_AVG_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_avg(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_AVG_DEFINED +#endif +#if !defined(VUINT16x8_AND_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_and(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_and(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_AND_DEFINED +#endif +#if !defined(VUINT16x8_OR_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_or(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_or(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_OR_DEFINED +#endif +#if !defined(VUINT16x8_XOR_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_xor(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_xor(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_XOR_DEFINED +#endif +#if !defined(VUINT16x8_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmplt(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_cmplt(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_CMPLT_DEFINED +#endif +#if !defined(VUINT16x8_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpeq(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_cmpeq(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_CMPEQ_DEFINED +#endif +#if !defined(VUINT16x8_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpgt(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_cmpgt(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_CMPGT_DEFINED +#endif +#if !defined(VUINT16x8_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmple(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_cmple(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_CMPLE_DEFINED +#endif +#if !defined(VUINT16x8_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpge(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_cmpge(vec1.altivec, 
vec2.altivec); + return vec; +} +# define VUINT16x8_CMPGE_DEFINED +#endif +#if !defined(VUINT16x8_MIN_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_min(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_min(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_MIN_DEFINED +#endif +#if !defined(VUINT16x8_MAX_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_max(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_max(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_MAX_DEFINED +#endif +#if !defined(VUINT16x8_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_rshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_RSHIFT_DEFINED +#endif +#if !defined(VUINT16x8_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_LRSHIFT_DEFINED +#endif +#if !defined(VUINT16x8_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 vec; + vec.altivec = (vector unsigned short)vec_sl(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT16x8_LSHIFT_DEFINED +#endif +#if !defined(VINT32x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_splat(vec_int32 x) +{ + vint32x4 vec; + vec.altivec = vec_splats(x); + return vec; +} +# define VINT32x4_SPLAT_DEFINED +#endif +#if !defined(VINT32x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_load_aligned(const vec_int32 x[4]) +{ + vint32x4 vec; + vec.altivec = vec_ld(0, x); + return vec; +} +# define VINT32x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VINT32x4_LOAD_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_load(const vec_int32 x[4]) +{ + vint32x4 vec; + vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x)); + return vec; +} +# define VINT32x4_LOAD_DEFINED +#endif +#if !defined(VINT32x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 x[4]) +{ + vec_st(vec.altivec, 0, x); +} +# define VINT32x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VINT32x4_ADD_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_add(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_ADD_DEFINED +#endif +#if !defined(VINT32x4_SUB_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_sub(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_sub(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_SUB_DEFINED +#endif +#if !defined(VINT32x4_MUL_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_mul(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_mul(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_MUL_DEFINED +#endif +#if !defined(VINT32x4_AVG_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_avg(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_AVG_DEFINED +#endif +#if !defined(VINT32x4_AND_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_and(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_AND_DEFINED +#endif +#if !defined(VINT32x4_OR_DEFINED) +VEC_FUNC_IMPL 
vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_or(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_OR_DEFINED +#endif +#if !defined(VINT32x4_XOR_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_xor(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_XOR_DEFINED +#endif +#if !defined(VINT32x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_cmplt(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_CMPLT_DEFINED +#endif +#if !defined(VINT32x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_cmpeq(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_CMPEQ_DEFINED +#endif +#if !defined(VINT32x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_cmpgt(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_CMPGT_DEFINED +#endif +#if !defined(VINT32x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_cmple(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_cmple(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_CMPLE_DEFINED +#endif +#if !defined(VINT32x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_cmpge(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_cmpge(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_CMPGE_DEFINED +#endif +#if !defined(VINT32x4_MIN_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_min(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_MIN_DEFINED +#endif +#if !defined(VINT32x4_MAX_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_max(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_MAX_DEFINED +#endif +#if !defined(VINT32x4_RSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = vec_sra(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_RSHIFT_DEFINED +#endif +#if !defined(VINT32x4_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_LRSHIFT_DEFINED +#endif +#if !defined(VINT32x4_LSHIFT_DEFINED) +VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2) +{ + vint32x4 vec; + vec.altivec = (vector signed int)vec_sl(vec1.altivec, vec2.altivec); + return vec; +} +# define VINT32x4_LSHIFT_DEFINED +#endif +#if !defined(VUINT32x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_splat(vec_uint32 x) +{ + vuint32x4 vec; + vec.altivec = vec_splats(x); + return vec; +} +# define VUINT32x4_SPLAT_DEFINED +#endif +#if !defined(VUINT32x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_load_aligned(const vec_uint32 x[4]) +{ + vuint32x4 vec; + vec.altivec = vec_ld(0, x); + return vec; +} +# define VUINT32x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VUINT32x4_LOAD_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_load(const vec_uint32 x[4]) +{ + vuint32x4 
vec; + vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x)); + return vec; +} +# define VUINT32x4_LOAD_DEFINED +#endif +#if !defined(VUINT32x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 x[4]) +{ + vec_st(vec.altivec, 0, x); +} +# define VUINT32x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VUINT32x4_ADD_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_add(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_add(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_ADD_DEFINED +#endif +#if !defined(VUINT32x4_SUB_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_sub(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_sub(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_SUB_DEFINED +#endif +#if !defined(VUINT32x4_MUL_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_mul(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_mul(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_MUL_DEFINED +#endif +#if !defined(VUINT32x4_AVG_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_avg(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_AVG_DEFINED +#endif +#if !defined(VUINT32x4_AND_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_and(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_and(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_AND_DEFINED +#endif +#if !defined(VUINT32x4_OR_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_or(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_or(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_OR_DEFINED +#endif +#if !defined(VUINT32x4_XOR_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_xor(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_xor(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_XOR_DEFINED +#endif +#if !defined(VUINT32x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmplt(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_cmplt(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_CMPLT_DEFINED +#endif +#if !defined(VUINT32x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpeq(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_cmpeq(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_CMPEQ_DEFINED +#endif +#if !defined(VUINT32x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpgt(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_cmpgt(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_CMPGT_DEFINED +#endif +#if !defined(VUINT32x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmple(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_cmple(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_CMPLE_DEFINED +#endif +#if !defined(VUINT32x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpge(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_cmpge(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_CMPGE_DEFINED +#endif +#if !defined(VUINT32x4_MIN_DEFINED) +VEC_FUNC_IMPL vuint32x4 
vuint32x4_min(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_min(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_MIN_DEFINED +#endif +#if !defined(VUINT32x4_MAX_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_max(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_max(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_MAX_DEFINED +#endif +#if !defined(VUINT32x4_RSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_RSHIFT_DEFINED +#endif +#if !defined(VUINT32x4_LRSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_sr(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_LRSHIFT_DEFINED +#endif +#if !defined(VUINT32x4_LSHIFT_DEFINED) +VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 vec; + vec.altivec = (vector unsigned int)vec_sl(vec1.altivec, vec2.altivec); + return vec; +} +# define VUINT32x4_LSHIFT_DEFINED +#endif +#if !defined(VF32x4_SPLAT_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_splat(vec_f32 x) +{ + vf32x4 vec; + vec.altivec = vec_splats(x); + return vec; +} +# define VF32x4_SPLAT_DEFINED +#endif +#if !defined(VF32x4_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_load_aligned(const vec_f32 x[4]) +{ + vf32x4 vec; + vec.altivec = vec_ld(0, x); + return vec; +} +# define VF32x4_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF32x4_LOAD_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_load(const vec_f32 x[4]) +{ + vf32x4 vec; + vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x)); + return vec; +} +# define VF32x4_LOAD_DEFINED +#endif +#if !defined(VF32x4_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf32x4_store_aligned(vf32x4 vec, vec_f32 x[4]) +{ + vec_st(vec.altivec, 0, x); +} +# define VF32x4_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF32x4_ADD_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_add(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_add(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_ADD_DEFINED +#endif +#if !defined(VF32x4_SUB_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_sub(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_sub(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_SUB_DEFINED +#endif +#if !defined(VF32x4_MUL_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_mul(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_mul(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_MUL_DEFINED +#endif +#if !defined(VF32x4_DIV_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_div(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_div(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_DIV_DEFINED +#endif +#if !defined(VF32x4_CMPLT_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmplt(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_cmplt(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_CMPLT_DEFINED +#endif +#if !defined(VF32x4_CMPEQ_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpeq(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_cmpeq(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_CMPEQ_DEFINED +#endif +#if !defined(VF32x4_CMPGT_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpgt(vf32x4 vec1, 
vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_cmpgt(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_CMPGT_DEFINED +#endif +#if !defined(VF32x4_CMPLE_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmple(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_cmple(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_CMPLE_DEFINED +#endif +#if !defined(VF32x4_CMPGE_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_cmpge(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_cmpge(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_CMPGE_DEFINED +#endif +#if !defined(VF32x4_MIN_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_min(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_min(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_MIN_DEFINED +#endif +#if !defined(VF32x4_MAX_DEFINED) +VEC_FUNC_IMPL vf32x4 vf32x4_max(vf32x4 vec1, vf32x4 vec2) +{ + vf32x4 vec; + vec.altivec = (vector float)vec_max(vec1.altivec, vec2.altivec); + return vec; +} +# define VF32x4_MAX_DEFINED +#endif +#if !defined(VF64x2_SPLAT_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_splat(vec_f64 x) +{ + vf64x2 vec; + vec.altivec = vec_splats(x); + return vec; +} +# define VF64x2_SPLAT_DEFINED +#endif +#if !defined(VF64x2_LOAD_ALIGNED_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_load_aligned(const vec_f64 x[2]) +{ + vf64x2 vec; + vec.altivec = vec_ld(0, x); + return vec; +} +# define VF64x2_LOAD_ALIGNED_DEFINED +#endif +#if !defined(VF64x2_LOAD_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_load(const vec_f64 x[2]) +{ + vf64x2 vec; + vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x)); + return vec; +} +# define VF64x2_LOAD_DEFINED +#endif +#if !defined(VF64x2_STORE_ALIGNED_DEFINED) +VEC_FUNC_IMPL void vf64x2_store_aligned(vf64x2 vec, vec_f64 x[2]) +{ + vec_st(vec.altivec, 0, x); +} +# define VF64x2_STORE_ALIGNED_DEFINED +#endif +#if !defined(VF64x2_ADD_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_add(vf64x2 vec1, vf64x2 vec2) +{ + vf64x2 vec; + vec.altivec = (vector double)vec_add(vec1.altivec, vec2.altivec); + return vec; +} +# define VF64x2_ADD_DEFINED +#endif +#if !defined(VF64x2_SUB_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_sub(vf64x2 vec1, vf64x2 vec2) +{ + vf64x2 vec; + vec.altivec = (vector double)vec_sub(vec1.altivec, vec2.altivec); + return vec; +} +# define VF64x2_SUB_DEFINED +#endif +#if !defined(VF64x2_MUL_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_mul(vf64x2 vec1, vf64x2 vec2) +{ + vf64x2 vec; + vec.altivec = (vector double)vec_mul(vec1.altivec, vec2.altivec); + return vec; +} +# define VF64x2_MUL_DEFINED +#endif +#if !defined(VF64x2_DIV_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_div(vf64x2 vec1, vf64x2 vec2) +{ + vf64x2 vec; + vec.altivec = (vector double)vec_div(vec1.altivec, vec2.altivec); + return vec; +} +# define VF64x2_DIV_DEFINED +#endif +#if !defined(VF64x2_CMPLT_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_cmplt(vf64x2 vec1, vf64x2 vec2) +{ + vf64x2 vec; + vec.altivec = (vector double)vec_cmplt(vec1.altivec, vec2.altivec); + return vec; +} +# define VF64x2_CMPLT_DEFINED +#endif +#if !defined(VF64x2_CMPEQ_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_cmpeq(vf64x2 vec1, vf64x2 vec2) +{ + vf64x2 vec; + vec.altivec = (vector double)vec_cmpeq(vec1.altivec, vec2.altivec); + return vec; +} +# define VF64x2_CMPEQ_DEFINED +#endif +#if !defined(VF64x2_CMPGT_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_cmpgt(vf64x2 vec1, vf64x2 vec2) +{ + vf64x2 vec; + vec.altivec = (vector double)vec_cmpgt(vec1.altivec, vec2.altivec); + return vec; +} +# define 
VF64x2_CMPGT_DEFINED +#endif +#if !defined(VF64x2_CMPLE_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_cmple(vf64x2 vec1, vf64x2 vec2) +{ + vf64x2 vec; + vec.altivec = (vector double)vec_cmple(vec1.altivec, vec2.altivec); + return vec; +} +# define VF64x2_CMPLE_DEFINED +#endif +#if !defined(VF64x2_CMPGE_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_cmpge(vf64x2 vec1, vf64x2 vec2) +{ + vf64x2 vec; + vec.altivec = (vector double)vec_cmpge(vec1.altivec, vec2.altivec); + return vec; +} +# define VF64x2_CMPGE_DEFINED +#endif +#if !defined(VF64x2_MIN_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_min(vf64x2 vec1, vf64x2 vec2) +{ + vf64x2 vec; + vec.altivec = (vector double)vec_min(vec1.altivec, vec2.altivec); + return vec; +} +# define VF64x2_MIN_DEFINED +#endif +#if !defined(VF64x2_MAX_DEFINED) +VEC_FUNC_IMPL vf64x2 vf64x2_max(vf64x2 vec1, vf64x2 vec2) +{ + vf64x2 vec; + vec.altivec = (vector double)vec_max(vec1.altivec, vec2.altivec); + return vec; +} +# define VF64x2_MAX_DEFINED +#endif
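The functions above are the generated AltiVec backend; user code goes through the same v<type><bits>x<size>_* names no matter which backend ends up providing them. A minimal usage sketch (illustrative only, not part of this changeset; it relies on the aligned-array helpers from vec.h shown further below):

	/* adds two uint8 vectors through the public API; with AltiVec enabled
	 * this lands on the vec_ld()/vec_add()/vec_st() wrappers above,
	 * otherwise on the generic fallback */
	#include "vec/vec.h"
	#include <stdio.h>

	int main(void)
	{
		VUINT8x16_ALIGNED_ARRAY(a);
		VUINT8x16_ALIGNED_ARRAY(b);
		VUINT8x16_ALIGNED_ARRAY(out);
		int i;

		for (i = 0; i < 16; i++) {
			a[i] = (vec_uint8)i;
			b[i] = (vec_uint8)(i * 2);
		}

		vuint8x16 v = vuint8x16_add(vuint8x16_load_aligned(a),
		                            vuint8x16_load_aligned(b));
		vuint8x16_store_aligned(v, out);

		for (i = 0; i < 16; i++)
			printf("%u ", (unsigned int)out[i]);
		putchar('\n');

		return 0;
	}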
--- a/include/vec/impl/x86/sse2.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/include/vec/impl/x86/sse2.h	Wed Apr 30 18:36:38 2025 -0400
@@ -331,7 +331,7 @@
 #endif
 
 /* ------------------------------------------------------------------------ */
-/* vint8x16 */
+/* vint16x8 */
 
 #ifndef VINT16x8_SPLAT_DEFINED
 VEC_SSE2_SPLAT(/* nothing */, 16, 8)
@@ -414,7 +414,7 @@
 #endif
 
 /* ------------------------------------------------------------------------ */
-/* vuint8x16 */
+/* vuint16x8 */
 
 #ifndef VUINT16x8_SPLAT_DEFINED
 VEC_SSE2_SPLAT(u, 16, 8)
--- a/include/vec/impl/x86/sse3.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/include/vec/impl/x86/sse3.h	Wed Apr 30 18:36:38 2025 -0400
@@ -31,7 +31,7 @@
 VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_load(const vec_##sign##int##bits in[size]) \
 { \
 	v##sign##int##bits##x##size vec; \
-	vec.sse = _mm_loadu_si128((const __m128i *)in); \
+	vec.sse = _mm_lddqu_si128((const __m128i *)in); \
 	return vec; \
 }
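_mm_lddqu_si128() comes from SSE3 (declared in <pmmintrin.h>) and performs the same unaligned 16-byte load as _mm_loadu_si128(); it was originally intended to behave better on loads that straddle a cache line. Written out by hand for illustration, the load macro above expands for the unsigned 8-bit case to roughly:

	VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 in[16])
	{
		vuint8x16 vec;
		/* SSE3 unaligned load instead of the SSE2 _mm_loadu_si128() */
		vec.sse = _mm_lddqu_si128((const __m128i *)in);
		return vec;
	}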
--- a/include/vec/vec.h Tue Apr 29 16:54:13 2025 -0400 +++ b/include/vec/vec.h Wed Apr 30 18:36:38 2025 -0400 @@ -36,7 +36,10 @@ #define VEC_AVX2_ALIGNMENT 32 #define VEC_AVX512F_ALIGNMENT 64 -// for the generic implementation +/* for the generic implementation. note that due to delayed expansion, + * one for a larger implementation is basically always guaranteed to + * have at least the alignment of a smaller one (i.e. f64x8 alignment + * will be >= f64x4 alignment). This is by design. */ #define VINT8x2_ALIGNMENT 1 #define VUINT8x2_ALIGNMENT 1 @@ -79,6 +82,21 @@ #define VUINT32x16_ALIGNMENT VUINT32x8_ALIGNMENT #define VUINT64x8_ALIGNMENT VUINT64x4_ALIGNMENT +/* float */ + +#define VF32x2_ALIGNMENT 4 + +#define VF32x4_ALIGNMENT VF32x2_ALIGNMENT +#define VF64x2_ALIGNMENT 8 + +#define VF32x8_ALIGNMENT VF32x4_ALIGNMENT +#define VF64x4_ALIGNMENT VF64x2_ALIGNMENT + +#define VF32x16_ALIGNMENT VF32x8_ALIGNMENT +#define VF64x8_ALIGNMENT VF64x4_ALIGNMENT + +/* allow to suppress hardware, so that we can make sure + * the generic impl isn't *painfully* slow ;) */ #ifndef VEC_SUPPRESS_HW // IIRC `__VEC__' is also defined, but I don't know for sure. @@ -121,6 +139,14 @@ # undef VUINT64x2_ALIGNMENT # define VUINT64x2_ALIGNMENT VEC_ALTIVEC_ALIGNMENT # endif +# if VF32x4_ALIGNMENT < VEC_ALTIVEC_ALIGNMENT +# undef VF32x4_ALIGNMENT +# define VF32x4_ALIGNMENT VEC_ALTIVEC_ALIGNMENT +# endif +# if VF64x2_ALIGNMENT < VEC_ALTIVEC_ALIGNMENT +# undef VF64x2_ALIGNMENT +# define VF64x2_ALIGNMENT VEC_ALTIVEC_ALIGNMENT +# endif #endif #ifdef __ARM_NEON @@ -315,12 +341,42 @@ # endif #endif -#endif /* defined(VEC_SUPPRESS_HW) */ +#endif /* !defined(VEC_SUPPRESS_HW) */ #if VEC_GNUC_ATLEAST(4, 0, 0) # define VEC_COMPILER_HAS_GCC_VECTORS +# ifdef __BIGGEST_ALIGNMENT__ +# if VINT8x2_ALIGNMENT < __BIGGEST_ALIGNMENT__ +# undef VINT8x2_ALIGNMENT +# define VINT8x2_ALIGNMENT __BIGGEST_ALIGNMENT__ +# endif +# if VINT16x2_ALIGNMENT < __BIGGEST_ALIGNMENT__ +# undef VINT16x2_ALIGNMENT +# define VINT16x2_ALIGNMENT __BIGGEST_ALIGNMENT__ +# endif +# if VINT32x2_ALIGNMENT < __BIGGEST_ALIGNMENT__ +# undef VINT32x2_ALIGNMENT +# define VINT32x2_ALIGNMENT __BIGGEST_ALIGNMENT__ +# endif +# if VINT64x2_ALIGNMENT < __BIGGEST_ALIGNMENT__ +# undef VINT64x2_ALIGNMENT +# define VINT64x2_ALIGNMENT __BIGGEST_ALIGNMENT__ +# endif +# if VF32x2_ALIGNMENT < __BIGGEST_ALIGNMENT__ +# undef VF32x4_ALIGNMENT +# define VF32x4_ALIGNMENT __BIGGEST_ALIGNMENT__ +# endif +# if VF64x2_ALIGNMENT < __BIGGEST_ALIGNMENT__ +# undef VF64x2_ALIGNMENT +# define VF64x2_ALIGNMENT __BIGGEST_ALIGNMENT__ +# endif +# endif #endif +/* I don't think this happens on any platform yet, but we should + * probably take extra care to make sure the alignment of each + * is at least the alignment of the one half the size... 
*/ + #ifdef __cplusplus extern "C" { #endif @@ -355,6 +411,8 @@ return xx.d; } +/* this is the general algorithm vec uses for its average + * implementation :) */ VEC_FUNC_IMPL vec_intmax vec_imavg(vec_intmax x, vec_intmax y) { vec_intmax x_d_rem = (x % 2); @@ -392,13 +450,8 @@ #else // use unions to get an aligned offset without triggering strict aliasing # define VEC_ALIGNED_ARRAY(type, var, length, align) \ - VEC_STATIC_ASSERT(align && ((align & (align - 1)) == 0), "vec: alignment must be a power of two"); \ - union vec_aligned_union_##var##_ { \ - type arr[length]; \ - unsigned char bytes[sizeof(type) * length]; \ - }; \ - unsigned char vec_unaligned_##var##_[((length) * sizeof(type)) + (align) - 1]; \ - type *var = ((union vec_aligned_union_##var##_ *)(((vec_uintptr)vec_unaligned_##var##_ + (align - 1)) & ~(align - 1)))->arr; \ + type vec_unaligned_##var##_[length + (align) - 1]; \ + type *var = ((union vec_aligned_union_##var##_ *)(((vec_uintptr)vec_unaligned_##var##_ + (align - 1)) & ~(align - 1)))->arr; # define VEC_ALIGNED_ARRAY_SIZEOF(var, align) \ (sizeof(vec_unaligned_##var##_) - (align - 1)) #endif @@ -608,6 +661,47 @@ #define VUINT64x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT64x8_ALIGNMENT == 0) /* --------------------------------------------------------------- */ +/* floating point */ + +#define VF32x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f32, var, 2, VF32x2_ALIGNMENT) +#define VF32x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF32x2_ALIGNMENT) +#define VF32x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF32x2_ALIGNMENT) +#define VF32x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF32x2_ALIGNMENT == 0) + +#define VF32x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f32, var, 4, VF32x4_ALIGNMENT) +#define VF32x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF32x4_ALIGNMENT) +#define VF32x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF32x4_ALIGNMENT) +#define VF32x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF32x4_ALIGNMENT == 0) + +#define VF32x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f32, var, 8, VF32x8_ALIGNMENT) +#define VF32x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF32x8_ALIGNMENT) +#define VF32x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF32x8_ALIGNMENT) +#define VF32x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF32x8_ALIGNMENT == 0) + +#define VF32x16_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f32, var, 16, VF32x16_ALIGNMENT) +#define VF32x16_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF32x16_ALIGNMENT) +#define VF32x16_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF32x16_ALIGNMENT) +#define VF32x16_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF32x16_ALIGNMENT == 0) + +/* --------------------------------------------------------------- */ +/* double precision floating point */ + +#define VF64x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f64, var, 2, VF64x2_ALIGNMENT) +#define VF64x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF64x2_ALIGNMENT) +#define VF64x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF64x2_ALIGNMENT) +#define VF64x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF64x2_ALIGNMENT == 0) + +#define VF64x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f64, var, 4, VF64x4_ALIGNMENT) +#define VF64x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF64x4_ALIGNMENT) +#define VF64x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF64x4_ALIGNMENT) +#define VF64x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF64x4_ALIGNMENT == 0) + +#define 
VF64x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f64, var, 8, VF64x8_ALIGNMENT) +#define VF64x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF64x8_ALIGNMENT) +#define VF64x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF64x8_ALIGNMENT) +#define VF64x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF64x8_ALIGNMENT == 0) + +/* --------------------------------------------------------------- */ /* Defines the structures for each vector type */ // 16-bit @@ -630,7 +724,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_uint8 __attribute__((__vector_size__(4))) gcc; #endif - vuint8x2 generic[2]; + vuint8x2 dbl[2]; + + vec_uint8 generic[4]; } vuint8x4; typedef union { @@ -644,7 +740,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_int8 __attribute__((__vector_size__(4))) gcc; #endif - vint8x2 generic[2]; + vint8x2 dbl[2]; + + vec_int8 generic[4]; } vint8x4; typedef union { @@ -666,7 +764,9 @@ vec_uint8 __attribute__((__vector_size__(8))) gcc; #endif - vuint8x4 generic[2]; + vuint8x4 dbl[2]; + + vec_uint8 generic[8]; } vuint8x8; typedef union { @@ -680,7 +780,9 @@ vec_uint16 __attribute__((__vector_size__(8))) gcc; #endif - vuint16x2 generic[2]; + vuint16x2 dbl[2]; + + vec_uint16 generic[4]; } vuint16x4; typedef union { @@ -708,7 +810,9 @@ vec_int8 __attribute__((__vector_size__(8))) gcc; #endif - vint8x4 generic[2]; + vec_int8 generic[8]; + + vint8x4 dbl[2]; } vint8x8; typedef union { @@ -722,7 +826,9 @@ vec_int16 __attribute__((__vector_size__(8))) gcc; #endif - vint16x2 generic[2]; + vec_int16 generic[4]; + + vint16x2 dbl[2]; } vint16x4; typedef union { @@ -753,7 +859,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_uint8 __attribute__((__vector_size__(16))) gcc; #endif - vuint8x8 generic[2]; + vuint8x8 dbl[2]; + + vec_uint8 generic[16]; } vuint8x16; typedef union { @@ -769,7 +877,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_uint16 __attribute__((__vector_size__(16))) gcc; #endif - vuint16x4 generic[2]; + vuint16x4 dbl[2]; + + vec_uint16 generic[8]; } vuint16x8; typedef union { @@ -785,7 +895,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_uint32 __attribute__((__vector_size__(16))) gcc; #endif - vuint32x2 generic[2]; + vuint32x2 dbl[2]; + + vec_uint32 generic[4]; } vuint32x4; typedef union { @@ -817,7 +929,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_int8 __attribute__((__vector_size__(16))) gcc; #endif - vint8x8 generic[2]; + vint8x8 dbl[2]; + + vec_int8 generic[16]; } vint8x16; typedef union { @@ -833,7 +947,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_int16 __attribute__((__vector_size__(16))) gcc; #endif - vint16x4 generic[2]; + vint16x4 dbl[2]; + + vec_int16 generic[8]; } vint16x8; typedef union { @@ -849,7 +965,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_int32 __attribute__((__vector_size__(16))) gcc; #endif - vint32x2 generic[2]; + vint32x2 dbl[2]; + + vec_int32 generic[4]; } vint32x4; typedef union { @@ -876,7 +994,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_uint8 __attribute__((__vector_size__(32))) gcc; #endif - vuint8x16 generic[2]; + vuint8x16 dbl[2]; + + vec_uint8 generic[32]; } vuint8x32; typedef union { @@ -886,7 +1006,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_uint16 __attribute__((__vector_size__(32))) gcc; #endif - vuint16x8 generic[2]; + vuint16x8 dbl[2]; + + vec_uint16 generic[16]; } vuint16x16; typedef union { @@ -896,7 +1018,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_uint32 __attribute__((__vector_size__(32))) gcc; #endif - vuint32x4 generic[2]; + vuint32x4 dbl[2]; + + vec_uint32 generic[8]; } vuint32x8; typedef union { @@ -906,7 +1030,9 @@ #ifdef 
VEC_COMPILER_HAS_GCC_VECTORS vec_uint64 __attribute__((__vector_size__(32))) gcc; #endif - vuint64x2 generic[2]; + vuint64x2 dbl[2]; + + vec_uint64 generic[4]; } vuint64x4; typedef union { @@ -916,7 +1042,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_int8 __attribute__((__vector_size__(32))) gcc; #endif - vint8x16 generic[2]; + vint8x16 dbl[2]; + + vec_int8 generic[32]; } vint8x32; typedef union { @@ -926,7 +1054,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_int16 __attribute__((__vector_size__(32))) gcc; #endif - vint16x8 generic[2]; + vint16x8 dbl[2]; + + vec_int16 generic[16]; } vint16x16; typedef union { @@ -936,7 +1066,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_int32 __attribute__((__vector_size__(32))) gcc; #endif - vint32x4 generic[2]; + vint32x4 dbl[2]; + + vec_int32 generic[8]; } vint32x8; typedef union { @@ -946,7 +1078,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_int64 __attribute__((__vector_size__(32))) gcc; #endif - vint64x2 generic[2]; + vint64x2 dbl[2]; + + vec_int64 generic[4]; } vint64x4; // 512-bit @@ -957,7 +1091,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_uint8 __attribute__((__vector_size__(64))) gcc; #endif - vuint8x32 generic[2]; + vuint8x32 dbl[2]; + + vec_uint8 generic[64]; } vuint8x64; typedef union { @@ -967,7 +1103,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_uint16 __attribute__((__vector_size__(64))) gcc; #endif - vuint16x16 generic[2]; + vuint16x16 dbl[2]; + + vec_uint16 generic[32]; } vuint16x32; typedef union { @@ -977,7 +1115,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_uint32 __attribute__((__vector_size__(64))) gcc; #endif - vuint32x8 generic[2]; + vuint32x8 dbl[2]; + + vec_uint32 generic[16]; } vuint32x16; typedef union { @@ -987,7 +1127,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_uint64 __attribute__((__vector_size__(64))) gcc; #endif - vuint64x4 generic[2]; + vuint64x4 dbl[2]; + + vec_uint64 generic[8]; } vuint64x8; typedef union { @@ -997,7 +1139,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_int8 __attribute__((__vector_size__(64))) gcc; #endif - vint8x32 generic[2]; + vint8x32 dbl[2]; + + vec_int8 generic[64]; } vint8x64; typedef union { @@ -1007,7 +1151,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_int16 __attribute__((__vector_size__(64))) gcc; #endif - vint16x16 generic[2]; + vint16x16 dbl[2]; + + vec_int16 generic[32]; } vint16x32; typedef union { @@ -1017,7 +1163,9 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_int32 __attribute__((__vector_size__(64))) gcc; #endif - vint32x8 generic[2]; + vint32x8 dbl[2]; + + vec_int32 generic[16]; } vint32x16; typedef union { @@ -1027,9 +1175,84 @@ #ifdef VEC_COMPILER_HAS_GCC_VECTORS vec_int64 __attribute__((__vector_size__(64))) gcc; #endif - vint64x4 generic[2]; + vint64x4 dbl[2]; + + vec_int64 generic[8]; } vint64x8; +/* ------- Floating-point types */ + +typedef union { +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_f32 __attribute__((__vector_size__(8))) gcc; +#endif + vec_f32 generic[2]; +} vf32x2; + +typedef union { +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_f32 __attribute__((__vector_size__(16))) gcc; +#endif +#ifdef VEC_COMPILER_HAS_ALTIVEC + vector float altivec; +#endif + + vf32x2 dbl[2]; + + vec_f32 generic[4]; +} vf32x4; + +typedef union { +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_f32 __attribute__((__vector_size__(32))) gcc; +#endif + + vf32x4 dbl[2]; + + vec_f32 generic[8]; +} vf32x8; + +typedef union { +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_f32 __attribute__((__vector_size__(64))) gcc; +#endif + + vf32x8 dbl[2]; + + vec_f32 generic[16]; +} vf32x16; + +typedef union { +#ifdef 
VEC_COMPILER_HAS_GCC_VECTORS + vec_f64 __attribute__((__vector_size__(16))) gcc; +#endif +#ifdef VEC_COMPILER_HAS_ALTIVEC_VSX + vector double altivec; +#endif + + vec_f64 generic[2]; +} vf64x2; + +typedef union { +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_f64 __attribute__((__vector_size__(32))) gcc; +#endif + + vf64x2 dbl[2]; + + vec_f64 generic[4]; +} vf64x4; + +typedef union { +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_f64 __attribute__((__vector_size__(64))) gcc; +#endif + + vf64x4 dbl[2]; + + vec_f64 generic[8]; +} vf64x8; + /* ------------------------------------------------------------------------ */ /* x86 */ @@ -1064,6 +1287,10 @@ /* ------------------------------------------------------------------------ */ /* PowerPC */ +#ifdef VEC_COMPILER_HAS_ALTIVEC_VSX +# include "impl/ppc/vsx.h" +#endif + #ifdef VEC_COMPILER_HAS_ALTIVEC # include "impl/ppc/altivec.h" #endif @@ -1083,9 +1310,12 @@ # include "impl/gcc.h" #endif +/*we don't need to double here, because gcc defines literally everything :)*/ + +/* ------------------------------------------------------------------------ */ /* Fill in anything remaining with a generic array-based implementation. */ + #include "impl/generic.h" -#include "impl/double.h" /* ------------------------------------------------------------------------ */
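The new VF*_ALIGNED_ARRAY and VF*_PTR_ALIGNED helpers mirror the integer ones, so float code is written the same way as integer code. A short sketch (illustrative only; vf32x4_div is provided by whichever backend is active, e.g. the AltiVec implementation above or the generic fallback):

	#include "vec/vec.h"
	#include <stdio.h>

	int main(void)
	{
		VF32x4_ALIGNED_ARRAY(num);
		VF32x4_ALIGNED_ARRAY(den);
		VF32x4_ALIGNED_ARRAY(out);
		int i;

		for (i = 0; i < 4; i++) {
			num[i] = 1.0f + i;
			den[i] = 2.0f;
		}

		/* the backing storage really is aligned to VF32x4_ALIGNMENT */
		if (!VF32x4_PTR_ALIGNED(num) || !VF32x4_PTR_ALIGNED(out))
			return 1;

		vf32x4 q = vf32x4_div(vf32x4_load_aligned(num),
		                      vf32x4_load_aligned(den));
		vf32x4_store_aligned(q, out);

		for (i = 0; i < 4; i++)
			printf("%f\n", (double)out[i]);

		return 0;
	}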
--- a/test/Makefile.template	Tue Apr 29 16:54:13 2025 -0400
+++ b/test/Makefile.template	Wed Apr 30 18:36:38 2025 -0400
@@ -1,12 +1,14 @@
 CPPFLAGS += -g -O2 -I../include -Wall
 CFLAGS += $(CPPFLAGS) -std=c99
 CXXFLAGS += $(CPPFLAGS) -std=c++11
+LDADD += -lm
 
 HEADERS = ../include/vec/vec.h \
 	../include/vec/cpu.h \
 	../include/vec/mem.h \
 	../include/vec/defs.h \
 	../include/vec/impl/ppc/altivec.h \
+	../include/vec/impl/ppc/vsx.h \
 	../include/vec/impl/x86/avx2.h \
 	../include/vec/impl/x86/avx512f.h \
 	../include/vec/impl/x86/avx512bw.h \
@@ -38,13 +40,13 @@
 	$(CXX) $(CXXFLAGS) -c -o $@ $<
 
 test-generic: test.o test_benchmark_simple.o test_benchmark_vec.o
-	$(CC) $(LDFLAGS) -o $@ $^
+	$(CC) $(LDFLAGS) -o $@ $^ $(LDADD)
 
 test-host: test.o test_benchmark_simple.o test_benchmark_vec.o
-	$(CC) $(LDFLAGS) -o $@ $^
+	$(CC) $(LDFLAGS) -o $@ $^ $(LDADD)
 
 test-cxx: test-cxx.o $(HEADERS)
-	$(CXX) $(LDFLAGS) -o $@ $<
+	$(CXX) $(LDFLAGS) -o $@ $< $(LDADD)
 
 clean:
 	$(RM) $(BINS) $(OBJS)
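LDADD gains -lm because the float paths in test_arith.h call fmod() from <math.h>, which lives in libm on most Unix toolchains. With the default (empty) LDFLAGS, the test-generic link step therefore effectively runs something like:

	cc -o test-generic test.o test_benchmark_simple.o test_benchmark_vec.o -lm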
--- a/test/test.c Tue Apr 29 16:54:13 2025 -0400 +++ b/test/test.c Wed Apr 30 18:36:38 2025 -0400 @@ -40,6 +40,11 @@ UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFE), UINT32_C( 0), UINT32_C( 1), }; +static const float testvalf32[] = { + 1.0f, -3.33f, -4096.0f, 1234.0f, + 90.0f, -12.0f, 60.0f, 10224.0f, +}; + static const int64_t testval64[] = { INT64_MAX, INT64_C(-3), INT64_C(0x00000000), INT64_C(0xFFFFFFFFF), INT64_MIN, INT64_C(645366), INT64_C(0x12345ABCDE), INT64_C(0xF00000FFF), @@ -50,36 +55,44 @@ UINT64_C(0xff), UINT64_C(645366), UINT64_C(0x12345ABCDE), UINT64_C(0xF00000FFF), }; -#define VTEST(sign, csign, bits, size) \ - static inline v##sign##int##bits##x##size vtest##sign##bits##x##size(const size_t start) \ +static const double testvalf64[] = { + 2345734.0, 12498.0, 12.0, -12312.0, + -5.0, 12.234, 3.1415, 2.71828, +}; + +#define VTEST(shorttype, type, ctype, bits, size) \ + static inline v##type##bits##x##size vtest##shorttype##bits##x##size(const size_t start) \ { \ - V##csign##INT##bits##x##size##_ALIGNED_ARRAY(x); \ + V##ctype##bits##x##size##_ALIGNED_ARRAY(x); \ for (size_t i = 0; i < size; i++) \ - x[i] = testval##sign##bits[(start + i) % ARRAY_SIZE(testval##sign##bits)]; \ - return v##sign##int##bits##x##size##_load_aligned(x); \ + x[i] = testval##shorttype##bits[(start + i) % ARRAY_SIZE(testval##shorttype##bits)]; \ + return v##type##bits##x##size##_load_aligned(x); \ } -#define VPRINT(sign, csign, psign, bits, size) \ - static inline void print_v##sign##int##bits##x##size(FILE *file, v##sign##int##bits##x##size vec) \ +#define VPRINT(type, ctype, print, bits, size) \ + static inline void print_v##type##bits##x##size(FILE *file, v##type##bits##x##size vec) \ { \ fputs("vector: ", file); \ \ - V##csign##INT##bits##x##size##_ALIGNED_ARRAY(v); \ + V##ctype##bits##x##size##_ALIGNED_ARRAY(v); \ \ - v##sign##int##bits##x##size##_store_aligned(vec, v); \ + v##type##bits##x##size##_store_aligned(vec, v); \ \ - fprintf(file, "%" PRI ## psign ## bits, v[0]); \ + fprintf(file, "%" print, v[0]); \ \ for (int i = 1; i < size; i++) \ - fprintf(file, ", %" PRI ## psign ## bits, v[i]); \ + fprintf(file, ", %" print, v[i]); \ \ fputs("\n", file); \ - \ } #define DEF_VEC_TEST_FUNCS(bits, size) \ - VTEST(, , bits, size) VTEST(u, U, bits, size) \ - VPRINT(, , d, bits, size) VPRINT(u, U, u, bits, size) + VTEST(, int, INT, bits, size) VTEST(u, uint, UINT, bits, size) \ + VPRINT(int, INT, PRI##d##bits, bits, size) VPRINT(uint, UINT, PRI##u##bits, bits, size) + +#define DEF_VEC_TEST_FUNC_FLOAT(bits, size) \ + VTEST(f, f, F, bits, size) \ + VPRINT(f, F, "f", bits, size) DEF_VEC_TEST_FUNCS(8, 2) @@ -105,18 +118,25 @@ DEF_VEC_TEST_FUNCS(32, 16) DEF_VEC_TEST_FUNCS(64, 8) +DEF_VEC_TEST_FUNC_FLOAT(32, 2) +DEF_VEC_TEST_FUNC_FLOAT(32, 4) +DEF_VEC_TEST_FUNC_FLOAT(32, 8) +DEF_VEC_TEST_FUNC_FLOAT(32, 16) + +DEF_VEC_TEST_FUNC_FLOAT(64, 2) +DEF_VEC_TEST_FUNC_FLOAT(64, 4) +DEF_VEC_TEST_FUNC_FLOAT(64, 8) + #undef DEF_VEC_TEST_FUNCS #undef VPRINT #undef VTEST // ------------------------------------------------------------ -#if 0 #include "test_align.h" #include "test_arith.h" #include "test_compare.h" #include "test_shift.h" -#endif #include "test_benchmark.h" // ------------------------------------------------------------ @@ -127,12 +147,10 @@ srand(time(NULL)); -#if 0 ret |= test_align(); ret |= test_arith(); ret |= test_compare(); ret |= test_shift(); -#endif test_benchmark();
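The reworked VTEST/VPRINT macros take the test-value prefix, the vector type name, and the uppercase macro prefix as separate parameters so the float types can reuse them. Expanded by hand for illustration, DEF_VEC_TEST_FUNC_FLOAT(32, 4) produces roughly:

	static inline vf32x4 vtestf32x4(const size_t start)
	{
		VF32x4_ALIGNED_ARRAY(x);
		for (size_t i = 0; i < 4; i++)
			x[i] = testvalf32[(start + i) % ARRAY_SIZE(testvalf32)];
		return vf32x4_load_aligned(x);
	}

	static inline void print_vf32x4(FILE *file, vf32x4 vec)
	{
		fputs("vector: ", file);

		VF32x4_ALIGNED_ARRAY(v);

		vf32x4_store_aligned(vec, v);

		/* "%" print with print == "f" pastes into the usual "%f" */
		fprintf(file, "%" "f", v[0]);

		for (int i = 1; i < 4; i++)
			fprintf(file, ", %" "f", v[i]);

		fputs("\n", file);
	}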
--- a/test/test_align.h Tue Apr 29 16:54:13 2025 -0400 +++ b/test/test_align.h Wed Apr 30 18:36:38 2025 -0400 @@ -2,34 +2,35 @@ { int ret = 0; -#define RUN_TEST(sign, csign, bits, size) \ +#define RUN_TEST(type, ctype, bits, size) \ do { \ + int i; \ /* allocate the aligned array */ \ - V##csign##INT##bits##x##size##_ALIGNED_ARRAY(vec_arr); \ + V##ctype##bits##x##size##_ALIGNED_ARRAY(vec_arr); \ \ /* fill the values */ \ - for (int i = 0; i < size; i++) \ + for (i = 0; i < size; i++) \ vec_arr[i] = i; \ \ /* try to load it */ \ - v##sign##int##bits##x##size vec = v##sign##int##bits##x##size##_load_aligned(vec_arr); \ + v##type##bits##x##size vec = v##type##bits##x##size##_load(vec_arr); \ \ /* now allocate an output array */ \ - V##csign##INT##bits##x##size##_ALIGNED_ARRAY(vec_arr_out); \ + V##ctype##bits##x##size##_ALIGNED_ARRAY(vec_arr_out); \ \ /* try storing it */ \ - v##sign##int##bits##x##size##_store_aligned(vec, vec_arr_out); \ + v##type##bits##x##size##_store_aligned(vec, vec_arr_out); \ \ /* mark success or failure */ \ - ret |= !!memcmp(vec_arr, vec_arr_out, size * sizeof(*vec_arr)); \ + ret |= !!memcmp(vec_arr, vec_arr_out, size * (bits / 8)); \ \ - ret |= !V##csign##INT##bits##x##size##_PTR_ALIGNED(vec_arr); \ - ret |= !V##csign##INT##bits##x##size##_PTR_ALIGNED(vec_arr_out); \ + ret |= !V##ctype##bits##x##size##_PTR_ALIGNED(vec_arr); \ + ret |= !V##ctype##bits##x##size##_PTR_ALIGNED(vec_arr_out); \ } while (0); #define RUN_TESTS(bits, size) \ - RUN_TEST( , , bits, size) \ - RUN_TEST(u, U, bits, size) + RUN_TEST(int, INT, bits, size) \ + RUN_TEST(uint, UINT, bits, size) RUN_TESTS(8, 2) @@ -56,6 +57,17 @@ RUN_TESTS(64, 8) #undef RUN_TESTS + + /* floating point */ + RUN_TEST(f, F, 32, 2) + RUN_TEST(f, F, 32, 4) + RUN_TEST(f, F, 32, 8) + RUN_TEST(f, F, 32, 16) + + RUN_TEST(f, F, 64, 2) + RUN_TEST(f, F, 64, 4) + RUN_TEST(f, F, 64, 8) + #undef RUN_TEST return ret;
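With the renamed parameters, RUN_TEST(f, F, 32, 4) expands (written out by hand for illustration, do-while wrapper omitted) to roughly the following, inside test_align() where ret is declared:

	int i;
	/* allocate the aligned array */
	VF32x4_ALIGNED_ARRAY(vec_arr);

	/* fill the values */
	for (i = 0; i < 4; i++)
		vec_arr[i] = i;

	/* try to load it (through the unaligned entry point) */
	vf32x4 vec = vf32x4_load(vec_arr);

	/* now allocate an output array */
	VF32x4_ALIGNED_ARRAY(vec_arr_out);

	/* try storing it */
	vf32x4_store_aligned(vec, vec_arr_out);

	/* mark success or failure: 4 * (32 / 8) == 16 bytes compared */
	ret |= !!memcmp(vec_arr, vec_arr_out, 4 * (32 / 8));

	ret |= !VF32x4_PTR_ALIGNED(vec_arr);
	ret |= !VF32x4_PTR_ALIGNED(vec_arr_out);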
--- a/test/test_arith.h Tue Apr 29 16:54:13 2025 -0400 +++ b/test/test_arith.h Wed Apr 30 18:36:38 2025 -0400 @@ -1,22 +1,22 @@ -#define CREATE_TEST_EX(sign, psign, csign, bits, size, op, equiv, secondsign, secondcsign) \ - static int test_arith_v##sign##int##bits##x##size##_##op(v##sign##int##bits##x##size a, v##secondsign##int##bits##x##size b) \ +#define CREATE_TEST_EX(type, ctype, print, bits, size, op, equiv, secondtype, secondctype) \ + static int test_arith_v##type##bits##x##size##_##op(v##type##bits##x##size a, v##secondtype##bits##x##size b) \ { \ - V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_a); \ - V##secondcsign##INT##bits##x##size##_ALIGNED_ARRAY(orig_b); \ - V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_c); \ + V##ctype##bits##x##size##_ALIGNED_ARRAY(orig_a); \ + V##secondctype##bits##x##size##_ALIGNED_ARRAY(orig_b); \ + V##ctype##bits##x##size##_ALIGNED_ARRAY(orig_c); \ \ - v##sign##int##bits##x##size c = v##sign##int##bits##x##size##_##op(a, b); \ + v##type##bits##x##size c = v##type##bits##x##size##_##op(a, b); \ \ - v##sign##int##bits##x##size##_store_aligned(a, orig_a); \ - v##secondsign##int##bits##x##size##_store_aligned(b, orig_b); \ - v##sign##int##bits##x##size##_store_aligned(c, orig_c); \ + v##type##bits##x##size##_store_aligned(a, orig_a); \ + v##secondtype##bits##x##size##_store_aligned(b, orig_b); \ + v##type##bits##x##size##_store_aligned(c, orig_c); \ \ for (int i = 0; i < size; i++) { \ - if ((sign##int##bits##_t)(equiv) != orig_c[i]) { \ - fprintf(stderr, "v" #sign "int" #bits "x" #size "_" #op " test FAILED at index %d: (%s) [%" PRI ## psign ## bits "] does not equal result [%" PRI ## psign ## bits "]!\n", i, #equiv, (vec_##sign##int##bits)(equiv), orig_c[i]); \ - print_v##sign##int##bits##x##size(stderr,a); \ - print_v##secondsign##int##bits##x##size(stderr,b); \ - print_v##sign##int##bits##x##size(stderr,c); \ + if ((vec_##type##bits)(equiv) != orig_c[i]) { \ + fprintf(stderr, "v" #type #bits "x" #size "_" #op " test FAILED at index %d: (%s) [%" print "] does not equal result [%" print "]!\n", i, #equiv, (vec_##type##bits)(equiv), orig_c[i]); \ + print_v##type##bits##x##size(stderr,a); \ + print_v##secondtype##bits##x##size(stderr,b); \ + print_v##type##bits##x##size(stderr,c); \ fprintf(stderr, "\n"); \ return 1; \ } \ @@ -25,31 +25,41 @@ return 0; \ } -#define CREATE_TEST(sign, psign, csign, bits, size, op, equiv) \ - CREATE_TEST_EX(sign, psign, csign, bits, size, op, equiv, sign, csign) +#define CREATE_TEST(type, ctype, print, bits, size, op, equiv) \ + CREATE_TEST_EX(type, ctype, print, bits, size, op, equiv, type, ctype) -#define CREATE_TEST_SHIFT(sign, psign, csign, bits, size, op, equiv) \ - CREATE_TEST_EX(sign, psign, csign, bits, size, op, equiv, u, U) +#define CREATE_TEST_SHIFT(type, ctype, print, bits, size, op, equiv) \ + CREATE_TEST_EX(type, ctype, print, bits, size, op, equiv, uint, UINT) -#define CREATE_TESTS_SIGN(sign, psign, csign, bits, size) \ - CREATE_TEST(sign, psign, csign, bits, size, add, orig_a[i] + orig_b[i]) \ - CREATE_TEST(sign, psign, csign, bits, size, sub, orig_a[i] - orig_b[i]) \ - CREATE_TEST(sign, psign, csign, bits, size, mul, orig_a[i] * orig_b[i]) \ - CREATE_TEST(sign, psign, csign, bits, size, div, (orig_b[i]) ? (orig_a[i] / orig_b[i]) : 0) \ - CREATE_TEST(sign, psign, csign, bits, size, mod, (orig_b[i]) ? 
(orig_a[i] % orig_b[i]) : 0) \ - CREATE_TEST(sign, psign, csign, bits, size, and, orig_a[i] & orig_b[i]) \ - CREATE_TEST(sign, psign, csign, bits, size, or, orig_a[i] | orig_b[i]) \ - CREATE_TEST(sign, psign, csign, bits, size, xor, orig_a[i] ^ orig_b[i]) \ - CREATE_TEST(sign, psign, csign, bits, size, avg, (vec_##sign##int##bits)vec_im##sign##avg(orig_a[i], orig_b[i])) \ - CREATE_TEST_SHIFT(sign, psign, csign, bits, size, rshift, vec_##sign##rshift(orig_a[i], orig_b[i])) \ - CREATE_TEST_SHIFT(sign, psign, csign, bits, size, lshift, vec_##sign##lshift(orig_a[i], orig_b[i])) \ - CREATE_TEST_SHIFT(sign, psign, csign, bits, size, lrshift, vec_urshift((vec_uint##bits)orig_a[i], orig_b[i])) \ - CREATE_TEST(sign, psign, csign, bits, size, min, (orig_a[i] < orig_b[i]) ? orig_a[i] : orig_b[i]) \ - CREATE_TEST(sign, psign, csign, bits, size, max, (orig_a[i] > orig_b[i]) ? orig_a[i] : orig_b[i]) +#define CREATE_TESTS_INT(type, ctype, sign, print, bits, size) \ + CREATE_TEST(type, ctype, print, bits, size, add, orig_a[i] + orig_b[i]) \ + CREATE_TEST(type, ctype, print, bits, size, sub, orig_a[i] - orig_b[i]) \ + CREATE_TEST(type, ctype, print, bits, size, mul, orig_a[i] * orig_b[i]) \ + CREATE_TEST(type, ctype, print, bits, size, div, (orig_b[i]) ? (orig_a[i] / orig_b[i]) : 0) \ + CREATE_TEST(type, ctype, print, bits, size, mod, (orig_b[i]) ? (orig_a[i] % orig_b[i]) : 0) \ + CREATE_TEST(type, ctype, print, bits, size, and, orig_a[i] & orig_b[i]) \ + CREATE_TEST(type, ctype, print, bits, size, or, orig_a[i] | orig_b[i]) \ + CREATE_TEST(type, ctype, print, bits, size, xor, orig_a[i] ^ orig_b[i]) \ + CREATE_TEST(type, ctype, print, bits, size, avg, (vec_##type##bits)vec_im##sign##avg(orig_a[i], orig_b[i])) \ + CREATE_TEST_SHIFT(type, ctype, print, bits, size, rshift, vec_##sign##rshift(orig_a[i], orig_b[i])) \ + CREATE_TEST_SHIFT(type, ctype, print, bits, size, lshift, vec_##sign##lshift(orig_a[i], orig_b[i])) \ + CREATE_TEST_SHIFT(type, ctype, print, bits, size, lrshift, vec_urshift((vec_uint##bits)orig_a[i], orig_b[i])) \ + CREATE_TEST(type, ctype, print, bits, size, min, (orig_a[i] < orig_b[i]) ? orig_a[i] : orig_b[i]) \ + CREATE_TEST(type, ctype, print, bits, size, max, (orig_a[i] > orig_b[i]) ? orig_a[i] : orig_b[i]) + +#define CREATE_TESTS_FLOAT(bits, size) \ + CREATE_TEST(f, F, "f", bits, size, add, orig_a[i] + orig_b[i]) \ + CREATE_TEST(f, F, "f", bits, size, sub, orig_a[i] - orig_b[i]) \ + CREATE_TEST(f, F, "f", bits, size, mul, orig_a[i] * orig_b[i]) \ + CREATE_TEST(f, F, "f", bits, size, div, (orig_b[i]) ? (orig_a[i] / orig_b[i]) : 0) \ + CREATE_TEST(f, F, "f", bits, size, mod, (orig_b[i]) ? (fmod(orig_a[i], orig_b[i])) : 0) \ + CREATE_TEST(f, F, "f", bits, size, avg, (orig_a[i] + orig_b[i]) / 2) \ + CREATE_TEST(f, F, "f", bits, size, min, (orig_a[i] < orig_b[i]) ? orig_a[i] : orig_b[i]) \ + CREATE_TEST(f, F, "f", bits, size, max, (orig_a[i] > orig_b[i]) ? 
orig_a[i] : orig_b[i]) #define CREATE_TESTS(bits, size) \ - CREATE_TESTS_SIGN(, d, , bits, size) \ - CREATE_TESTS_SIGN(u, u, U, bits, size) + CREATE_TESTS_INT(int, INT, /* nothing */, PRI##d##bits, bits, size) \ + CREATE_TESTS_INT(uint, UINT, u, PRI##u##bits, bits, size) CREATE_TESTS(8, 2) @@ -75,7 +85,17 @@ CREATE_TESTS(32, 16) CREATE_TESTS(64, 8) -#undef CREATE_TESTS_SIGN +CREATE_TESTS_FLOAT(32, 2) +CREATE_TESTS_FLOAT(32, 4) +CREATE_TESTS_FLOAT(32, 8) +CREATE_TESTS_FLOAT(32, 16) + +CREATE_TESTS_FLOAT(64, 2) +CREATE_TESTS_FLOAT(64, 4) +CREATE_TESTS_FLOAT(64, 8) + +#undef CREATE_TESTS_INT +#undef CREATE_TESTS_FLOAT #undef CREATE_TESTS #undef CREATE_TEST #undef CREATE_TEST_SHIFT @@ -84,38 +104,54 @@ { int ret = 0; -#define RUN_TESTS_SIGN(sign, bits, size) \ - for (size_t i = 0U; i < ARRAY_SIZE(testval##sign##bits); i++) { \ - const v##sign##int##bits##x##size a = vtest##sign##bits##x##size(i); \ - for (size_t j = 0U; j < ARRAY_SIZE(testval##sign##bits); j++) { \ - const v##sign##int##bits##x##size b = vtest##sign##bits##x##size(j); \ - ret |= test_arith_v##sign##int##bits##x##size##_add(a, b); \ - ret |= test_arith_v##sign##int##bits##x##size##_sub(a, b); \ - ret |= test_arith_v##sign##int##bits##x##size##_mul(a, b); \ - ret |= test_arith_v##sign##int##bits##x##size##_div(a, b); \ - ret |= test_arith_v##sign##int##bits##x##size##_mod(a, b); \ - ret |= test_arith_v##sign##int##bits##x##size##_and(a, b); \ - ret |= test_arith_v##sign##int##bits##x##size##_or(a, b); \ - ret |= test_arith_v##sign##int##bits##x##size##_xor(a, b); \ - ret |= test_arith_v##sign##int##bits##x##size##_avg(a, b); \ - ret |= test_arith_v##sign##int##bits##x##size##_min(a, b); \ - ret |= test_arith_v##sign##int##bits##x##size##_max(a, b); \ +#define RUN_TESTS_SIGN(shorttype, type, bits, size) \ + for (size_t i = 0U; i < ARRAY_SIZE(testval##shorttype##bits); i++) { \ + const v##type##bits##x##size a = vtest##shorttype##bits##x##size(i); \ + for (size_t j = 0U; j < ARRAY_SIZE(testval##shorttype##bits); j++) { \ + const v##type##bits##x##size b = vtest##shorttype##bits##x##size(j); \ + ret |= test_arith_v##type##bits##x##size##_add(a, b); \ + ret |= test_arith_v##type##bits##x##size##_sub(a, b); \ + ret |= test_arith_v##type##bits##x##size##_mul(a, b); \ + ret |= test_arith_v##type##bits##x##size##_div(a, b); \ + ret |= test_arith_v##type##bits##x##size##_mod(a, b); \ + ret |= test_arith_v##type##bits##x##size##_and(a, b); \ + ret |= test_arith_v##type##bits##x##size##_or(a, b); \ + ret |= test_arith_v##type##bits##x##size##_xor(a, b); \ + ret |= test_arith_v##type##bits##x##size##_avg(a, b); \ + ret |= test_arith_v##type##bits##x##size##_min(a, b); \ + ret |= test_arith_v##type##bits##x##size##_max(a, b); \ } \ } \ \ - for (size_t i = 0U; i < ARRAY_SIZE(testval##sign##bits); i++) { \ - const v##sign##int##bits##x##size a = vtest##sign##bits##x##size(i); \ + for (size_t i = 0U; i < ARRAY_SIZE(testval##shorttype##bits); i++) { \ + const v##type##bits##x##size a = vtest##shorttype##bits##x##size(i); \ for (uint32_t j = 0U; j < bits; j++) { \ const vuint##bits##x##size b = vuint##bits##x##size##_splat(j); \ - ret |= test_arith_v##sign##int##bits##x##size##_rshift(a, b); \ - ret |= test_arith_v##sign##int##bits##x##size##_lshift(a, b); \ - ret |= test_arith_v##sign##int##bits##x##size##_lrshift(a, b); \ + ret |= test_arith_v##type##bits##x##size##_rshift(a, b); \ + ret |= test_arith_v##type##bits##x##size##_lshift(a, b); \ + ret |= test_arith_v##type##bits##x##size##_lrshift(a, b); \ } \ } #define RUN_TESTS(bits, size) \ - 
RUN_TESTS_SIGN( , bits, size) \ - RUN_TESTS_SIGN(u, bits, size) + RUN_TESTS_SIGN( , int, bits, size) \ + RUN_TESTS_SIGN(u, uint, bits, size) + +#define RUN_TESTS_FLOAT(shorttype, type, bits, size) \ + for (size_t i = 0U; i < ARRAY_SIZE(testval##shorttype##bits); i++) { \ + const v##type##bits##x##size a = vtest##shorttype##bits##x##size(i); \ + for (size_t j = 0U; j < ARRAY_SIZE(testval##shorttype##bits); j++) { \ + const v##type##bits##x##size b = vtest##shorttype##bits##x##size(j); \ + ret |= test_arith_v##type##bits##x##size##_add(a, b); \ + ret |= test_arith_v##type##bits##x##size##_sub(a, b); \ + ret |= test_arith_v##type##bits##x##size##_mul(a, b); \ + ret |= test_arith_v##type##bits##x##size##_div(a, b); \ + ret |= test_arith_v##type##bits##x##size##_mod(a, b); \ + ret |= test_arith_v##type##bits##x##size##_avg(a, b); \ + ret |= test_arith_v##type##bits##x##size##_min(a, b); \ + ret |= test_arith_v##type##bits##x##size##_max(a, b); \ + } \ + } RUN_TESTS(8, 2) @@ -141,6 +177,16 @@ RUN_TESTS(32, 16) RUN_TESTS(64, 8) + RUN_TESTS_FLOAT(f, f, 32, 2) + RUN_TESTS_FLOAT(f, f, 32, 4) + RUN_TESTS_FLOAT(f, f, 32, 8) + RUN_TESTS_FLOAT(f, f, 32, 16) + + RUN_TESTS_FLOAT(f, f, 64, 2) + RUN_TESTS_FLOAT(f, f, 64, 4) + RUN_TESTS_FLOAT(f, f, 64, 8) + +#undef RUN_TESTS_FLOAT #undef RUN_TESTS_SIGN #undef RUN_TESTS
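For reference, the renamed CREATE_TEST macro pastes the element type, bit width, and lane count straight into the function name, so one float instantiation comes out roughly like this (a trimmed sketch of CREATE_TEST(f, F, "f", 32, 4, add, orig_a[i] + orig_b[i]); the real macro's failure path also dumps all three vectors with print_vf32x4):

    static int test_arith_vf32x4_add(vf32x4 a, vf32x4 b)
    {
        VF32x4_ALIGNED_ARRAY(orig_a);
        VF32x4_ALIGNED_ARRAY(orig_b);
        VF32x4_ALIGNED_ARRAY(orig_c);

        vf32x4 c = vf32x4_add(a, b);

        vf32x4_store_aligned(a, orig_a);
        vf32x4_store_aligned(b, orig_b);
        vf32x4_store_aligned(c, orig_c);

        for (int i = 0; i < 4; i++) {
            /* the reference result is computed lane-by-lane in plain C */
            if ((vec_f32)(orig_a[i] + orig_b[i]) != orig_c[i]) {
                fprintf(stderr, "vf32x4_add test FAILED at index %d\n", i);
                return 1;
            }
        }

        return 0;
    }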
--- a/test/test_benchmark.h Tue Apr 29 16:54:13 2025 -0400 +++ b/test/test_benchmark.h Wed Apr 30 18:36:38 2025 -0400 @@ -2,37 +2,115 @@ /* ------------------------------------------------------------------------ */ /* simple benchmark for getting the min/max range of an audio sample. */ -extern void test_benchmark_sample_minmax_simple_impl(int16_t *smpl, uint32_t length, int32_t *pmin, int32_t *pmax); -extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl, uint32_t length, int32_t *pmin, int32_t *pmax); +extern void test_benchmark_sample_minmax_int8x2_impl(vec_int8 *smpl, uint32_t length, vec_int8 *pmin, vec_int8 *pmax); VEC_FUNC_IMPL void test_benchmark_sample_minmax(void) { - int32_t min, max; - clock_t start, end; int i; - int16_t *q = vec_malloc(16000001u * 2u); + + union { + vec_int8 int8[16000001]; + vec_uint8 uint8[16000001]; + vec_int16 int16[16000001]; + vec_uint16 uint16[16000001]; + vec_int32 int32[16000001]; + vec_uint32 uint32[16000001]; + vec_int64 int64[16000001]; + vec_uint64 uint64[16000001]; + vec_f32 f32[16000001]; + vec_f64 f64[16000001]; + } *q; + + q = vec_malloc(sizeof(*q)); + + for (i = 0; i < 16000001; i++) + q->f64[i] = (double)rand() / RAND_MAX; - printf("\nsigned 16-bit audio sample min/max - 1 thousand passes - 16000001 samples\n\n"); + printf("\naudio sample min/max - 1 thousand passes - 16000001 samples\n\n"); - start = clock(); - for (i = 0; i < 100; i++) { - min = INT32_MAX; - max = INT32_MIN; - test_benchmark_sample_minmax_vec_impl(q, 16000001u, &min, &max); +#define DO_TIMER(TYPE,NAME,MIN,MAX) \ + { \ + vec_##TYPE min, max; \ + clock_t start, end; \ + \ + start = clock(); \ + \ + for (i = 0; i < 1000; i++) { \ + extern void test_benchmark_sample_minmax_##NAME##_impl(vec_##TYPE *smpl, uint32_t length, vec_##TYPE *pmin, vec_##TYPE *pmax); \ + \ + min = MAX; \ + max = MIN; \ + \ + test_benchmark_sample_minmax_##NAME##_impl(q->TYPE, 16000001u, &min, &max); \ + } \ + \ + end = clock(); \ + \ + printf("- %s: took %f secs\n", #NAME, (double)(end - start) / CLOCKS_PER_SEC); \ } - end = clock(); + + DO_TIMER(int8, int8x2, INT8_MIN, INT8_MAX) + DO_TIMER(int8, int8x4, INT8_MIN, INT8_MAX) + DO_TIMER(int8, int8x8, INT8_MIN, INT8_MAX) + DO_TIMER(int8, int8x16, INT8_MIN, INT8_MAX) + DO_TIMER(int8, int8x32, INT8_MIN, INT8_MAX) + DO_TIMER(int8, int8x64, INT8_MIN, INT8_MAX) + DO_TIMER(int8, int8, INT8_MIN, INT8_MAX) - printf("- vec: took %f secs\n", (double)(end - start) / CLOCKS_PER_SEC); + DO_TIMER(int16, int16x2, INT16_MIN, INT16_MAX) + DO_TIMER(int16, int16x4, INT16_MIN, INT16_MAX) + DO_TIMER(int16, int16x8, INT16_MIN, INT16_MAX) + DO_TIMER(int16, int16x16, INT16_MIN, INT16_MAX) + DO_TIMER(int16, int16x32, INT16_MIN, INT16_MAX) + DO_TIMER(int16, int16, INT16_MIN, INT16_MAX) + + DO_TIMER(int32, int32x2, INT32_MIN, INT32_MAX) + DO_TIMER(int32, int32x4, INT32_MIN, INT32_MAX) + DO_TIMER(int32, int32x8, INT32_MIN, INT32_MAX) + DO_TIMER(int32, int32x16, INT32_MIN, INT32_MAX) + DO_TIMER(int32, int32, INT32_MIN, INT32_MAX) + + DO_TIMER(int64, int64x2, INT64_MIN, INT64_MAX) + DO_TIMER(int64, int64x4, INT64_MIN, INT64_MAX) + DO_TIMER(int64, int64x8, INT64_MIN, INT64_MAX) + DO_TIMER(int64, int64, INT64_MIN, INT64_MAX) - start = clock(); - for (i = 0; i < 100; i++) { - min = INT32_MAX; - max = INT32_MIN; - test_benchmark_sample_minmax_simple_impl(q, 16000001u, &min, &max); - } - end = clock(); + DO_TIMER(uint8, uint8x2, 0, UINT8_MAX) + DO_TIMER(uint8, uint8x4, 0, UINT8_MAX) + DO_TIMER(uint8, uint8x8, 0, UINT8_MAX) + DO_TIMER(uint8, uint8x16, 0, UINT8_MAX) + 
DO_TIMER(uint8, uint8x32, 0, UINT8_MAX) + DO_TIMER(uint8, uint8x64, 0, UINT8_MAX) + DO_TIMER(uint8, uint8, 0, UINT8_MAX) + + DO_TIMER(uint16, uint16x2, 0, UINT16_MAX) + DO_TIMER(uint16, uint16x4, 0, UINT16_MAX) + DO_TIMER(uint16, uint16x8, 0, UINT16_MAX) + DO_TIMER(uint16, uint16x16, 0, UINT16_MAX) + DO_TIMER(uint16, uint16x32, 0, UINT16_MAX) + DO_TIMER(uint16, uint16, 0, UINT16_MAX) - printf("- simple: took %f secs\n", (double)(end - start) / CLOCKS_PER_SEC); + DO_TIMER(uint32, uint32x2, 0, UINT32_MAX) + DO_TIMER(uint32, uint32x4, 0, UINT32_MAX) + DO_TIMER(uint32, uint32x8, 0, UINT32_MAX) + DO_TIMER(uint32, uint32x16, 0, UINT32_MAX) + DO_TIMER(uint32, uint32, 0, UINT32_MAX) + + DO_TIMER(uint64, uint64x2, 0, UINT64_MAX) + DO_TIMER(uint64, uint64x4, 0, UINT64_MAX) + DO_TIMER(uint64, uint64x8, 0, UINT64_MAX) + DO_TIMER(uint64, uint64, 0, UINT64_MAX) + + DO_TIMER(f32, f32x2, -1.0f, 1.0f) + DO_TIMER(f32, f32x4, -1.0f, 1.0f) + DO_TIMER(f32, f32x8, -1.0f, 1.0f) + DO_TIMER(f32, f32x16, -1.0f, 1.0f) + DO_TIMER(f32, f32, -1.0f, 1.0f) + + DO_TIMER(f64, f64x2, -1.0, 1.0) + DO_TIMER(f64, f64x4, -1.0, 1.0) + DO_TIMER(f64, f64x8, -1.0, 1.0) + DO_TIMER(f64, f64, -1.0, 1.0) printf("\n");
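Each DO_TIMER invocation wraps one implementation in the same clock()-based loop inside test_benchmark_sample_minmax; a single expansion boils down to roughly the following (sketch of DO_TIMER(f32, f32x4, -1.0f, 1.0f), relying on the enclosing function's `i` and `q` just as the macro does):

    {
        vec_f32 min, max;
        clock_t start, end;

        start = clock();

        for (i = 0; i < 1000; i++) {
            extern void test_benchmark_sample_minmax_f32x4_impl(vec_f32 *smpl,
                uint32_t length, vec_f32 *pmin, vec_f32 *pmax);

            /* seed min/max "backwards" so the first sample always wins */
            min = 1.0f;
            max = -1.0f;

            test_benchmark_sample_minmax_f32x4_impl(q->f32, 16000001u, &min, &max);
        }

        end = clock();

        printf("- %s: took %f secs\n", "f32x4", (double)(end - start) / CLOCKS_PER_SEC);
    }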
--- a/test/test_benchmark_simple.c Tue Apr 29 16:54:13 2025 -0400 +++ b/test/test_benchmark_simple.c Wed Apr 30 18:36:38 2025 -0400 @@ -1,18 +1,30 @@ -#include <stdint.h> +#include "vec/defs.h" -extern void test_benchmark_sample_minmax_simple_impl(int16_t *smpl, - uint32_t length, int32_t *pmin, int32_t *pmax) -{ - int32_t min = *pmin; - int32_t max = *pmax; - - while (length--) { - if (*smpl < min) min = *smpl; - if (*smpl > max) max = *smpl; - - smpl++; +#define DEFINE_VARIANT(type, bits) \ + extern void test_benchmark_sample_minmax_##type##bits##_impl(vec_##type##bits *smpl, \ + uint32_t length, vec_##type##bits *pmin, vec_##type##bits *pmax) \ + { \ + vec_##type##bits min = *pmin; \ + vec_##type##bits max = *pmax; \ + \ + while (length--) { \ + if (*smpl < min) min = *smpl; \ + if (*smpl > max) max = *smpl; \ + \ + smpl++; \ + } \ + \ + *pmin = min; \ + *pmax = max; \ } - *pmin = min; - *pmax = max; -} +DEFINE_VARIANT(int, 8) +DEFINE_VARIANT(uint, 8) +DEFINE_VARIANT(int, 16) +DEFINE_VARIANT(uint, 16) +DEFINE_VARIANT(int, 32) +DEFINE_VARIANT(uint, 32) +DEFINE_VARIANT(f, 32) +DEFINE_VARIANT(int, 64) +DEFINE_VARIANT(uint, 64) +DEFINE_VARIANT(f, 64)
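DEFINE_VARIANT just stamps out the same scalar reference loop once per element type; for example, DEFINE_VARIANT(f, 32) expands to:

    extern void test_benchmark_sample_minmax_f32_impl(vec_f32 *smpl,
        uint32_t length, vec_f32 *pmin, vec_f32 *pmax)
    {
        vec_f32 min = *pmin;
        vec_f32 max = *pmax;

        /* plain element-at-a-time baseline for the vectorized versions */
        while (length--) {
            if (*smpl < min) min = *smpl;
            if (*smpl > max) max = *smpl;

            smpl++;
        }

        *pmin = min;
        *pmax = max;
    }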
--- a/test/test_benchmark_vec.c Tue Apr 29 16:54:13 2025 -0400 +++ b/test/test_benchmark_vec.c Wed Apr 30 18:36:38 2025 -0400 @@ -1,43 +1,97 @@ #include "vec/vec.h" -extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl, - uint32_t length, int32_t *pmin, int32_t *pmax) -{ - int32_t smin = INT32_MAX, smax = INT32_MIN; - uint32_t len32; - int i; - vint16x8 min = vint16x8_splat(*pmin); - vint16x8 max = vint16x8_splat(*pmax); - VINT16x8_ALIGNED_ARRAY(mins); - VINT16x8_ALIGNED_ARRAY(maxs); - - len32 = length / 8; - while (len32--) { - vint16x8 vec = vint16x8_load_aligned(smpl); - - min = vint16x8_min(vec, min); - max = vint16x8_max(vec, max); - - smpl += 8; +#define DEFINE_MINMAX_BENCHMARK(TYPE,CTYPE,BITS,SIZE,MAX,MIN) \ + extern void test_benchmark_sample_minmax_##TYPE##BITS##x##SIZE##_impl(vec_##TYPE##BITS *smpl, \ + uint32_t length, vec_##TYPE##BITS *pmin, vec_##TYPE##BITS *pmax) \ + { \ + vec_##TYPE##BITS smin = MAX, smax = MIN; \ + uint32_t len32; \ + int i; \ + v##TYPE##BITS##x##SIZE min = v##TYPE##BITS##x##SIZE##_splat(*pmin); \ + v##TYPE##BITS##x##SIZE max = v##TYPE##BITS##x##SIZE##_splat(*pmax); \ + V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(mins); \ + V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(maxs); \ + \ + len32 = length / SIZE; \ + while (len32--) { \ + v##TYPE##BITS##x##SIZE vec = v##TYPE##BITS##x##SIZE##_load_aligned(smpl); \ + \ + min = v##TYPE##BITS##x##SIZE##_min(vec, min); \ + max = v##TYPE##BITS##x##SIZE##_max(vec, max); \ + \ + smpl += SIZE; \ + } \ + \ + v##TYPE##BITS##x##SIZE##_store_aligned(min, mins); \ + v##TYPE##BITS##x##SIZE##_store_aligned(max, maxs); \ + \ + /* get the lowest minimum of what we have left */ \ + for (i = 0; i < SIZE; i++) { \ + if (mins[i] < smin) smin = mins[i]; \ + if (maxs[i] > smax) smax = maxs[i]; \ + } \ + \ + len32 = length % SIZE; \ + while (len32--) { \ + if (*smpl < smin) smin = *smpl; \ + if (*smpl > smax) smax = *smpl; \ + \ + smpl++; \ + } \ + \ + *pmin = smin; \ + *pmax = smax; \ } - vint16x8_store_aligned(min, mins); - vint16x8_store_aligned(max, maxs); +DEFINE_MINMAX_BENCHMARK(int,INT,8,2,INT8_MAX,INT8_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,8,4,INT8_MAX,INT8_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,8,8,INT8_MAX,INT8_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,8,16,INT8_MAX,INT8_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,8,32,INT8_MAX,INT8_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,8,64,INT8_MAX,INT8_MIN) - /* get the lowest minimum of what we have left */ - for (i = 0; i < 8; i++) { - if (mins[i] < smin) smin = mins[i]; - if (maxs[i] > smax) smax = maxs[i]; - } +DEFINE_MINMAX_BENCHMARK(int,INT,16,2,INT16_MAX,INT16_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,16,4,INT16_MAX,INT16_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,16,8,INT16_MAX,INT16_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,16,16,INT16_MAX,INT16_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,16,32,INT16_MAX,INT16_MIN) + +DEFINE_MINMAX_BENCHMARK(int,INT,32,2,INT32_MAX,INT32_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,32,4,INT32_MAX,INT32_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,32,8,INT32_MAX,INT32_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,32,16,INT32_MAX,INT32_MIN) + +DEFINE_MINMAX_BENCHMARK(int,INT,64,2,INT64_MAX,INT64_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,64,4,INT64_MAX,INT64_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,64,8,INT64_MAX,INT64_MIN) - len32 = length % 8; - while (len32--) { - if (*smpl < smin) smin = *smpl; - if (*smpl > smax) smax = *smpl; +DEFINE_MINMAX_BENCHMARK(uint,UINT,8,2,UINT8_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,8,4,UINT8_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,8,8,UINT8_MAX,0) 
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,16,UINT8_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,8,32,UINT8_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,8,64,UINT8_MAX,0) + +DEFINE_MINMAX_BENCHMARK(uint,UINT,16,2,UINT16_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,16,4,UINT16_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,16,8,UINT16_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,16,16,UINT16_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,16,32,UINT16_MAX,0) - smpl++; - } +DEFINE_MINMAX_BENCHMARK(uint,UINT,32,2,UINT32_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,32,4,UINT32_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,32,8,UINT32_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,32,16,UINT32_MAX,0) + +DEFINE_MINMAX_BENCHMARK(uint,UINT,64,2,UINT64_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,64,4,UINT64_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,64,8,UINT64_MAX,0) - *pmin = smin; - *pmax = smax; -} +DEFINE_MINMAX_BENCHMARK(f,F,32,2,1.0f,-1.0f) +DEFINE_MINMAX_BENCHMARK(f,F,32,4,1.0f,-1.0f) +DEFINE_MINMAX_BENCHMARK(f,F,32,8,1.0f,-1.0f) +DEFINE_MINMAX_BENCHMARK(f,F,32,16,1.0f,-1.0f) + +DEFINE_MINMAX_BENCHMARK(f,F,64,2,1.0,-1.0) +DEFINE_MINMAX_BENCHMARK(f,F,64,4,1.0,-1.0) +DEFINE_MINMAX_BENCHMARK(f,F,64,8,1.0,-1.0)
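The vectorized counterpart keeps the shape of the old int16x8 routine: a SIMD main loop over SIZE lanes, a horizontal reduce of the per-lane minima/maxima, then a scalar pass over the (length % SIZE) leftover samples. Roughly what DEFINE_MINMAX_BENCHMARK(f,F,32,4,1.0f,-1.0f) generates:

    extern void test_benchmark_sample_minmax_f32x4_impl(vec_f32 *smpl,
        uint32_t length, vec_f32 *pmin, vec_f32 *pmax)
    {
        vec_f32 smin = 1.0f, smax = -1.0f;
        uint32_t len32;
        int i;
        vf32x4 min = vf32x4_splat(*pmin);
        vf32x4 max = vf32x4_splat(*pmax);
        VF32x4_ALIGNED_ARRAY(mins);
        VF32x4_ALIGNED_ARRAY(maxs);

        /* main loop: 4 samples per iteration */
        len32 = length / 4;
        while (len32--) {
            vf32x4 vec = vf32x4_load_aligned(smpl);

            min = vf32x4_min(vec, min);
            max = vf32x4_max(vec, max);

            smpl += 4;
        }

        vf32x4_store_aligned(min, mins);
        vf32x4_store_aligned(max, maxs);

        /* reduce the four lanes */
        for (i = 0; i < 4; i++) {
            if (mins[i] < smin) smin = mins[i];
            if (maxs[i] > smax) smax = maxs[i];
        }

        /* scalar tail for the remaining samples */
        len32 = length % 4;
        while (len32--) {
            if (*smpl < smin) smin = *smpl;
            if (*smpl > smax) smax = *smpl;

            smpl++;
        }

        *pmin = smin;
        *pmax = smax;
    }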
--- a/test/test_compare.h Tue Apr 29 16:54:13 2025 -0400 +++ b/test/test_compare.h Wed Apr 30 18:36:38 2025 -0400 @@ -1,21 +1,21 @@ -#define CREATE_TEST(sign, psign, bits, size, op, equiv) \ - static int test_compare_v##sign##int##bits##x##size##_##op(v##sign##int##bits##x##size a, v##sign##int##bits##x##size b) \ +#define CREATE_TEST(type, print, bits, size, op, equiv) \ + static int test_compare_v##type##bits##x##size##_##op(v##type##bits##x##size a, v##type##bits##x##size b) \ { \ - sign##int##bits##_t orig_a[size], orig_b[size], orig_c[size]; \ + vec_##type##bits orig_a[size], orig_b[size], orig_c[size]; \ \ - v##sign##int##bits##x##size c = v##sign##int##bits##x##size##_##op(a, b); \ + v##type##bits##x##size c = v##type##bits##x##size##_##op(a, b); \ \ - v##sign##int##bits##x##size##_store(a, orig_a); \ - v##sign##int##bits##x##size##_store(b, orig_b); \ - v##sign##int##bits##x##size##_store(c, orig_c); \ + v##type##bits##x##size##_store(a, orig_a); \ + v##type##bits##x##size##_store(b, orig_b); \ + v##type##bits##x##size##_store(c, orig_c); \ \ for (int i = 0; i < size; i++) { \ - if ((vec_##sign##int##bits)(((equiv) ? UINT##bits##_MAX : 0)) != orig_c[i]) { \ - printf("%lld %lld\n", (long long)(vec_##sign##int##bits)(((equiv) ? UINT##bits##_MAX : 0)), (long long)orig_c[i]); \ - fprintf(stderr, "v" #sign "int" #bits "x" #size "_" #op " test FAILED at index %d: (" #equiv ") [%d] does not equal result [%" PRI ## psign ## bits "]!\n", i, equiv, orig_c[i]); \ - print_v##sign##int##bits##x##size(stderr,a); \ - print_v##sign##int##bits##x##size(stderr,b); \ - print_v##sign##int##bits##x##size(stderr,c); \ + uint##bits##_t res = (((equiv) ? UINT##bits##_MAX : 0)); \ + if (memcmp(&res, orig_c + i, sizeof(res))) { \ + fprintf(stderr, "v" #type #bits "x" #size "_" #op " test FAILED at index %d: (" #equiv ") [%d] does not equal result [%" print "]!\n", i, equiv, orig_c[i]); \ + print_v##type##bits##x##size(stderr,a); \ + print_v##type##bits##x##size(stderr,b); \ + print_v##type##bits##x##size(stderr,c); \ fprintf(stderr, "\n"); \ return 1; \ } \ @@ -24,63 +24,81 @@ return 0; \ } -#define CREATE_TESTS_SIGN(sign, psign, bits, size) \ - CREATE_TEST(sign, psign, bits, size, cmplt, orig_a[i] < orig_b[i]) \ - CREATE_TEST(sign, psign, bits, size, cmpgt, orig_a[i] > orig_b[i]) \ - CREATE_TEST(sign, psign, bits, size, cmpeq, orig_a[i] == orig_b[i]) \ - CREATE_TEST(sign, psign, bits, size, cmple, orig_a[i] <= orig_b[i]) \ - CREATE_TEST(sign, psign, bits, size, cmpge, orig_a[i] >= orig_b[i]) +#define CREATE_TESTS_SIGN(type, print, bits, size) \ + CREATE_TEST(type, print, bits, size, cmplt, orig_a[i] < orig_b[i]) \ + CREATE_TEST(type, print, bits, size, cmpgt, orig_a[i] > orig_b[i]) \ + CREATE_TEST(type, print, bits, size, cmpeq, orig_a[i] == orig_b[i]) \ + CREATE_TEST(type, print, bits, size, cmple, orig_a[i] <= orig_b[i]) \ + CREATE_TEST(type, print, bits, size, cmpge, orig_a[i] >= orig_b[i]) + +#define CREATE_TESTS_INT(bits, size) \ + CREATE_TESTS_SIGN(int, PRI##d##bits, bits, size) \ + CREATE_TESTS_SIGN(uint, PRI##u##bits, bits, size) -#define CREATE_TESTS(bits, size) CREATE_TESTS_SIGN(, d, bits, size) CREATE_TESTS_SIGN(u, u, bits, size) +#define CREATE_TESTS_FLOAT(bits, size) \ + CREATE_TESTS_SIGN(f, "f", bits, size) + +CREATE_TESTS_INT(8, 2) -CREATE_TESTS(8, 2) +CREATE_TESTS_INT(8, 4) +CREATE_TESTS_INT(16, 2) -CREATE_TESTS(8, 4) -CREATE_TESTS(16, 2) +CREATE_TESTS_INT(8, 8) +CREATE_TESTS_INT(16, 4) +CREATE_TESTS_INT(32, 2) -CREATE_TESTS(8, 8) -CREATE_TESTS(16, 4) -CREATE_TESTS(32, 2) 
+CREATE_TESTS_INT(8, 16) +CREATE_TESTS_INT(16, 8) +CREATE_TESTS_INT(32, 4) +CREATE_TESTS_INT(64, 2) -CREATE_TESTS(8, 16) -CREATE_TESTS(16, 8) -CREATE_TESTS(32, 4) -CREATE_TESTS(64, 2) +CREATE_TESTS_INT(8, 32) +CREATE_TESTS_INT(16, 16) +CREATE_TESTS_INT(32, 8) +CREATE_TESTS_INT(64, 4) -CREATE_TESTS(8, 32) -CREATE_TESTS(16, 16) -CREATE_TESTS(32, 8) -CREATE_TESTS(64, 4) +CREATE_TESTS_INT(8, 64) +CREATE_TESTS_INT(16, 32) +CREATE_TESTS_INT(32, 16) +CREATE_TESTS_INT(64, 8) -CREATE_TESTS(8, 64) -CREATE_TESTS(16, 32) -CREATE_TESTS(32, 16) -CREATE_TESTS(64, 8) +CREATE_TESTS_FLOAT(32, 2) +CREATE_TESTS_FLOAT(32, 4) +CREATE_TESTS_FLOAT(32, 8) +CREATE_TESTS_FLOAT(32, 16) + +CREATE_TESTS_FLOAT(64, 2) +CREATE_TESTS_FLOAT(64, 4) +CREATE_TESTS_FLOAT(64, 8) #undef CREATE_TESTS_SIGN -#undef CREATE_TESTS +#undef CREATE_TESTS_INT +#undef CREATE_TESTS_FLOAT #undef CREATE_TEST static int test_compare(void) { int ret = 0; -#define RUN_TESTS_SIGN(sign, bits, size) \ - for (size_t i = 0U; i < ARRAY_SIZE(testval##sign##bits); i++) { \ - const v##sign##int##bits##x##size a = vtest##sign##bits##x##size(i); \ - for (size_t j = 0U; j < ARRAY_SIZE(testval##sign##bits); j++) { \ - const v##sign##int##bits##x##size b = vtest##sign##bits##x##size(j); \ - ret |= test_compare_v##sign##int##bits##x##size##_cmplt(a, b); \ - ret |= test_compare_v##sign##int##bits##x##size##_cmpgt(a, b); \ - ret |= test_compare_v##sign##int##bits##x##size##_cmpeq(a, b); \ - ret |= test_compare_v##sign##int##bits##x##size##_cmple(a, b); \ - ret |= test_compare_v##sign##int##bits##x##size##_cmpge(a, b); \ +#define RUN_TESTS_SIGN(shorttype, type, bits, size) \ + for (size_t i = 0U; i < ARRAY_SIZE(testval##shorttype##bits); i++) { \ + const v##type##bits##x##size a = vtest##shorttype##bits##x##size(i); \ + for (size_t j = 0U; j < ARRAY_SIZE(testval##shorttype##bits); j++) { \ + const v##type##bits##x##size b = vtest##shorttype##bits##x##size(j); \ + ret |= test_compare_v##type##bits##x##size##_cmplt(a, b); \ + ret |= test_compare_v##type##bits##x##size##_cmpgt(a, b); \ + ret |= test_compare_v##type##bits##x##size##_cmpeq(a, b); \ + ret |= test_compare_v##type##bits##x##size##_cmple(a, b); \ + ret |= test_compare_v##type##bits##x##size##_cmpge(a, b); \ } \ } #define RUN_TESTS(bits, size) \ - RUN_TESTS_SIGN( , bits, size) \ - RUN_TESTS_SIGN(u, bits, size) + RUN_TESTS_SIGN( , int, bits, size) \ + RUN_TESTS_SIGN(u, uint, bits, size) + +#define RUN_TESTS_FLOAT(bits, size) \ + RUN_TESTS_SIGN(f, f, bits, size) RUN_TESTS(8, 2) @@ -106,7 +124,17 @@ RUN_TESTS(32, 16) RUN_TESTS(64, 8) + RUN_TESTS_FLOAT(32, 2) + RUN_TESTS_FLOAT(32, 4) + RUN_TESTS_FLOAT(32, 8) + RUN_TESTS_FLOAT(32, 16) + + RUN_TESTS_FLOAT(64, 2) + RUN_TESTS_FLOAT(64, 4) + RUN_TESTS_FLOAT(64, 8) + #undef RUN_TESTS_SIGN +#undef RUN_TESTS_FLOAT #undef RUN_TESTS return ret;
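The switch from a value comparison to memcmp() in the failure check matters for the new float vectors: a "true" lane comes back as an all-ones bit pattern, and reinterpreted as a float that pattern is a NaN, which compares unequal to everything (including itself), so a value-based check could never pass for the float types; comparing the raw bytes sidesteps that. A stand-alone scalar illustration (not part of the test suite):

    #include <stdint.h>
    #include <string.h>
    #include <stdio.h>

    int main(void)
    {
        /* an all-ones 32-bit lane, as produced by a "true" SIMD compare */
        uint32_t mask = UINT32_MAX;
        float as_float;

        memcpy(&as_float, &mask, sizeof(as_float));

        /* value comparison fails: the bit pattern is a NaN */
        printf("value equal? %d\n", as_float == as_float);                    /* 0 */

        /* byte comparison still succeeds, which is what the test now does */
        printf("bytes equal? %d\n", !memcmp(&as_float, &mask, sizeof(mask))); /* 1 */

        return 0;
    }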
--- a/utils/genaltivec.c Tue Apr 29 16:54:13 2025 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,376 +0,0 @@ -/** - * vec - a tiny SIMD vector library in C99 - * - * Copyright (c) 2024-2025 Paper - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. -**/ - -/* Use this file to generate include/vec/impl/ppc/altivec.h !! - * - * `gcc -o genaltivec genaltivec.c` */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <ctype.h> - -#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) - -/* ------------------------------------------------------------------------ */ - -/* #define USE_VSX_EXTENSIONS */ - -enum op { - /* return vector, take in a integer */ - OP_SPLAT = 0, - - /* return vector, take in an array */ - OP_LOAD_ALIGNED, - OP_LOAD, - - /* void, take in vector and array */ - OP_STORE_ALIGNED, - OP_STORE, - - /* return vector, takes in two vectors */ - OP_ADD, - OP_SUB, - OP_MUL, - OP_AND, - OP_OR, - OP_XOR, - OP_CMPLT, - OP_CMPEQ, - OP_CMPGT, -#ifdef USE_VSX_EXTENSIONS - OP_CMPLE, - OP_CMPGE, -#endif - OP_MIN, - OP_MAX, - OP_AVG, - - /* return vector, takes in a vector and an explicitly unsigned vector */ - OP_LSHIFT, - OP_LRSHIFT, - OP_RSHIFT, - - OP_FINAL_, - - /* unimplemented, no altivec version :) */ - OP_NOT, -}; - -/* convert op -> string */ -static struct { - const char *u; - const char *l; -} op_names[] = { - [OP_SPLAT] = {"SPLAT", "splat"}, - [OP_LOAD_ALIGNED] = {"LOAD_ALIGNED", "load_aligned"}, - [OP_LOAD] = {"LOAD", "load"}, - [OP_STORE_ALIGNED] = {"STORE_ALIGNED", "store_aligned"}, - [OP_STORE] = {"STORE", "store"}, - [OP_ADD] = {"ADD", "add"}, - [OP_SUB] = {"SUB", "sub"}, - [OP_MUL] = {"MUL", "mul"}, - [OP_AVG] = {"AVG", "avg"}, - [OP_AND] = {"AND", "and"}, - [OP_OR] = {"OR", "or"}, - [OP_XOR] = {"XOR", "xor"}, - [OP_NOT] = {"NOT", "not"}, - [OP_CMPLT] = {"CMPLT", "cmplt"}, - [OP_CMPEQ] = {"CMPEQ", "cmpeq"}, - [OP_CMPGT] = {"CMPGT", "cmpgt"}, -#ifdef USE_VSX_EXTENSIONS - [OP_CMPLE] = {"CMPLE", "cmple"}, - [OP_CMPGE] = {"CMPGE", "cmpge"}, -#endif - [OP_MIN] = {"MIN", "min"}, - [OP_MAX] = {"MAX", "max"}, - [OP_RSHIFT] = {"RSHIFT", "rshift"}, - [OP_LRSHIFT] = {"LRSHIFT", "lrshift"}, - [OP_LSHIFT] = {"LSHIFT", "lshift"}, -}; - -#define UPSIGN(x) ((x) ? "" : "U") -#define LOSIGN(x) ((x) ? "" : "u") - -#define LOAVSIGN(x) ((x) ? 
"s" : "u") - -static void print_gcc_op(enum op op, int is_signed, int bits, int size) -{ - int i; - - /* compatibility with ancient gcc */ - switch (op) { - case OP_MUL: - puts("#ifdef vec_mul"); - break; - case OP_SPLAT: - printf("#if defined(vec_splats) || defined(vec_splat_%s%d)\n", (is_signed) ? "s" : "u", bits); - break; - default: - break; - } - - printf("#ifndef V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u); - - printf("VEC_FUNC_IMPL "); - - /* first; the return value */ - switch (op) { - case OP_SPLAT: - case OP_LOAD_ALIGNED: - case OP_LOAD: - case OP_ADD: - case OP_SUB: - case OP_MUL: - case OP_AND: - case OP_OR: - case OP_XOR: - case OP_CMPLT: - case OP_CMPEQ: - case OP_CMPGT: -#ifdef USE_VSX_EXTENSIONS - case OP_CMPLE: - case OP_CMPGE: -#endif - case OP_MIN: - case OP_MAX: - case OP_AVG: - case OP_RSHIFT: - case OP_LRSHIFT: - case OP_LSHIFT: - case OP_NOT: - printf("v%sint%dx%d", LOSIGN(is_signed), bits, size); - break; - case OP_STORE_ALIGNED: - case OP_STORE: - printf("void"); - break; - } - - /* whitespace and function name */ - printf(" v%sint%dx%d_%s(", LOSIGN(is_signed), bits, size, op_names[op].l); - - /* parameters */ - switch (op) { - case OP_SPLAT: - printf("vec_%sint%d x", LOSIGN(is_signed), bits); - break; - case OP_LOAD_ALIGNED: - case OP_LOAD: - printf("const vec_%sint%d x[%d]", LOSIGN(is_signed), bits, size); - break; - case OP_STORE_ALIGNED: - case OP_STORE: - printf("v%sint%dx%d vec, vec_%sint%d arr[%d]", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size); - break; - case OP_ADD: - case OP_SUB: - case OP_MUL: - case OP_AND: - case OP_OR: - case OP_XOR: - case OP_CMPLT: - case OP_CMPEQ: - case OP_CMPGT: -#ifdef USE_VSX_EXTENSIONS - case OP_CMPLE: - case OP_CMPGE: -#endif - case OP_MIN: - case OP_MAX: - case OP_AVG: - printf("v%sint%dx%d vec1, v%sint%dx%d vec2", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size); - break; - case OP_RSHIFT: - case OP_LRSHIFT: - case OP_LSHIFT: - printf("v%sint%dx%d vec1, vuint%dx%d vec2", LOSIGN(is_signed), bits, size, bits, size); - break; - case OP_NOT: - printf("v%sint%dx%d vec", LOSIGN(is_signed), bits, size); - break; - } - - puts(")\n{"); - - switch (op) { - case OP_SPLAT: - printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); - puts("\tvec.altivec = vec_splats(x);"); - puts("\treturn vec;"); - break; - case OP_LOAD_ALIGNED: - printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); - puts("\tvec.altivec = vec_ld(0, x);"); - puts("\treturn vec;"); - break; - case OP_LOAD: - printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); - puts("\tvec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));"); - puts("\treturn vec;"); - break; - case OP_STORE_ALIGNED: - puts("\tvec_st(vec.altivec, 0, arr);"); - break; - case OP_STORE: - /* ??? 
*/ - puts("\tmemcpy(arr, &vec, sizeof(vec));"); - break; - case OP_ADD: - case OP_SUB: - case OP_MUL: - case OP_AND: - case OP_OR: - case OP_XOR: - case OP_AVG: - case OP_CMPLT: - case OP_CMPEQ: - case OP_CMPGT: -#ifdef USE_VSX_EXTENSIONS - case OP_CMPLE: - case OP_CMPGE: -#endif - case OP_LSHIFT: - case OP_LRSHIFT: - case OP_RSHIFT: - case OP_MIN: - case OP_MAX: { - static const char *op_altivec[OP_LRSHIFT - OP_ADD + 1] = {"add", "sub", "mul", "and", "or", "xor", "cmplt", "cmpeq", "cmpgt", -#ifdef USE_VSX_EXTENSIONS - "cmple", - "cmpge", -#endif - "min", "max", "avg", "sl", "sr"}; - static const char *types[] = {"char", "short", NULL, "int"}; - - printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); - if (op == OP_RSHIFT) { - printf("\tvec.altivec = vec_sr%s(vec1.altivec, vec2.altivec);\n", (is_signed) ? "a" : ""); - } else { - printf("\tvec.altivec = (vector %s %s)vec_%s(vec1.altivec, vec2.altivec);\n", (is_signed) ? "signed" : "unsigned", types[(bits / 8) - 1], op_altivec[op - OP_ADD]); - } - puts("\treturn vec;"); - break; - } - default: - printf("#error implement this operation"); - break; - } - - /* end function definition */ - puts("}"); - - printf("# define V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u); - puts("#endif"); - - switch (op) { - case OP_SPLAT: - case OP_MUL: - puts("#endif"); - break; - default: - break; - } -} - -static inline void print_ops(int is_signed, int bits, int size) -{ - int i; - - printf("\n\n/* v%sint%dx%d */\n\n", (is_signed ? "u" : ""), bits, size); - - for (i = 0; i < OP_FINAL_; i++) - print_gcc_op(i, is_signed, bits, size); -} - -#ifdef USE_VSX_EXTENSIONS -# define HEADER_GUARD_NAME "VSX" -#else -# define HEADER_GUARD_NAME "ALTIVEC" -#endif - -static const char *header = - "/**\n" - " * vec - a tiny SIMD vector library in C99\n" - " * \n" - " * Copyright (c) 2024-2025 Paper\n" - " * \n" - " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" - " * of this software and associated documentation files (the \"Software\"), to deal\n" - " * in the Software without restriction, including without limitation the rights\n" - " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" - " * copies of the Software, and to permit persons to whom the Software is\n" - " * furnished to do so, subject to the following conditions:\n" - " * \n" - " * The above copyright notice and this permission notice shall be included in all\n" - " * copies or substantial portions of the Software.\n" - " * \n" - " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" - " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" - " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" - " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" - " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" - " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n" - " * SOFTWARE.\n" - "**/\n" - "\n" - "/* This file is automatically generated! 
Do not edit it directly!\n" - " * Edit the code that generates it in utils/genaltivec.c --paper */\n" - "\n" - "#ifndef VEC_IMPL_PPC_" HEADER_GUARD_NAME "_H_\n" - "#define VEC_IMPL_PPC_" HEADER_GUARD_NAME "_H_\n" - "\n"; - -static const char *footer = - "#endif /* VEC_IMPL_PPC_" HEADER_GUARD_NAME "_H_ */\n"; - -int main(void) -{ - static struct { - int bits, size; - } defs[] = { - /* -- 8-bit */ - {8, 16}, - /* -- 16-bit */ - {16, 8}, - - /* -- 32-bit */ - {32, 4}, - -#ifdef USE_VSX_EXTENSIONS - /* -- 64-bit */ - {64, 2}, -#endif - }; - int i; - - puts(header); - - for (i = 0; i < ARRAY_SIZE(defs); i++) { - print_ops(1, defs[i].bits, defs[i].size); - print_ops(0, defs[i].bits, defs[i].size); - } - - puts(footer); -}
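For context, the wrappers this generator printed (and which the replacement in gen/genaltivec.c still produces) all share one shape: a guarded VEC_FUNC_IMPL function that forwards to a single AltiVec intrinsic and casts the result back into the union. An illustrative instance, reconstructed from the print_gcc_op() logic above rather than copied from the generated header:

    #ifndef VINT32x4_ADD_DEFINED
    VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2)
    {
        vint32x4 vec;
        vec.altivec = (vector signed int)vec_add(vec1.altivec, vec2.altivec);
        return vec;
    }
    # define VINT32x4_ADD_DEFINED
    #endif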
--- a/utils/gendouble.c Tue Apr 29 16:54:13 2025 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,244 +0,0 @@ -/** - * vec - a tiny SIMD vector library in C99 - * - * Copyright (c) 2024-2025 Paper - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. -**/ - -/* Use this file to generate include/vec/impl/double.h !! - * - * `gcc -o gendouble gendouble.c` */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <ctype.h> - -#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) - -/* XXX: would it be faster to unroll literally everything instead of defining everything, - * and then unpacking it all? */ -static const char *header = - "/**\n" - " * vec - a tiny SIMD vector library in C99\n" - " * \n" - " * Copyright (c) 2024-2025 Paper\n" - " * \n" - " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" - " * of this software and associated documentation files (the \"Software\"), to deal\n" - " * in the Software without restriction, including without limitation the rights\n" - " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" - " * copies of the Software, and to permit persons to whom the Software is\n" - " * furnished to do so, subject to the following conditions:\n" - " * \n" - " * The above copyright notice and this permission notice shall be included in all\n" - " * copies or substantial portions of the Software.\n" - " * \n" - " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" - " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" - " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" - " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" - " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" - " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n" - " * SOFTWARE.\n" - "**/\n" - "\n" - "/* This file is automatically generated! 
Do not edit it directly!\n" - " * Edit the code that generates it in utils/gengeneric.c --paper */\n" - "\n" - "#ifndef VEC_IMPL_DOUBLE_H_\n" - "#define VEC_IMPL_DOUBLE_H_\n" - "\n" - "#define VEC_DOUBLE_SPLAT(sign, bits, size, halfsize) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \\\n" - " { \\\n" - " v##sign##int##bits##x##size vec; \\\n" - " \\\n" - " vec.generic[0] = v##sign##int##bits##x##halfsize##_splat(x); \\\n" - " vec.generic[1] = v##sign##int##bits##x##halfsize##_splat(x); \\\n" - " \\\n" - " return vec; \\\n" - " }\n" - "\n" - "#define VEC_DOUBLE_LOAD_EX(name, sign, bits, size, halfsize) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits x[size]) \\\n" - " { \\\n" - " v##sign##int##bits##x##size vec; \\\n" - " \\\n" - " vec.generic[0] = v##sign##int##bits##x##halfsize##_##name(x); \\\n" - " vec.generic[1] = v##sign##int##bits##x##halfsize##_##name(x + halfsize); \\\n" - " \\\n" - " return vec; \\\n" - " }\n" - "\n" - "#define VEC_DOUBLE_LOAD(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load, sign, bits, size, halfsize)\n" - "#define VEC_DOUBLE_LOAD_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load_aligned, sign, bits, size, halfsize)\n" - "\n" - "#define VEC_DOUBLE_STORE_EX(name, sign, bits, size, halfsize) \\\n" - " VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits x[size]) \\\n" - " { \\\n" - " v##sign##int##bits##x##halfsize##_##name(vec.generic[0], x); \\\n" - " v##sign##int##bits##x##halfsize##_##name(vec.generic[1], x + halfsize); \\\n" - " }\n" - "\n" - "#define VEC_DOUBLE_STORE(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store, sign, bits, size, halfsize)\n" - "#define VEC_DOUBLE_STORE_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store_aligned, sign, bits, size, halfsize)\n" - "\n" - "#define VEC_DOUBLE_OP(name, sign, bits, size, halfsize, secondsign) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec1, v##secondsign##int##bits##x##size vec2) \\\n" - " { \\\n" - " vec1.generic[0] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[0], vec2.generic[0]); \\\n" - " vec1.generic[1] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[1], vec2.generic[1]); \\\n" - " \\\n" - " return vec1; \\\n" - " }\n" - "\n" - "#define VEC_DOUBLE_ADD(sign, bits, size, halfsize) VEC_DOUBLE_OP(add, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_SUB(sign, bits, size, halfsize) VEC_DOUBLE_OP(sub, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_MUL(sign, bits, size, halfsize) VEC_DOUBLE_OP(mul, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_DIV(sign, bits, size, halfsize) VEC_DOUBLE_OP(div, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_MOD(sign, bits, size, halfsize) VEC_DOUBLE_OP(mod, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_AVG(sign, bits, size, halfsize) VEC_DOUBLE_OP(avg, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_LSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lshift, sign, bits, size, halfsize, u)\n" - "#define VEC_DOUBLE_RSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(rshift, sign, bits, size, halfsize, u)\n" - "#define VEC_DOUBLE_LRSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lrshift, sign, bits, size, halfsize, u)\n" - "#define VEC_DOUBLE_AND(sign, bits, size, halfsize) VEC_DOUBLE_OP(and, sign, bits, size, 
halfsize, sign)\n" - "#define VEC_DOUBLE_OR(sign, bits, size, halfsize) VEC_DOUBLE_OP(or, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_XOR(sign, bits, size, halfsize) VEC_DOUBLE_OP(xor, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_MIN(sign, bits, size, halfsize) VEC_DOUBLE_OP(min, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_MAX(sign, bits, size, halfsize) VEC_DOUBLE_OP(max, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_CMPLT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmplt, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_CMPLE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmple, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_CMPEQ(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpeq, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_CMPGE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpge, sign, bits, size, halfsize, sign)\n" - "#define VEC_DOUBLE_CMPGT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpgt, sign, bits, size, halfsize, sign)\n" - "\n" - "#define VEC_DOUBLE_NOT(sign, bits, size, halfsize) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \\\n" - " { \\\n" - " vec.generic[0] = v##sign##int##bits##x##halfsize##_not(vec.generic[0]); \\\n" - " vec.generic[1] = v##sign##int##bits##x##halfsize##_not(vec.generic[1]); \\\n" - " \\\n" - " return vec; \\\n" - " }\n" - "\n" - "#endif /* VEC_IMPL_DOUBLE_H_ */ \n" - "\n" - "/* ------------------------------------------------------------------------ */\n" - "/* PREPROCESSOR HELL INCOMING */\n" - ""; - -static const char *footer = - "" /* nothing */; - -/* ------------------------------------------------------------------------ */ - -static void print_generic_dbl_op(const char *op, int is_signed, int bits, int size) -{ - printf( - "#if !defined(V%sINT%dx%d_%s_DEFINED) && defined(V%sINT%dx%d_%s_DEFINED)\n" - "VEC_DOUBLE_%s(%s, %d, %d, %d)\n" - "# define V%sINT%dx%d_%s_DEFINED\n" - "#endif\n\n", - (is_signed ? "" : "U"), bits, size, op, (is_signed ? "" : "U"), bits, size / 2, op, - op, (is_signed ? "/* nothing */" : "u"), bits, size, size / 2, - (is_signed ? "" : "U"), bits, size, op); -} - -typedef void (*print_op_spec)(const char *op, int is_signed, int bits, int size); - -static inline void print_ops(int is_signed, int bits, int size, print_op_spec print_op) -{ - /* all supported operations here */ - static const char *ops[] = { - "SPLAT", - "LOAD_ALIGNED", - "LOAD", - "STORE_ALIGNED", - "STORE", - "ADD", - "SUB", - "MUL", - "DIV", - "MOD", - "AVG", - "AND", - "OR", - "XOR", - "NOT", - "CMPLT", - "CMPEQ", - "CMPGT", - "CMPLE", /* these two must be after CMPLT and CMPGT respectfully, */ - "CMPGE", /* because their definitions call those functions */ - "MIN", - "MAX", - "RSHIFT", - "LRSHIFT", - "LSHIFT", - NULL, - }; - int i; - - printf("\n\n/* v%sint%dx%d */\n\n", (is_signed ? 
"u" : ""), bits, size); - - for (i = 0; ops[i]; i++) - print_op(ops[i], is_signed, bits, size); -} - -int main(void) -{ - static struct { - int bits, size; - print_op_spec print_op; - } defs[] = { - /* -- 8-bit */ - {8, 4, print_generic_dbl_op}, - {8, 8, print_generic_dbl_op}, - {8, 16, print_generic_dbl_op}, - {8, 32, print_generic_dbl_op}, - {8, 64, print_generic_dbl_op}, - - /* -- 16-bit */ - {16, 4, print_generic_dbl_op}, - {16, 8, print_generic_dbl_op}, - {16, 16, print_generic_dbl_op}, - {16, 32, print_generic_dbl_op}, - - /* -- 32-bit */ - {32, 4, print_generic_dbl_op}, - {32, 8, print_generic_dbl_op}, - {32, 16, print_generic_dbl_op}, - - /* -- 64-bit */ - {64, 4, print_generic_dbl_op}, - {64, 8, print_generic_dbl_op}, - }; - int i; - - puts(header); - - for (i = 0; i < ARRAY_SIZE(defs); i++) { - print_ops(1, defs[i].bits, defs[i].size, defs[i].print_op); - print_ops(0, defs[i].bits, defs[i].size, defs[i].print_op); - } - - puts(footer); -}
--- a/utils/gengcc.c Tue Apr 29 16:54:13 2025 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,414 +0,0 @@ -/** - * vec - a tiny SIMD vector library in C99 - * - * Copyright (c) 2024-2025 Paper - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. -**/ - -/* Use this file to generate include/vec/impl/generic.h !! - * - * `gcc -o gengeneric gengeneric.c` */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <ctype.h> - -#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) - -/* ------------------------------------------------------------------------ */ - -enum op { - /* return vector, take in a integer */ - OP_SPLAT = 0, - - /* return vector, take in an array */ - OP_LOAD_ALIGNED, - OP_LOAD, - - /* void, take in vector and array */ - OP_STORE_ALIGNED, - OP_STORE, - - /* return vector, takes in two vectors */ - OP_ADD, - OP_SUB, - OP_MUL, - OP_AND, - OP_OR, - OP_XOR, - OP_CMPLT, - OP_CMPEQ, - OP_CMPGT, - OP_CMPLE, - OP_CMPGE, - OP_MIN, - OP_MAX, - OP_AVG, - - /* return vector, takes in a vector and an explicitly unsigned vector */ - OP_LSHIFT, - OP_RSHIFT, - OP_LRSHIFT, - - /* return vector, takes in a vector */ - OP_NOT, - - OP_FINAL_, - - /* operations that have some sort of "caveat" should go here, until - * they are fixed or removed */ - - OP_DIV, /* this one causes a floating point exception on my machine. - * possibly we could change the behavior of divide-by-zero - * with some gcc pragma ? --paper */ - OP_MOD, /* ditto with the above */ -}; - -/* convert op -> string */ -static struct { - const char *u; - const char *l; -} op_names[] = { - [OP_SPLAT] = {"SPLAT", "splat"}, - [OP_LOAD_ALIGNED] = {"LOAD_ALIGNED", "load_aligned"}, - [OP_LOAD] = {"LOAD", "load"}, - [OP_STORE_ALIGNED] = {"STORE_ALIGNED", "store_aligned"}, - [OP_STORE] = {"STORE", "store"}, - [OP_ADD] = {"ADD", "add"}, - [OP_SUB] = {"SUB", "sub"}, - [OP_MUL] = {"MUL", "mul"}, - /*[OP_DIV] = {"DIV", "div"},*/ - [OP_AVG] = {"AVG", "avg"}, - [OP_AND] = {"AND", "and"}, - [OP_OR] = {"OR", "or"}, - [OP_XOR] = {"XOR", "xor"}, - [OP_NOT] = {"NOT", "not"}, - [OP_CMPLT] = {"CMPLT", "cmplt"}, - [OP_CMPEQ] = {"CMPEQ", "cmpeq"}, - [OP_CMPGT] = {"CMPGT", "cmpgt"}, - [OP_CMPLE] = {"CMPLE", "cmple"}, - [OP_CMPGE] = {"CMPGE", "cmpge"}, - [OP_MIN] = {"MIN", "min"}, - [OP_MAX] = {"MAX", "max"}, - [OP_RSHIFT] = {"RSHIFT", "rshift"}, - [OP_LRSHIFT] = {"LRSHIFT", "lrshift"}, - [OP_LSHIFT] = {"LSHIFT", "lshift"}, -}; - -#define UPSIGN(x) ((x) ? "" : "U") -#define LOSIGN(x) ((x) ? 
"" : "u") - -static void print_gcc_op(enum op op, int is_signed, int bits, int size) -{ - int i; - int gccprereq = 0; - - switch (op) { - case OP_CMPEQ: - case OP_CMPLE: - case OP_CMPLT: - case OP_CMPGT: - case OP_CMPGE: - case OP_MIN: - case OP_MAX: - case OP_LSHIFT: - case OP_RSHIFT: - case OP_LRSHIFT: - case OP_AVG: - puts("#if VEC_GNUC_ATLEAST(4, 3, 0)"); - gccprereq = 1; - break; - default: - break; - } - - printf("#ifndef V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u); - - printf("VEC_FUNC_IMPL "); - - /* first; the return value */ - switch (op) { - case OP_SPLAT: - case OP_LOAD_ALIGNED: - case OP_LOAD: - case OP_ADD: - case OP_SUB: - case OP_MUL: - case OP_DIV: - case OP_AND: - case OP_OR: - case OP_XOR: - case OP_CMPLT: - case OP_CMPEQ: - case OP_CMPGT: - case OP_CMPLE: - case OP_CMPGE: - case OP_MIN: - case OP_MAX: - case OP_AVG: - case OP_RSHIFT: - case OP_LRSHIFT: - case OP_LSHIFT: - case OP_NOT: - printf("v%sint%dx%d", LOSIGN(is_signed), bits, size); - break; - case OP_STORE_ALIGNED: - case OP_STORE: - printf("void"); - break; - } - - /* whitespace and function name */ - printf(" v%sint%dx%d_%s(", LOSIGN(is_signed), bits, size, op_names[op].l); - - /* parameters */ - switch (op) { - case OP_SPLAT: - printf("vec_%sint%d x", LOSIGN(is_signed), bits); - break; - case OP_LOAD_ALIGNED: - case OP_LOAD: - printf("const vec_%sint%d x[%d]", LOSIGN(is_signed), bits, size); - break; - case OP_STORE_ALIGNED: - case OP_STORE: - printf("v%sint%dx%d vec, vec_%sint%d arr[%d]", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size); - break; - case OP_ADD: - case OP_SUB: - case OP_MUL: - case OP_DIV: - case OP_AND: - case OP_OR: - case OP_XOR: - case OP_CMPLT: - case OP_CMPEQ: - case OP_CMPGT: - case OP_CMPLE: - case OP_CMPGE: - case OP_MIN: - case OP_MAX: - case OP_AVG: - printf("v%sint%dx%d vec1, v%sint%dx%d vec2", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size); - break; - case OP_RSHIFT: - case OP_LRSHIFT: - case OP_LSHIFT: - printf("v%sint%dx%d vec1, vuint%dx%d vec2", LOSIGN(is_signed), bits, size, bits, size); - break; - case OP_NOT: - printf("v%sint%dx%d vec", LOSIGN(is_signed), bits, size); - break; - } - - puts(")\n{"); - - switch (op) { - case OP_SPLAT: - printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); - printf("\tvec.gcc = (__typeof__(vec.gcc)){"); - for (i = 0; i < size; i++) - printf("x,"); - printf("};\n"); - printf("\treturn vec;\n"); - break; - case OP_LOAD_ALIGNED: - printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); - puts("\tvec.gcc = *(__typeof__(vec.gcc) *)x;"); - printf("\treturn vec;\n"); - break; - case OP_LOAD: - printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); - puts("\tmemcpy(&vec, x, sizeof(vec));"); - printf("\treturn vec;\n"); - break; - case OP_STORE_ALIGNED: - puts("\t*(__typeof__(vec.gcc) *)arr = vec.gcc;"); - break; - case OP_STORE: - puts("\tmemcpy(arr, &vec, sizeof(vec));"); - break; - case OP_ADD: - case OP_SUB: - case OP_MUL: - case OP_DIV: - case OP_AND: - case OP_OR: - case OP_XOR: - case OP_CMPLT: - case OP_CMPEQ: - case OP_CMPGT: - case OP_CMPLE: - case OP_CMPGE: { - const char *op_builtins[OP_CMPGE - OP_ADD + 1] = {"+", "-", "*", /*"/", */"&", "|", "^", "<", "==", ">", "<=", ">="}; - - printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_ADD]); - printf("\treturn vec1;\n"); - break; - } - - case OP_LSHIFT: - case OP_RSHIFT: { - const char *op_builtins[OP_RSHIFT - OP_LSHIFT + 1] = {"<<", ">>"}; - - printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", 
op_builtins[op - OP_LSHIFT]); - printf("\treturn vec1;\n"); - break; - } - - case OP_LRSHIFT: { - /* sigh */ - printf("\tvec1.gcc = (__typeof__(vec1.gcc))((vec_uint%d __attribute__((__vector_size__(%d))))vec1.gcc >> vec2.gcc);\n", bits, bits * size / 8); - printf("\treturn vec1;\n"); - break; - } - case OP_MIN: - case OP_MAX: { - const char *op_builtins[OP_MAX - OP_MIN + 1] = {"<", ">"}; - - printf("\tv%sint%dx%d mask;\n", LOSIGN(is_signed), bits, size); - printf("\tmask.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_MIN]); - printf("\tvec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);\n"); - printf("\treturn vec1;\n"); - break; - } - case OP_AVG: - printf("\tvint%dx%d ones = vint%dx%d_splat(1);\n", bits, size, bits, size); - - if (is_signed) { - puts("\t__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);"); - puts("\t__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);"); - puts("\t__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);"); - puts("\t__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);"); - puts(""); - printf("\tvec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);\n"); - } else { - printf("\tvec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);\n"); - } - - printf("\treturn vec1;\n"); - break; - case OP_NOT: - printf("\tvec.gcc = ~vec.gcc;\n"); - printf("\treturn vec;\n"); - break; - default: - printf("#error implement this operation"); - break; - } - - /* end function definition */ - puts("}"); - - printf("# define V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u); - puts("#endif"); - - if (gccprereq) - puts("#endif"); -} - -static inline void print_ops(int is_signed, int bits, int size) -{ - int i; - - printf("\n\n/* v%sint%dx%d */\n\n", (is_signed ? "u" : ""), bits, size); - - for (i = 0; i < OP_FINAL_; i++) - print_gcc_op(i, is_signed, bits, size); -} - -static const char *header = - "/**\n" - " * vec - a tiny SIMD vector library in C99\n" - " * \n" - " * Copyright (c) 2024-2025 Paper\n" - " * \n" - " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" - " * of this software and associated documentation files (the \"Software\"), to deal\n" - " * in the Software without restriction, including without limitation the rights\n" - " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" - " * copies of the Software, and to permit persons to whom the Software is\n" - " * furnished to do so, subject to the following conditions:\n" - " * \n" - " * The above copyright notice and this permission notice shall be included in all\n" - " * copies or substantial portions of the Software.\n" - " * \n" - " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" - " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" - " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" - " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" - " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" - " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n" - " * SOFTWARE.\n" - "**/\n" - "\n" - "/* This file is automatically generated! 
Do not edit it directly!\n" - " * Edit the code that generates it in utils/gengcc.c --paper */\n" - "\n" - "#ifndef VEC_IMPL_GCC_H_\n" - "#define VEC_IMPL_GCC_H_\n" - "\n"; - -static const char *footer = - "#endif /* VEC_IMPL_GCC_H_ */\n"; - -int main(void) -{ - static struct { - int bits, size; - } defs[] = { - /* -- 8-bit */ - {8, 2}, - {8, 4}, - {8, 8}, - {8, 16}, - {8, 32}, - {8, 64}, - - /* -- 16-bit */ - {16, 2}, - {16, 4}, - {16, 8}, - {16, 16}, - {16, 32}, - - /* -- 32-bit */ - {32, 2}, - {32, 4}, - {32, 8}, - {32, 16}, - - /* -- 64-bit */ - {64, 2}, - {64, 4}, - {64, 8}, - }; - int i; - - puts(header); - - for (i = 0; i < ARRAY_SIZE(defs); i++) { - print_ops(1, defs[i].bits, defs[i].size); - print_ops(0, defs[i].bits, defs[i].size); - } - - puts(footer); -}
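The unsigned average emitted above is worth spelling out: both operands are halved before the add so the sum cannot overflow, and a 1 is added whenever either input has its low bit set, which rounds the result up. A scalar sanity check of that formula (not part of the library):

    #include <stdint.h>
    #include <stdio.h>

    /* scalar equivalent of
     *   vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1)
     *            + ((vec1.gcc | vec2.gcc) & ones.gcc);
     * equal to ceil((a + b) / 2) with no intermediate overflow */
    static uint8_t avg_u8(uint8_t a, uint8_t b)
    {
        return (a >> 1) + (b >> 1) + ((a | b) & 1);
    }

    int main(void)
    {
        printf("%d\n", avg_u8(3, 4));     /* 4   == ceil(7/2)       */
        printf("%d\n", avg_u8(255, 255)); /* 255, no 8-bit overflow */
        return 0;
    }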
--- a/utils/gengeneric.c Tue Apr 29 16:54:13 2025 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,336 +0,0 @@ -/** - * vec - a tiny SIMD vector library in C99 - * - * Copyright (c) 2024-2025 Paper - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. -**/ - -/* Use this file to generate include/vec/impl/generic.h !! - * - * `gcc -o gengeneric gengeneric.c` */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <ctype.h> - -#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) - -/* XXX: would it be faster to unroll literally everything instead of defining everything, - * and then unpacking it all? */ -static const char *header = - "/**\n" - " * vec - a tiny SIMD vector library in C99\n" - " * \n" - " * Copyright (c) 2024-2025 Paper\n" - " * \n" - " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" - " * of this software and associated documentation files (the \"Software\"), to deal\n" - " * in the Software without restriction, including without limitation the rights\n" - " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" - " * copies of the Software, and to permit persons to whom the Software is\n" - " * furnished to do so, subject to the following conditions:\n" - " * \n" - " * The above copyright notice and this permission notice shall be included in all\n" - " * copies or substantial portions of the Software.\n" - " * \n" - " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" - " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" - " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" - " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" - " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" - " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n" - " * SOFTWARE.\n" - "**/\n" - "\n" - "/* This file is automatically generated! 
Do not edit it directly!\n" - " * Edit the code that generates it in utils/gengeneric.c --paper */\n" - "\n" - "#ifndef VEC_IMPL_GENERIC_H_\n" - "#define VEC_IMPL_GENERIC_H_\n" - "\n" - "#define VEC_GENERIC_OPERATION(op, sign, bits, size) \\\n" - " do { \\\n" - " int i; \\\n" - " \\\n" - " for (i = 0; i < size; i++) \\\n" - " vec1.generic[i] = (op); \\\n" - " \\\n" - " return vec1; \\\n" - " } while (0)\n" - "\n" - "#define VEC_GENERIC_BUILTIN_OPERATION(op, sign, bits, size) \\\n" - " VEC_GENERIC_OPERATION(vec1.generic[i] op vec2.generic[i], sign, bits, size)\n" - "\n" - "#define VEC_GENERIC_CMP(op, sign, bits, size) \\\n" - " VEC_GENERIC_OPERATION((vec1.generic[i] op vec2.generic[i]) ? (vec_##sign##int##bits)VEC_MAX_OF_TYPE(vec_uint##bits) : 0, sign, bits, size)\n" - "\n" - "/* okay, now we can do this crap: */\n" - "\n" - "#define VEC_GENERIC_SPLAT(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \\\n" - " { \\\n" - " v##sign##int##bits##x##size vec; \\\n" - " for (int i = 0; i < size; i++) \\\n" - " vec.generic[i] = x; \\\n" - " return vec; \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_LOAD_EX(name, sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits in[size]) \\\n" - " { \\\n" - " v##sign##int##bits##x##size vec; \\\n" - " memcpy(&vec, in, sizeof(vec_##sign##int##bits) * size); \\\n" - " return vec; \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_LOAD_ALIGNED(sign, bits, size) VEC_GENERIC_LOAD_EX(load_aligned, sign, bits, size)\n" - "#define VEC_GENERIC_LOAD(sign, bits, size) VEC_GENERIC_LOAD_EX(load, sign, bits, size)\n" - "\n" - "#define VEC_GENERIC_STORE_EX(name, sign, bits, size) \\\n" - " VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \\\n" - " { \\\n" - " memcpy(out, &vec, sizeof(vec_##sign##int##bits) * size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_STORE_ALIGNED(sign, bits, size) VEC_GENERIC_STORE_EX(store_aligned, sign, bits, size)\n" - "#define VEC_GENERIC_STORE(sign, bits, size) VEC_GENERIC_STORE_EX(store, sign, bits, size)\n" - "\n" - "#define VEC_GENERIC_ADD(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_add(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_BUILTIN_OPERATION(+, sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_SUB(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_sub(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_BUILTIN_OPERATION(-, sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_MUL(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_mul(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_BUILTIN_OPERATION(*, sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_DIV(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_div(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_OPERATION(vec2.generic[i] ? 
(vec1.generic[i] / vec2.generic[i]) : 0, sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_MOD(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_mod(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] % vec2.generic[i]) : 0, sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_AVG(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " for (int i = 0; i < size; i++) \\\n" - " vec1.generic[i] = vec_im##sign##avg(vec1.generic[i], vec2.generic[i]); \\\n" - " \\\n" - " return vec1; \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_AND(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_and(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_BUILTIN_OPERATION(&, sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_OR(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_or(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_BUILTIN_OPERATION(|, sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_XOR(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_xor(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_BUILTIN_OPERATION(^, sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_NOT(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \\\n" - " { \\\n" - " return v##sign##int##bits##x##size##_xor(vec, v##sign##int##bits##x##size##_splat((vec_##sign##int##bits)VEC_MAX_OF_TYPE(vec_uint##bits))); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_CMPLT(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmplt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_CMP(<, sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_CMPLE(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmple(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " return v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size##_cmpgt(vec1, vec2)); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_CMPEQ(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpeq(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_CMP(==, sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_CMPGE(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpge(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " return v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size##_cmplt(vec1, vec2)); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_CMPGT(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpgt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_CMP(>, sign, bits, size); 
\\\n" - " }\n" - "\n" - "#define VEC_GENERIC_LSHIFT(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_lshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_OPERATION(vec_##sign##lshift(vec1.generic[i], vec2.generic[i]), sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_RSHIFT(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_rshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_OPERATION(vec_##sign##rshift(vec1.generic[i], vec2.generic[i]), sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_LRSHIFT(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_lrshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \\\n" - " { \\\n" - " VEC_GENERIC_OPERATION(vec_urshift((vec_uint##bits)vec1.generic[i], vec2.generic[i]), sign, bits, size); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_MIN(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_min(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " v##sign##int##bits##x##size cmplt = v##sign##int##bits##x##size##_cmplt(vec1, vec2); \\\n" - " \\\n" - " v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \\\n" - " v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \\\n" - " \\\n" - " return v##sign##int##bits##x##size##_or(a, b); \\\n" - " }\n" - "\n" - "#define VEC_GENERIC_MAX(sign, bits, size) \\\n" - " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_max(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" - " { \\\n" - " v##sign##int##bits##x##size cmplt = v##sign##int##bits##x##size##_cmpgt(vec1, vec2); \\\n" - " \\\n" - " v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \\\n" - " v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \\\n" - " \\\n" - " return v##sign##int##bits##x##size##_or(a, b); \\\n" - " }\n" - "\n" - "/* ------------------------------------------------------------------------ */\n" - "/* PREPROCESSOR HELL INCOMING */\n"; - -static const char *footer = - "#endif /* VEC_IMPL_GENERIC_H_ */\n"; - -/* ------------------------------------------------------------------------ */ - -static void print_generic_op(const char *op, int is_signed, int bits, int size) -{ - printf( - "#ifndef V%sINT%dx%d_%s_DEFINED\n" - "VEC_GENERIC_%s(%s, %d, %d)\n" - "# define V%sINT%dx%d_%s_DEFINED\n" - "#endif\n", - (is_signed ? "" : "U"), bits, size, op, op, (is_signed ? "/* nothing */" : "u"), bits, size, (is_signed ? 
"" : "U"), bits, size, op); -} - -typedef void (*print_op_spec)(const char *op, int is_signed, int bits, int size); - -static inline void print_ops(int is_signed, int bits, int size, print_op_spec print_op) -{ - /* all supported operations here */ - static const char *ops[] = { - "SPLAT", - "LOAD_ALIGNED", - "LOAD", - "STORE_ALIGNED", - "STORE", - "ADD", - "SUB", - "MUL", - "DIV", - "MOD", - "AVG", - "AND", - "OR", - "XOR", - "NOT", - "CMPLT", - "CMPEQ", - "CMPGT", - "CMPLE", /* these two must be after CMPLT and CMPGT respectfully, */ - "CMPGE", /* because their definitions call those functions */ - "MIN", - "MAX", - "RSHIFT", - "LRSHIFT", - "LSHIFT", - NULL, - }; - int i; - - printf("\n\n/* v%sint%dx%d */\n\n", (is_signed ? "u" : ""), bits, size); - - for (i = 0; ops[i]; i++) - print_op(ops[i], is_signed, bits, size); -} - -int main(void) -{ - static struct { - int bits, size; - print_op_spec print_op; - } defs[] = { - /* -- 8-bit */ - {8, 2, print_generic_op}, - - /* -- 16-bit */ - {16, 2, print_generic_op}, - - /* -- 32-bit */ - {32, 2, print_generic_op}, - - /* -- 64-bit */ - {64, 2, print_generic_op}, - }; - int i; - - puts(header); - - for (i = 0; i < ARRAY_SIZE(defs); i++) { - print_ops(1, defs[i].bits, defs[i].size, defs[i].print_op); - print_ops(0, defs[i].bits, defs[i].size, defs[i].print_op); - } - - puts(footer); -}