changeset 45:7955bed1d169 default tip

*: add preliminary floating point support. No x86 intrinsics just yet, but I did add AltiVec since it's (arguably) the simplest :)
author Paper <paper@tflc.us>
date Wed, 30 Apr 2025 18:36:38 -0400
parents b0a3f0248ecc
children
files gen/README gen/genaltivec.c gen/gendouble.c gen/gengcc.c gen/gengeneric.c gen/genlib.c gen/genlib.h gen/genvsx.c include/vec/defs.h include/vec/impl/double.h include/vec/impl/gcc.h include/vec/impl/generic.h include/vec/impl/ppc/altivec.h include/vec/impl/ppc/vsx.h include/vec/impl/x86/sse2.h include/vec/impl/x86/sse3.h include/vec/vec.h test/Makefile.template test/test.c test/test_align.h test/test_arith.h test/test_benchmark.h test/test_benchmark_simple.c test/test_benchmark_vec.c test/test_compare.h utils/genaltivec.c utils/gendouble.c utils/gengcc.c utils/gengeneric.c
diffstat 29 files changed, 37160 insertions(+), 8869 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen/README	Wed Apr 30 18:36:38 2025 -0400
@@ -0,0 +1,23 @@
+These files are used to generate the actual implementation headers in
+`include/vec/impl`.
+
+All of them are basically compiled the same way:
+	gcc -o genIMPL genIMPL.c genlib.c
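+
+Each generator writes its output header to standard output, so you would
+typically redirect it into the corresponding file, for example (run from
+this directory):
+	./gengeneric > ../include/vec/impl/generic.h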
+
+You can generally base any new implementations off of one of the existing
+ones. Preferably, you would base it off the generic implementation, since
+it actually has all of the operations implemented (and serves as a
+reference for how these operations *should* work). For example, the avg
+operation on integers should be roughly equivalent to:
+	ceil((vec1 + vec2) / 2)
+
+Note how it always rounds up, rather than truncating towards zero. This
+is an important implementation detail; it stems from AltiVec, which was
+the inspiration behind much of the vec API.
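+Concretely, avg(1, 2) on an integer vector yields 2 (i.e. ceil(1.5)),
+where plain truncation would have given 1.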
+
+Note, however, that avg behaves differently for floating point types,
+where it is simply equivalent to
+	((vec1 + vec2) / 2)
+since there is no integer truncation to work around.
+
+Any overflow on integer operations should simply wrap around.
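+(For example, adding 1 to 255 in a vec_uint8 element should give 0, not
+saturate.)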
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen/genaltivec.c	Wed Apr 30 18:36:38 2025 -0400
@@ -0,0 +1,250 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+#include "genlib.h"
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+
+/* ------------------------------------------------------------------------ */
+
+/* #define USE_VSX_EXTENSIONS */
+/* #define USE_POWER8_EXTENSIONS */
+
+static int altivec_check(int op, int type, int bits, int size)
+{
+	switch (bits) {
+	case 8:
+	case 16:
+	case 32:
+#ifdef USE_VSX_EXTENSIONS
+	case 64:
+# ifndef USE_POWER8_EXTENSIONS
+		/* VSX has double, but not int64 */
+		if ((bits == 64) && (type != TYPE_FLOAT))
+			return 0;
+# endif
+#endif
+		if (bits * size == 128)
+			return 1;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int altivec_check_int(int op, int type, int bits, int size)
+{
+	return (altivec_check(op, type, bits, size) && type != TYPE_FLOAT);
+}
+
+static int altivec_check_float(int op, int type, int bits, int size)
+{
+	return (altivec_check(op, type, bits, size) && type == TYPE_FLOAT);
+}
+
+static void altivec_ppcheck(int op, int type, int bits, int size)
+{
+	/* old gcc had a broken partial implementation
+	 * (why even bother adding it at all?) */
+	switch (op) {
+	case OP_MUL: printf("defined(vec_mul)"); break;
+	case OP_SPLAT: printf("defined(vec_splats)"); break;
+	}
+}
+
+static void altivec_splat(int op, int type, int bits, int size)
+{
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	printf(" vec;\n");
+
+	puts("\tvec.altivec = vec_splats(x);");
+	puts("\treturn vec;");
+}
+
+static void altivec_load(int op, int type, int bits, int size)
+{
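+	/* emit the classic AltiVec unaligned load sequence: load the two
+	 * aligned 16-byte blocks that span the (possibly unaligned) data,
+	 * then shift the desired bytes into place with vec_lvsl/vec_perm */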
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	printf(" vec;\n");
+
+	puts("\tvec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));");
+	puts("\treturn vec;");
+}
+
+static void altivec_load_aligned(int op, int type, int bits, int size)
+{
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	printf(" vec;\n");
+
+	puts("\tvec.altivec = vec_ld(0, x);");
+	puts("\treturn vec;");
+}
+
+static void altivec_store_aligned(int op, int type, int bits, int size)
+{
+	puts("\tvec_st(vec.altivec, 0, x);");
+}
+
+/* no store? */
+
+static void altivec_print_native_type(int type, int bits)
+{
+	/* WITH DIRECTION AND MAGNITUDE! */
+	printf("vector ");
+
+	switch (type) {
+	case TYPE_INT:
+		printf("signed ");
+		break;
+	case TYPE_UINT:
+		printf("unsigned ");
+		break;
+	case TYPE_FLOAT:
+		/* nothing */
+		break;
+	}
+
+	switch (type) {
+	case TYPE_INT:
+	case TYPE_UINT:
+		switch (bits) {
+		case 8: printf("char"); break;
+		case 16: printf("short"); break;
+		case 32: printf("int"); break;
+		case 64: printf("long long"); break;
+		default: break;
+		}
+		break;
+	case TYPE_FLOAT:
+		switch (bits) {
+		case 32: printf("float"); break;
+		case 64: printf("double"); break;
+		default: break;
+		}
+	}
+}
+
+static void altivec_2op(int op, int type, int bits, int size)
+{
+	static const char *op_altivec[] = {
+		[OP_ADD]     = "add",
+		[OP_SUB]     = "sub",
+		[OP_MUL]     = "mul",
+		[OP_DIV]     = "div",
+		[OP_MOD]     = "mod",
+		[OP_AND]     = "and",
+		[OP_OR]      = "or",
+		[OP_XOR]     = "xor",
+		[OP_CMPLT]   = "cmplt",
+		[OP_CMPEQ]   = "cmpeq",
+		[OP_CMPGT]   = "cmpgt",
+		[OP_CMPLE]   = "cmple",
+		[OP_CMPGE]   = "cmpge",
+		[OP_MIN]     = "min",
+		[OP_MAX]     = "max",
+		[OP_AVG]     = "avg",
+		[OP_LSHIFT]  = "sl",
+		[OP_LRSHIFT] = "sr",
+	};
+
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	printf(" vec;\n");
+
+	if (op == OP_RSHIFT) {
+		printf("\tvec.altivec = vec_sr%s(vec1.altivec, vec2.altivec);\n", (type == TYPE_INT) ? "a" : "");
+	} else {
+		printf("\tvec.altivec = (");
+		altivec_print_native_type(type, bits);
+		printf(")vec_%s(vec1.altivec, vec2.altivec);\n", op_altivec[op]);
+	}
+
+	puts("\treturn vec;");
+}
+
+/* ------------------------------------------------------------------------ */
+
+static struct op_impl op_impl[OP_FINAL_] = {
+	[OP_SPLAT] = {altivec_check, NULL, altivec_splat},
+	[OP_LOAD_ALIGNED] = {altivec_check, NULL, altivec_load_aligned},
+	[OP_LOAD] = {altivec_check, NULL, altivec_load},
+	[OP_STORE_ALIGNED] = {altivec_check, NULL, altivec_store_aligned},
+
+	/* arithmetic */
+	[OP_ADD] = {altivec_check, NULL, altivec_2op},
+	[OP_SUB] = {altivec_check, NULL, altivec_2op},
+	[OP_MUL] = {altivec_check, NULL, altivec_2op},
+#ifdef USE_VSX_EXTENSIONS
+	/* GCC fails to compile integer division, so limit to floats */
+	[OP_DIV] = {altivec_check_float, NULL, altivec_2op},
+#endif
+#if 0
+	/* This is Power10. I don't have any Power10 hardware :)
+	 * (well, I also don't have any VSX hardware. whatever) */
+	[OP_MOD] = {altivec_check_int, NULL, altivec_2op},
+#endif
+	[OP_AVG] = {altivec_check_int, NULL, altivec_2op},
+
+	/* bitwise */
+	[OP_AND] = {altivec_check, NULL, altivec_2op},
+	[OP_OR] = {altivec_check, NULL, altivec_2op},
+	[OP_XOR] = {altivec_check, NULL, altivec_2op},
+
+	/* min/max */
+	[OP_MIN] = {altivec_check, NULL, altivec_2op},
+	[OP_MAX] = {altivec_check, NULL, altivec_2op},
+
+	/* bitshift */
+	[OP_LSHIFT] = {altivec_check, NULL, altivec_2op},
+	[OP_LRSHIFT] = {altivec_check, NULL, altivec_2op},
+	[OP_RSHIFT] = {altivec_check, NULL, altivec_2op},
+
+	/* comparison */
+	[OP_CMPLT] = {altivec_check, NULL, altivec_2op},
+#ifdef USE_VSX_EXTENSIONS
+	[OP_CMPLE] = {altivec_check, NULL, altivec_2op},
+#endif
+	[OP_CMPEQ] = {altivec_check, NULL, altivec_2op},
+#ifdef USE_VSX_EXTENSIONS
+	[OP_CMPGE] = {altivec_check, NULL, altivec_2op},
+#endif
+	[OP_CMPGT] = {altivec_check, NULL, altivec_2op},
+};
+
+
+int main(void)
+{
+	gen(op_impl,
+#ifdef USE_POWER8_EXTENSIONS
+		"power8"
+#elif defined(USE_VSX_EXTENSIONS)
+		"vsx"
+#else
+		"altivec"
+#endif
+	);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen/gendouble.c	Wed Apr 30 18:36:38 2025 -0400
@@ -0,0 +1,275 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+#include "genlib.h"
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+
+/* XXX: would it be faster to unroll literally everything instead of defining everything,
+ * and then unpacking it all? */
+static const char *header =
+	"/**\n"
+	" * vec - a tiny SIMD vector library in C99\n"
+	" * \n"
+	" * Copyright (c) 2024-2025 Paper\n"
+	" * \n"
+	" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
+	" * of this software and associated documentation files (the \"Software\"), to deal\n"
+	" * in the Software without restriction, including without limitation the rights\n"
+	" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
+	" * copies of the Software, and to permit persons to whom the Software is\n"
+	" * furnished to do so, subject to the following conditions:\n"
+	" * \n"
+	" * The above copyright notice and this permission notice shall be included in all\n"
+	" * copies or substantial portions of the Software.\n"
+	" * \n"
+	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
+	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
+	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
+	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
+	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
+	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
+	" * SOFTWARE.\n"
+	"**/\n"
+	"\n"
+	"/* This file is automatically generated! Do not edit it directly!\n"
+	" * Edit the code that generates it in utils/gendbl.c  --paper */\n"
+	"\n"
+	"#ifndef VEC_IMPL_DOUBLE_H_\n"
+	"#define VEC_IMPL_DOUBLE_H_\n"
+	"\n"
+	"#define VEC_DOUBLE_SPLAT(sign, bits, size, halfsize) \\\n"
+	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \\\n"
+	"	{ \\\n"
+	"		v##sign##int##bits##x##size vec; \\\n"
+	"	\\\n"
+	"		vec.dbl[0] = v##sign##int##bits##x##halfsize##_splat(x); \\\n"
+	"		vec.dbl[1] = v##sign##int##bits##x##halfsize##_splat(x); \\\n"
+	"	\\\n"
+	"		return vec; \\\n"
+	"	}\n"
+	"\n"
+	"#define VEC_DOUBLE_LOAD_EX(name, sign, bits, size, halfsize) \\\n"
+	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits x[size]) \\\n"
+	"	{ \\\n"
+	"		v##sign##int##bits##x##size vec; \\\n"
+	"	\\\n"
+	"		vec.dbl[0] = v##sign##int##bits##x##halfsize##_##name(x); \\\n"
+	"		vec.dbl[1] = v##sign##int##bits##x##halfsize##_##name(x + halfsize); \\\n"
+	"	\\\n"
+	"		return vec; \\\n"
+	"	}\n"
+	"\n"
+	"#define VEC_DOUBLE_LOAD(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load, sign, bits, size, halfsize)\n"
+	"#define VEC_DOUBLE_LOAD_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load_aligned, sign, bits, size, halfsize)\n"
+	"\n"
+	"#define VEC_DOUBLE_STORE_EX(name, sign, bits, size, halfsize) \\\n"
+	"	VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits x[size]) \\\n"
+	"	{ \\\n"
+	"		v##sign##int##bits##x##halfsize##_##name(vec.dbl[0], x); \\\n"
+	"		v##sign##int##bits##x##halfsize##_##name(vec.dbl[1], x + halfsize); \\\n"
+	"	}\n"
+	"\n"
+	"#define VEC_DOUBLE_STORE(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store, sign, bits, size, halfsize)\n"
+	"#define VEC_DOUBLE_STORE_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store_aligned, sign, bits, size, halfsize)\n"
+	"\n"
+	"#define VEC_DOUBLE_OP(name, sign, bits, size, halfsize, secondsign) \\\n"
+	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec1, v##secondsign##int##bits##x##size vec2) \\\n"
+	"	{ \\\n"
+	"		vec1.dbl[0] = v##sign##int##bits##x##halfsize##_##name(vec1.dbl[0], vec2.dbl[0]); \\\n"
+	"		vec1.dbl[1] = v##sign##int##bits##x##halfsize##_##name(vec1.dbl[1], vec2.dbl[1]); \\\n"
+	"	\\\n"
+	"		return vec1; \\\n"
+	"	}\n"
+	"\n"
+	"#define VEC_DOUBLE_ADD(sign, bits, size, halfsize) VEC_DOUBLE_OP(add, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_SUB(sign, bits, size, halfsize) VEC_DOUBLE_OP(sub, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_MUL(sign, bits, size, halfsize) VEC_DOUBLE_OP(mul, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_DIV(sign, bits, size, halfsize) VEC_DOUBLE_OP(div, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_MOD(sign, bits, size, halfsize) VEC_DOUBLE_OP(mod, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_AVG(sign, bits, size, halfsize) VEC_DOUBLE_OP(avg, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_LSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lshift, sign, bits, size, halfsize, u)\n"
+	"#define VEC_DOUBLE_RSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(rshift, sign, bits, size, halfsize, u)\n"
+	"#define VEC_DOUBLE_LRSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lrshift, sign, bits, size, halfsize, u)\n"
+	"#define VEC_DOUBLE_AND(sign, bits, size, halfsize) VEC_DOUBLE_OP(and, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_OR(sign, bits, size, halfsize) VEC_DOUBLE_OP(or, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_XOR(sign, bits, size, halfsize) VEC_DOUBLE_OP(xor, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_MIN(sign, bits, size, halfsize) VEC_DOUBLE_OP(min, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_MAX(sign, bits, size, halfsize) VEC_DOUBLE_OP(max, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_CMPLT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmplt, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_CMPLE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmple, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_CMPEQ(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpeq, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_CMPGE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpge, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_DOUBLE_CMPGT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpgt, sign, bits, size, halfsize, sign)\n"
+	"\n"
+	"#define VEC_DOUBLE_NOT(sign, bits, size, halfsize) \\\n"
+	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \\\n"
+	"	{ \\\n"
+	"		vec.dbl[0] = v##sign##int##bits##x##halfsize##_not(vec.dbl[0]); \\\n"
+	"		vec.dbl[1] = v##sign##int##bits##x##halfsize##_not(vec.dbl[1]); \\\n"
+	"	\\\n"
+	"		return vec; \\\n"
+	"	}\n"
+	"\n"
+	"#endif /* VEC_IMPL_DOUBLE_H_ */ \n"
+	"\n"
+	"/* ------------------------------------------------------------------------ */\n"
+	"/* PREPROCESSOR HELL INCOMING */\n"
+	"";
+
+static const char *footer = 
+	"" /* nothing */;
+
+/* ------------------------------------------------------------------------ */
+
+static void op_print_pp_halfsize(int op, int type, int bits, int size)
+{
+	struct op_info *op_info = gen_op_info(op);
+
+	printf("defined(V%s%dx%d_%s_DEFINED)", type_str[type].u, bits, size / 2, op_info->u);
+}
+
+static void op_print_twoop(int op, int type, int bits, int size)
+{
+	struct op_info *op_info = gen_op_info(op);
+
+	printf("\tvec1.dbl[0] = ");
+	gen_print_vtype(type, bits, size / 2);
+	printf("_%s(vec1.dbl[0], vec2.dbl[0]);\n", op_info->l);
+
+	printf("\tvec1.dbl[1] = ");
+	gen_print_vtype(type, bits, size / 2);
+	printf("_%s(vec1.dbl[1], vec2.dbl[1]);\n", op_info->l);
+
+	printf("\treturn vec1;\n");
+}
+
+static void op_print_unoop(int op, int type, int bits, int size)
+{
+	struct op_info *op_info = gen_op_info(op);
+
+	printf("\tvec.dbl[0] = ");
+	gen_print_vtype(type, bits, size / 2);
+	printf("_%s(vec.dbl[0]);\n", op_info->l);
+
+	printf("\tvec1.dbl[1] = ");
+	gen_print_vtype(type, bits, size / 2);
+	printf("_%s(vec.dbl[1]);\n", op_info->l);
+
+	printf("\treturn vec;\n");
+}
+
+static inline void op_print_load(int op, int type, int bits, int size)
+{
+	struct op_info *op_info = gen_op_info(op);
+
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	printf(" vec;\n");
+
+	printf("\tvec.dbl[0] = ");
+	gen_print_vtype(type, bits, size / 2);
+	printf("_%s(x);\n", op_info->l);
+
+	printf("\tvec.dbl[1] = ");
+	gen_print_vtype(type, bits, size / 2);
+	printf("_%s(x + %d);\n", op_info->l, size / 2);
+
+	printf("\treturn vec;\n");
+}
+
+static inline void op_print_splat(int op, int type, int bits, int size)
+{
+	struct op_info *op_info = gen_op_info(op);
+
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	printf(" vec;\n");
+
+	printf("\tvec.dbl[0] = ");
+	gen_print_vtype(type, bits, size / 2);
+	printf("_%s(x);\n", op_info->l);
+
+	printf("\tvec.dbl[1] = ");
+	gen_print_vtype(type, bits, size / 2);
+	printf("_%s(x);\n", op_info->l);
+
+	printf("\treturn vec;\n");
+}
+
+static inline void op_print_store(int op, int type, int bits, int size)
+{
+	struct op_info *op_info = gen_op_info(op);
+
+	printf("\t");
+	gen_print_vtype(type, bits, size / 2);
+	printf("_%s(vec.dbl[0], x);\n", op_info->l);
+
+	printf("\t");
+	gen_print_vtype(type, bits, size / 2);
+	printf("_%s(vec.dbl[1], x + %d);\n", op_info->l, size / 2);
+}
+
+static struct op_impl op_impl[OP_FINAL_] = {
+	[OP_SPLAT] = {NULL, op_print_pp_halfsize, op_print_splat},
+	[OP_LOAD_ALIGNED] = {NULL, op_print_pp_halfsize, op_print_load},
+	[OP_LOAD] = {NULL, op_print_pp_halfsize, op_print_load},
+	[OP_STORE_ALIGNED] = {NULL, op_print_pp_halfsize, op_print_store},
+	[OP_STORE] = {NULL, op_print_pp_halfsize, op_print_store},
+
+	/* arithmetic */
+	[OP_ADD] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_SUB] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_MUL] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_DIV] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_MOD] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_AVG] = {NULL, op_print_pp_halfsize, op_print_twoop},
+
+	/* bitwise */
+	[OP_AND] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_OR] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_XOR] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_NOT] = {NULL, op_print_pp_halfsize, op_print_unoop},
+
+	/* min/max */
+	[OP_MIN] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_MAX] = {NULL, op_print_pp_halfsize, op_print_twoop},
+
+	/* bitshift */
+	[OP_LSHIFT] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_LRSHIFT] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_RSHIFT] = {NULL, op_print_pp_halfsize, op_print_twoop},
+
+	/* comparison */
+	[OP_CMPLT] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_CMPLE] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_CMPEQ] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_CMPGE] = {NULL, op_print_pp_halfsize, op_print_twoop},
+	[OP_CMPGT] = {NULL, op_print_pp_halfsize, op_print_twoop},
+};
+
+int main(void)
+{
+	gen(op_impl, "double");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen/gengcc.c	Wed Apr 30 18:36:38 2025 -0400
@@ -0,0 +1,219 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+#include "genlib.h"
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+
+static int gcc_minmax_only_integer(int op, int type, int bits, int size)
+{
+	return (type == TYPE_INT || type == TYPE_UINT);
+}
+
+/* ------------------------------------------------------------------------ */
+
+static void pp_gcc_prereq_4_3_0(int op, int type, int bits, int size)
+{
+	printf("VEC_GNUC_ATLEAST(4, 3, 0)");
+}
+
+static void gcc_print_easy_op(int op, int type, int bits, int size)
+{
+	static const char *op_builtins[] = {
+		[OP_ADD]   = "+",
+		[OP_SUB]   = "-",
+		[OP_MUL]   = "*",
+		[OP_DIV]   = "/",
+		[OP_MOD]   = "%",
+		[OP_AND]   = "&",
+		[OP_OR]    = "|",
+		[OP_XOR]   = "^",
+		[OP_CMPLT] = "<",
+		[OP_CMPLE] = "<=",
+		[OP_CMPEQ] = "==",
+		[OP_CMPGE] = ">=",
+		[OP_CMPGT] = ">",
+	};
+
+	printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]);
+	printf("\treturn vec1;\n");
+}
+
+static void gcc_print_splat(int op, int type, int bits, int size)
+{
+	int i;
+
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	printf(" vec;\n");
+	for (i = 0; i < size; i++)
+		printf("\tvec.gcc[%d] = x;\n", i);
+	printf("\treturn vec;\n");
+}
+
+static void gcc_print_load_aligned(int op, int type, int bits, int size)
+{
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	printf(" vec;\n");
+	puts("\tvec.gcc = *(__typeof__(vec.gcc) *)x;");
+	printf("\treturn vec;\n");
+}
+
+static void gcc_print_load(int op, int type, int bits, int size)
+{
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	printf(" vec;\n");
+	puts("\tmemcpy(&vec, x, sizeof(vec));");
+	printf("\treturn vec;\n");
+}
+
+static void gcc_print_store_aligned(int op, int type, int bits, int size)
+{
+	puts("\t*(__typeof__(vec.gcc) *)x = vec.gcc;");
+}
+
+static void gcc_print_store(int op, int type, int bits, int size)
+{
+	puts("\tmemcpy(x, &vec, sizeof(vec));");
+}
+
+static void gcc_print_rorlshift(int op, int type, int bits, int size)
+{
+	static const char *op_builtins[] = {
+		[OP_LSHIFT] = "<<",
+		[OP_RSHIFT] = ">>",
+	};
+
+	printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]);
+	printf("\treturn vec1;\n");
+}
+
+static void gcc_print_lrshift(int op, int type, int bits, int size)
+{
+	printf("\tvec1.gcc = (__typeof__(vec1.gcc))((vec_uint%d __attribute__((__vector_size__(%d))))vec1.gcc >> vec2.gcc);\n", bits, bits * size / 8);
+	printf("\treturn vec1;\n");
+}
+
+static void gcc_print_minmax(int op, int type, int bits, int size)
+{
+	static const char *op_builtins[] = {
+		[OP_MIN] = "<",
+		[OP_MAX] = ">"
+	};
+
+	switch (type) {
+	case TYPE_INT:
+	case TYPE_UINT:
+		/* integer min/max via the comparison mask: keep vec1 where the
+		 * test holds, vec2 elsewhere */
+		printf("\t");
+		gen_print_vtype(type, bits, size);
+		printf(" mask;\n");
+		printf("\tmask.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]);
+		printf("\tvec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);\n");
+		printf("\treturn vec1;\n");
+		break;
+	default:
+		/* hm? */
+		break;
+	}
+}
+
+static void gcc_print_avg(int op, int type, int bits, int size)
+{
+	switch (type) {
+	case TYPE_INT:
+		printf("\tvint%dx%d ones = vint%dx%d_splat(1);\n", bits, size, bits, size);
+		puts("\t__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);");
+		puts("\t__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);");
+		puts("\t__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);");
+		puts("\t__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);");
+		puts("");
+		printf("\tvec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);\n");
+		break;
+	case TYPE_UINT:
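+		/* overflow-safe rounding average: halve each operand, then add 1
+		 * if either had its low bit set; this is ceil((a + b) / 2)
+		 * without ever computing a + b in full */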
+		printf("\tvec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);\n");
+		break;
+	case TYPE_FLOAT:
+		printf("\tvec1.gcc = (vec1.gcc + vec2.gcc) / 2;\n");
+		break;
+	}
+
+	printf("\treturn vec1;\n");
+}
+
+static void gcc_print_not(int op, int type, int bits, int size)
+{
+	printf("\tvec.gcc = ~vec.gcc;\n");
+	printf("\treturn vec;\n");
+}
+
+/* ------------------------------------------------------------------------ */
+
+static struct op_impl op_impl[OP_FINAL_] = {
+	[OP_SPLAT] = {NULL, NULL, gcc_print_splat},
+	[OP_LOAD_ALIGNED] = {NULL, NULL, gcc_print_load_aligned},
+	[OP_LOAD] = {NULL, NULL, gcc_print_load},
+	[OP_STORE_ALIGNED] = {NULL, NULL, gcc_print_store_aligned},
+	[OP_STORE] = {NULL, NULL, gcc_print_store},
+
+	/* arithmetic */
+	[OP_ADD] = {NULL, NULL, gcc_print_easy_op},
+	[OP_SUB] = {NULL, NULL, gcc_print_easy_op},
+	[OP_MUL] = {NULL, NULL, gcc_print_easy_op},
+#if 0
+	/* no defined divide by zero behavior */
+	[OP_DIV] = {NULL, NULL, gcc_print_easy_op},
+	[OP_MOD] = {NULL, NULL, gcc_print_easy_op},
+#endif
+	[OP_AVG] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_avg},
+
+	/* bitwise */
+	[OP_AND] = {NULL, NULL, gcc_print_easy_op},
+	[OP_OR] = {NULL, NULL, gcc_print_easy_op},
+	[OP_XOR] = {NULL, NULL, gcc_print_easy_op},
+	[OP_NOT] = {NULL, NULL, gcc_print_not},
+
+	/* min/max */
+	[OP_MIN] = {gcc_minmax_only_integer, pp_gcc_prereq_4_3_0, gcc_print_minmax},
+	[OP_MAX] = {gcc_minmax_only_integer, pp_gcc_prereq_4_3_0, gcc_print_minmax},
+
+	/* bitshift */
+	[OP_LSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_rorlshift},
+	[OP_LRSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_lrshift},
+	[OP_RSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_rorlshift},
+
+	/* comparison */
+	[OP_CMPLT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op},
+	[OP_CMPLE] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op},
+	[OP_CMPEQ] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op},
+	[OP_CMPGE] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op},
+	[OP_CMPGT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op},
+};
+
+int main(void)
+{
+	gen(op_impl, "gcc");
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen/gengeneric.c	Wed Apr 30 18:36:38 2025 -0400
@@ -0,0 +1,274 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+#include "genlib.h"
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+
+static void op_builtin_pbody(int op, int type, int bits, int size)
+{
+	const char *ops[OP_FINAL_] = {
+		[OP_ADD] = "+",
+		[OP_SUB] = "-",
+		[OP_MUL] = "*",
+		[OP_AND] = "&",
+		[OP_OR]  = "|",
+		[OP_XOR] = "^",
+	};
+	int i;
+
+	for (i = 0; i < size; i++)
+		printf("\tvec1.generic[%d] = (vec1.generic[%d] %s vec2.generic[%d]);\n", i, i, ops[op], i);
+
+	puts("\treturn vec1;");
+
+	(void)bits;
+}
+
+static void op_builtin_avg_pbody(int op, int type, int bits, int size)
+{
+	int i;
+
+	switch (type) {
+	case TYPE_INT:
+		printf("\t");
+		gen_print_stype(type, bits);
+		printf(" x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;\n");
+
+		for (i = 0; i < size; i++)
+			printf(
+				"\tx_d_rem = (vec1.generic[%d] % 2);\n"
+				"\ty_d_rem = (vec2.generic[%d] % 2);\n"
+				"\trem_d_quot = ((x_d_rem + y_d_rem) / 2);\n"
+				"\trem_d_rem = ((x_d_rem + y_d_rem) % 2);\n"
+				"\n"
+				"\tvec1.generic[%d] = ((vec1.generic[%d] / 2) + (vec2.generic[%d] / 2)) + (rem_d_quot) + (rem_d_rem == 1);\n"
+			, i, i, i, i, i);
+		break;
+	case TYPE_UINT:
+		for (i = 0; i < size; i++)
+			printf("vec1.generic[%d] = (vec1.generic[%d] >> 1) + (vec2.generic[%d] >> 1) + ((vec1.generic[%d] | vec2.generic[%d]) & 1);\n", i, i, i, i, i);
+		break;
+	case TYPE_FLOAT:
+		/* this is probably fine. */
+		for (i = 0; i < size; i++)
+			printf("\tvec1.generic[%d] = (vec1.generic[%d] + vec2.generic[%d]) / 2;\n", i, i, i);
+		break;
+	}
+
+	printf("\treturn vec1;\n");
+}
+
+static void op_builtin_not_pbody(int op, int type, int bits, int size)
+{
+	int i;
+
+	for (i = 0; i < size; i++)
+		printf("\tvec.generic[%d] = ~vec.generic[%d];\n", i, i);
+
+	puts("\treturn vec;");
+}
+
+static void op_builtin_shift_pbody(int op, int type, int bits, int size)
+{
+	int i;
+
+	switch (type) {
+	case TYPE_UINT: {
+		const char *ops[] = {
+			[OP_LSHIFT] = "<<",
+			[OP_RSHIFT] = ">>",
+			[OP_LRSHIFT] = ">>",
+		};
+
+		for (i = 0; i < size; i++)
+			printf("\tvec1.generic[%d] %s= vec2.generic[%d];\n", i, ops[op]);
+		break;
+	}
+	case TYPE_INT: {
+		switch (op) {
+		case OP_LSHIFT:
+		case OP_LRSHIFT: {
+			const char *ops[] = {
+				[OP_LSHIFT] = "<<",
+				[OP_LRSHIFT] = ">>",
+			};
+
+			printf("\tunion { ");
+			gen_print_stype(TYPE_UINT, bits);
+			printf(" u; ");
+			gen_print_stype(TYPE_INT, bits);
+			puts(" s; } x;\n");
+
+			for (i = 0; i < size; i++)
+				printf(
+					"\tx.s = vec1.generic[%d];\n"
+					"\tx.u %s= vec2.generic[%d];\n"
+					"\tvec1.generic[%d] = x.s;\n",
+				i, ops[op], i, i);
+			break;
+		}
+		case OP_RSHIFT:
+			for (i = 0; i < size; i++)
+				printf("vec1.generic[%d] = ((~vec1.generic[%d]) >> vec2.generic[%d]);\n", i, i, i);
+			break;
+		}
+		break;
+	}
+	}
+
+	puts("\treturn vec1;");
+}
+
+static void op_builtin_nonzero_pbody(int op, int type, int bits, int size)
+{
+	const char *ops[OP_FINAL_] = {
+		[OP_DIV] = "/",
+		[OP_MOD] = "%",
+	};
+	int i;
+
+	if (op == OP_MOD && type == TYPE_FLOAT) {
+		for (i = 0; i < size; i++)
+			printf("\tvec1.generic[%d] = (vec2.generic[%d] ? fmod(vec1.generic[%d], vec2.generic[%d]) : 0);\n", i, i, i, i);
+	} else {
+		for (i = 0; i < size; i++)
+			printf("\tvec1.generic[%d] = (vec2.generic[%d] ? (vec1.generic[%d] %s vec2.generic[%d]) : 0);\n", i, i, i, ops[op], i);
+	}
+
+	puts("\treturn vec1;");
+
+	(void)bits;
+}
+
+static void op_cmp_pbody(int op, int type, int bits, int size)
+{
+	const char *ops[OP_FINAL_] = {
+		[OP_CMPLT] = "<",
+		[OP_CMPLE] = "<=",
+		[OP_CMPEQ] = "==",
+		[OP_CMPGE] = ">=",
+		[OP_CMPGT] = ">",
+	};
+	int i;
+
+	/* this is portable for int, uint, and float */
+	for (i = 0; i < size; i++)
+		printf("\tmemset(&vec1.generic[%d], (vec1.generic[%d] %s vec2.generic[%d]) ? 0xFF : 0, %d);\n", i, i, ops[op], i, bits / 8);
+
+	puts("\treturn vec1;");
+}
+
+static void op_minmax_pbody(int op, int type, int bits, int size)
+{
+	const char *ops[OP_FINAL_] = {
+		[OP_MIN] = "<",
+		[OP_MAX] = ">",
+	};
+	int i;
+
+	for (i = 0; i < size; i++)
+		printf("\tvec1.generic[%d] = (vec1.generic[%d] %s vec2.generic[%d]) ? (vec1.generic[%d]) : (vec2.generic[%d]);\n", i, i, ops[op], i, i, i);
+
+	puts("\treturn vec1;");
+}
+
+static void op_splat_pbody(int op, int type, int bits, int size)
+{
+	int i;
+
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	printf(" vec;\n");
+
+	for (i = 0; i < size; i++)
+		printf("\tvec.generic[%d] = x;\n", i);
+
+	puts("\treturn vec;");
+}
+
+static void op_load_pbody(int op, int type, int bits, int size)
+{
+	int i;
+
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	printf(" vec;\n");
+
+	printf("\tmemcpy(vec.generic, x, %d);\n", (bits / 8) * size);
+
+	puts("\treturn vec;");
+}
+
+static void op_store_pbody(int op, int type, int bits, int size)
+{
+	printf("\tmemcpy(x, vec.generic, %d);\n", (bits / 8) * size);
+}
+
+/* ------------------------------------------------------------------------ */
+
+static struct op_impl op_impl[OP_FINAL_] = {
+	[OP_SPLAT] = {NULL, NULL, op_splat_pbody},
+	[OP_LOAD_ALIGNED] = {NULL, NULL, op_load_pbody},
+	[OP_LOAD] = {NULL, NULL, op_load_pbody},
+	[OP_STORE_ALIGNED] = {NULL, NULL, op_store_pbody},
+	[OP_STORE] = {NULL, NULL, op_store_pbody},
+
+	/* arithmetic */
+	[OP_ADD] = {NULL, NULL, op_builtin_pbody},
+	[OP_SUB] = {NULL, NULL, op_builtin_pbody},
+	[OP_MUL] = {NULL, NULL, op_builtin_pbody},
+	[OP_DIV] = {NULL, NULL, op_builtin_nonzero_pbody},
+	[OP_MOD] = {NULL, NULL, op_builtin_nonzero_pbody},
+	[OP_AVG] = {NULL, NULL, op_builtin_avg_pbody},
+
+	/* bitwise */
+	[OP_AND] = {NULL, NULL, op_builtin_pbody},
+	[OP_OR] = {NULL, NULL, op_builtin_pbody},
+	[OP_XOR] = {NULL, NULL, op_builtin_pbody},
+	[OP_NOT] = {NULL, NULL, op_builtin_not_pbody},
+
+	/* min/max */
+	[OP_MIN] = {NULL, NULL, op_minmax_pbody},
+	[OP_MAX] = {NULL, NULL, op_minmax_pbody},
+
+	/* bitshift */
+	[OP_LSHIFT] = {NULL, NULL, op_builtin_shift_pbody},
+	[OP_LRSHIFT] = {NULL, NULL, op_builtin_shift_pbody},
+	[OP_RSHIFT] = {NULL, NULL, op_builtin_shift_pbody},
+
+	/* comparison */
+	[OP_CMPLT] = {NULL, NULL, op_cmp_pbody},
+	[OP_CMPLE] = {NULL, NULL, op_cmp_pbody},
+	[OP_CMPEQ] = {NULL, NULL, op_cmp_pbody},
+	[OP_CMPGE] = {NULL, NULL, op_cmp_pbody},
+	[OP_CMPGT] = {NULL, NULL, op_cmp_pbody},
+};
+
+int main(void)
+{
+	gen(op_impl, "generic");
+
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen/genlib.c	Wed Apr 30 18:36:38 2025 -0400
@@ -0,0 +1,294 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+#include "genlib.h"
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+
+struct strs type_str[] = {
+	[TYPE_INT]   = {"int", "INT"},
+	[TYPE_UINT]  = {"uint", "UINT"},
+	[TYPE_FLOAT] = {"f", "F"},
+};
+
+void gen_print_vtype(int type, int bits, int size)
+{
+	printf("v%s%dx%d", type_str[type].l, bits, size);
+}
+
+void gen_print_stype(int type, int bits)
+{
+	printf("vec_%s%d", type_str[type].l, bits);
+}
+
+static void vret(int op, int type, int bits, int size)
+{
+	gen_print_vtype(type, bits, size);
+
+	(void)op;
+}
+
+static void nret(int op, int type, int bits, int size)
+{
+	printf("void");
+
+	(void)op, (void)type, (void)bits, (void)size;
+}
+
+static void voneparam(int op, int type, int bits, int size)
+{
+	gen_print_vtype(type, bits, size);
+	printf(" vec");
+
+	(void)op;
+}
+
+static void vtwoparam(int op, int type, int bits, int size)
+{
+	gen_print_vtype(type, bits, size);
+	printf(" vec1, ");
+	gen_print_vtype(type, bits, size);
+	printf(" vec2");
+
+	(void)op;
+}
+
+static void vshiftparam(int op, int type, int bits, int size)
+{
+	gen_print_vtype(type, bits, size);
+	printf(" vec1, ");
+	gen_print_vtype(TYPE_UINT, bits, size);
+	printf(" vec2");
+
+	(void)op;
+}
+
+static void vloadparam(int op, int type, int bits, int size)
+{
+	printf("const ");
+	gen_print_stype(type, bits);
+	printf(" x[%d]", size);
+
+	(void)op;
+}
+
+static void vsplatparam(int op, int type, int bits, int size)
+{
+	gen_print_stype(type, bits);
+	printf(" x");
+
+	(void)op, (void)size;
+}
+
+static void vstoreparam(int op, int type, int bits, int size)
+{
+	gen_print_vtype(type, bits, size);
+	printf(" vec, ");
+	gen_print_stype(type, bits);
+	printf(" x[%d]", size);
+
+	(void)op;
+}
+
+struct op_info ops[] = {
+	[OP_SPLAT] = {"SPLAT", "splat", vret, vsplatparam},
+	[OP_LOAD_ALIGNED] = {"LOAD_ALIGNED", "load_aligned", vret, vloadparam},
+	[OP_LOAD] = {"LOAD", "load", vret, vloadparam},
+	[OP_STORE_ALIGNED] = {"STORE_ALIGNED", "store_aligned", nret, vstoreparam},
+	[OP_STORE] = {"STORE", "store", nret, vstoreparam},
+	[OP_ADD] = {"ADD", "add", vret, vtwoparam},
+	[OP_SUB] = {"SUB", "sub", vret, vtwoparam},
+	[OP_MUL] = {"MUL", "mul", vret, vtwoparam},
+	[OP_DIV] = {"DIV", "div", vret, vtwoparam},
+	[OP_MOD] = {"MOD", "mod", vret, vtwoparam},
+	[OP_AVG] = {"AVG", "avg", vret, vtwoparam},
+	[OP_AND] = {"AND", "and", vret, vtwoparam},
+	[OP_OR] = {"OR", "or", vret, vtwoparam},
+	[OP_XOR] = {"XOR", "xor", vret, vtwoparam},
+	[OP_NOT] = {"NOT", "not", vret, voneparam},
+	[OP_CMPLT] = {"CMPLT", "cmplt", vret, vtwoparam},
+	[OP_CMPEQ] = {"CMPEQ", "cmpeq", vret, vtwoparam},
+	[OP_CMPGT] = {"CMPGT", "cmpgt", vret, vtwoparam},
+	[OP_CMPLE] = {"CMPLE", "cmple", vret, vtwoparam},
+	[OP_CMPGE] = {"CMPGE", "cmpge", vret, vtwoparam},
+	[OP_MIN] = {"MIN", "min", vret, vtwoparam},
+	[OP_MAX] = {"MAX", "max", vret, vtwoparam},
+	[OP_RSHIFT] = {"RSHIFT", "rshift", vret, vshiftparam},
+	[OP_LRSHIFT] = {"LRSHIFT", "lrshift", vret, vshiftparam},
+	[OP_LSHIFT] = {"LSHIFT", "lshift", vret, vshiftparam},
+};
+
+struct op_info *gen_op_info(int op)
+{
+	return &ops[op];
+}
+
+/* compile-time check: the array size below goes negative (and fails to
+ * compile) if the ops table does not cover every value up to OP_FINAL_ */
+extern int (*genlib_test(void))[(ARRAY_SIZE(ops) == OP_FINAL_) ? 1 : -2];
+
+int op_impl_check_always(int op, int type, int bits, int size)
+{
+	return 1;
+
+	(void)op, (void)type, (void)bits, (void)size;
+}
+
+static inline int verify_op(int op, int type)
+{
+	switch (op) {
+	case OP_AND:
+	case OP_XOR:
+	case OP_OR:
+	case OP_NOT:
+	case OP_RSHIFT:
+	case OP_LSHIFT:
+	case OP_LRSHIFT:
+		/* these operations make no sense for floating point */
+		if (type == TYPE_FLOAT)
+			return 0;
+		break;
+	}
+
+	return 1;
+}
+
+/* XXX: would it be faster to unroll literally everything instead of defining everything,
+ * and then unpacking it all? */
+static const char *header_tmpl =
+	"/**\n"
+	" * vec - a tiny SIMD vector library in C99\n"
+	" * \n"
+	" * Copyright (c) 2024-2025 Paper\n"
+	" * \n"
+	" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
+	" * of this software and associated documentation files (the \"Software\"), to deal\n"
+	" * in the Software without restriction, including without limitation the rights\n"
+	" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
+	" * copies of the Software, and to permit persons to whom the Software is\n"
+	" * furnished to do so, subject to the following conditions:\n"
+	" * \n"
+	" * The above copyright notice and this permission notice shall be included in all\n"
+	" * copies or substantial portions of the Software.\n"
+	" * \n"
+	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
+	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
+	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
+	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
+	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
+	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
+	" * SOFTWARE.\n"
+	"**/\n"
+	"\n"
+	"/* This file is automatically generated! Do not edit it directly!\n"
+	" * Edit the code that generates it in utils/gen%s.c  --paper */\n"
+	"\n"
+	"/* ------------------------------------------------------------------------ */\n"
+	"/* PREPROCESSOR HELL INCOMING */\n\n";
+
+int gen(struct op_impl op_impl[OP_FINAL_], const char *name)
+{
+	static struct {
+		int type, bits, size;
+	} types[] = {
+#define INT_TYPE(bits, size) {TYPE_INT, bits, size}, {TYPE_UINT, bits, size}
+
+		INT_TYPE(8, 2),
+		INT_TYPE(8, 4),
+		INT_TYPE(8, 8),
+		INT_TYPE(8, 16),
+		INT_TYPE(8, 32),
+		INT_TYPE(8, 64),
+
+		INT_TYPE(16, 2),
+		INT_TYPE(16, 4),
+		INT_TYPE(16, 8),
+		INT_TYPE(16, 16),
+		INT_TYPE(16, 32),
+
+		INT_TYPE(32, 2),
+		INT_TYPE(32, 4),
+		INT_TYPE(32, 8),
+		INT_TYPE(32, 16),
+
+		INT_TYPE(64, 2),
+		INT_TYPE(64, 4),
+		INT_TYPE(64, 8),
+
+#undef INT_TYPE
+
+		/* float */
+		{TYPE_FLOAT, 32, 2},
+		{TYPE_FLOAT, 32, 4},
+		{TYPE_FLOAT, 32, 8},
+		{TYPE_FLOAT, 32, 16},
+
+		/* double */
+		{TYPE_FLOAT, 64, 2},
+		{TYPE_FLOAT, 64, 4},
+		{TYPE_FLOAT, 64, 8},
+	};
+	int op;
+	size_t s;
+
+	printf(header_tmpl, name);
+
+	for (s = 0; s < ARRAY_SIZE(types); s++) {
+		for (op = 0; op < OP_FINAL_; op++) {
+			if (!op_impl[op].pbody)
+				continue; /* What? */
+
+			if (op_impl[op].check && !op_impl[op].check(op, types[s].type, types[s].bits, types[s].size))
+				continue;
+
+			if (!verify_op(op, types[s].type))
+				continue;
+
+			printf("#if !defined(V%s%dx%d_%s_DEFINED)", type_str[types[s].type].u, types[s].bits, types[s].size, ops[op].u);
+
+			if (op_impl[op].ppcheck) {
+				printf(" \\\n\t && (");
+				op_impl[op].ppcheck(op, types[s].type, types[s].bits, types[s].size);
+				printf(")");
+			}
+
+			puts("");
+
+			printf("VEC_FUNC_IMPL ");
+			ops[op].pret(op, types[s].type, types[s].bits, types[s].size);
+			printf(" ");
+			gen_print_vtype(types[s].type, types[s].bits, types[s].size);
+			printf("_%s(", ops[op].l);
+			ops[op].pparam(op, types[s].type, types[s].bits, types[s].size);
+			puts(")\n{");
+
+			op_impl[op].pbody(op, types[s].type, types[s].bits, types[s].size);
+
+			puts("}");
+
+			printf("# define V%s%dx%d_%s_DEFINED\n", type_str[types[s].type].u, types[s].bits, types[s].size, ops[op].u);
+			puts("#endif");
+		}
+	}
+
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen/genlib.h	Wed Apr 30 18:36:38 2025 -0400
@@ -0,0 +1,108 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+enum {
+	OP_SPLAT,
+	OP_LOAD_ALIGNED,
+	OP_LOAD,
+	OP_STORE_ALIGNED,
+	OP_STORE,
+	OP_ADD,
+	OP_SUB,
+	OP_MUL,
+	OP_DIV,
+	OP_MOD,
+	OP_AVG,
+	OP_AND,
+	OP_OR,
+	OP_XOR,
+	OP_NOT,
+	OP_CMPLT,
+	OP_CMPEQ,
+	OP_CMPGT,
+	OP_CMPLE, /* these are after the other ones to make */
+	OP_CMPGE, /* implementing them as simple as NOT(CMPLT|CMPGT) */
+	OP_MIN,
+	OP_MAX,
+	OP_RSHIFT,
+	OP_LRSHIFT,
+	OP_LSHIFT,
+
+	/* use this for array sizes and the like */
+	OP_FINAL_,
+};
+
+enum {
+	TYPE_INT,   /* signed int */
+	TYPE_UINT,  /* unsigned int */
+	TYPE_FLOAT, /* IEEE float */
+};
+
+struct op_info {
+	const char *u;
+	const char *l;
+
+	/* print return type to stdout */
+	void (*pret)(int op, int type, int bits, int size);
+
+	/* print params type to stdout */
+	void (*pparam)(int op, int type, int bits, int size);
+};
+
+struct strs {
+	const char *l;
+	const char *u;
+};
+
+extern struct strs type_str[];
+
+struct op_info *gen_op_info(int op);
+
+struct op_impl {
+	/* return 1 if it's implemented for a specific set of
+	 * inputs :)
+	 *
+	 * if this function is not implemented, and `pbody`
+	 * is not NULL, then it is assumed that there are
+	 * no restrictions on what type, bits, or size can
+	 * be used. beware! */
+	int (*check)(int op, int type, int bits, int size);
+
+	/* prints any additional preprocessor condition needed;
+	 * gen() wraps whatever this prints in ` && (...)` */
+	void (*ppcheck)(int op, int type, int bits, int size);
+
+	/* sherman?
+	 * (this prints the actual body of the function...) */
+	void (*pbody)(int op, int type, int bits, int size);
+};
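+
+/* For example, a hypothetical backend that only provides a plain add, with
+ * no restrictions and no extra preprocessor checks, would fill its table
+ * with something like
+ *
+ *	[OP_ADD] = {NULL, NULL, my_print_add_body},
+ *
+ * where my_print_add_body() prints the function body to stdout (see
+ * gengeneric.c for real examples). */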
+
+int gen(struct op_impl op_impl[OP_FINAL_], const char *name);
+
+void gen_print_vtype(int type, int bits, int size);
+void gen_print_stype(int type, int bits);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen/genvsx.c	Wed Apr 30 18:36:38 2025 -0400
@@ -0,0 +1,28 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+/* VSX is just AltiVec with the extensions enabled, so reuse that generator. */
+
+#define USE_VSX_EXTENSIONS
+#include "genaltivec.c"
\ No newline at end of file
--- a/include/vec/defs.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/include/vec/defs.h	Wed Apr 30 18:36:38 2025 -0400
@@ -27,12 +27,19 @@
 
 #include <string.h>
 #include <stdlib.h>
+#include <math.h> /* fmod */
+
+#ifdef __has_include
+# define VEC_HAS_INCLUDE(x) __has_include(x)
+#else
+# define VEC_HAS_INCLUDE(x) 0 /* can't check; assume not available */
+#endif
 
 #ifdef VEC_CUSTOM_INTEGER_TYPEDEF
 /* we already have custom integer typedefs; */
 # include "impl/integer.h"
 #else
-# if __cplusplus >= (201103L)
+# if defined(__cplusplus) && VEC_HAS_INCLUDE(<cstdint>) && VEC_HAS_INCLUDE(<cstddef>)
 #  include <cstdint>
 #  include <cstddef>
 typedef std::size_t    vec_uintsize;
@@ -49,7 +56,7 @@
 typedef std::int32_t   vec_int32;
 typedef std::int64_t   vec_int64;
 typedef std::intmax_t  vec_intmax;
-# elif __STDC_VERSION__ >= 199901L
+# elif (__STDC_VERSION__ >= 199901L) || (VEC_HAS_INCLUDE(<stdint.h>) && VEC_HAS_INCLUDE(<stddef.h>))
 #  include <stdint.h>
 #  include <stddef.h>
 typedef uint8_t   vec_uint8;
@@ -67,6 +74,11 @@
 # else
 #  error Unable to find integer types with known size.
 # endif
+
+/* these aren't necessarily IEEE 754 binary32/binary64, but who cares :) */
+typedef float vec_f32;
+typedef double vec_f64;
+
 #endif
 
 #define VEC_SEMVER_ATLEAST(a, b, c, x, y, z) \
--- a/include/vec/impl/double.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/include/vec/impl/double.h	Wed Apr 30 18:36:38 2025 -0400
@@ -23,3699 +23,10311 @@
 **/
 
 /* This file is automatically generated! Do not edit it directly!
- * Edit the code that generates it in utils/gengeneric.c  --paper */
-
-#ifndef VEC_IMPL_DOUBLE_H_
-#define VEC_IMPL_DOUBLE_H_
-
-#define VEC_DOUBLE_SPLAT(sign, bits, size, halfsize) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \
-	{ \
-		v##sign##int##bits##x##size vec; \
-	\
-		vec.generic[0] = v##sign##int##bits##x##halfsize##_splat(x); \
-		vec.generic[1] = v##sign##int##bits##x##halfsize##_splat(x); \
-	\
-		return vec; \
-	}
-
-#define VEC_DOUBLE_LOAD_EX(name, sign, bits, size, halfsize) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits x[size]) \
-	{ \
-		v##sign##int##bits##x##size vec; \
-	\
-		vec.generic[0] = v##sign##int##bits##x##halfsize##_##name(x); \
-		vec.generic[1] = v##sign##int##bits##x##halfsize##_##name(x + halfsize); \
-	\
-		return vec; \
-	}
-
-#define VEC_DOUBLE_LOAD(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load, sign, bits, size, halfsize)
-#define VEC_DOUBLE_LOAD_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load_aligned, sign, bits, size, halfsize)
-
-#define VEC_DOUBLE_STORE_EX(name, sign, bits, size, halfsize) \
-	VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits x[size]) \
-	{ \
-		v##sign##int##bits##x##halfsize##_##name(vec.generic[0], x); \
-		v##sign##int##bits##x##halfsize##_##name(vec.generic[1], x + halfsize); \
-	}
-
-#define VEC_DOUBLE_STORE(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store, sign, bits, size, halfsize)
-#define VEC_DOUBLE_STORE_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store_aligned, sign, bits, size, halfsize)
-
-#define VEC_DOUBLE_OP(name, sign, bits, size, halfsize, secondsign) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec1, v##secondsign##int##bits##x##size vec2) \
-	{ \
-		vec1.generic[0] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[0], vec2.generic[0]); \
-		vec1.generic[1] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[1], vec2.generic[1]); \
-	\
-		return vec1; \
-	}
-
-#define VEC_DOUBLE_ADD(sign, bits, size, halfsize) VEC_DOUBLE_OP(add, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_SUB(sign, bits, size, halfsize) VEC_DOUBLE_OP(sub, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_MUL(sign, bits, size, halfsize) VEC_DOUBLE_OP(mul, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_DIV(sign, bits, size, halfsize) VEC_DOUBLE_OP(div, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_MOD(sign, bits, size, halfsize) VEC_DOUBLE_OP(mod, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_AVG(sign, bits, size, halfsize) VEC_DOUBLE_OP(avg, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_LSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lshift, sign, bits, size, halfsize, u)
-#define VEC_DOUBLE_RSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(rshift, sign, bits, size, halfsize, u)
-#define VEC_DOUBLE_LRSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lrshift, sign, bits, size, halfsize, u)
-#define VEC_DOUBLE_AND(sign, bits, size, halfsize) VEC_DOUBLE_OP(and, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_OR(sign, bits, size, halfsize) VEC_DOUBLE_OP(or, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_XOR(sign, bits, size, halfsize) VEC_DOUBLE_OP(xor, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_MIN(sign, bits, size, halfsize) VEC_DOUBLE_OP(min, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_MAX(sign, bits, size, halfsize) VEC_DOUBLE_OP(max, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_CMPLT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmplt, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_CMPLE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmple, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_CMPEQ(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpeq, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_CMPGE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpge, sign, bits, size, halfsize, sign)
-#define VEC_DOUBLE_CMPGT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpgt, sign, bits, size, halfsize, sign)
-
-#define VEC_DOUBLE_NOT(sign, bits, size, halfsize) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \
-	{ \
-		vec.generic[0] = v##sign##int##bits##x##halfsize##_not(vec.generic[0]); \
-		vec.generic[1] = v##sign##int##bits##x##halfsize##_not(vec.generic[1]); \
-	\
-		return vec; \
-	}
-
-#endif /* VEC_IMPL_DOUBLE_H_ */ 
+ * Edit the code that generates it in utils/gendouble.c  --paper */
 
 /* ------------------------------------------------------------------------ */
 /* PREPROCESSOR HELL INCOMING */
 
-
-
-/* vuint8x4 */
-
-#if !defined(VINT8x4_SPLAT_DEFINED) && defined(VINT8x2_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x2_SPLAT_DEFINED) \
+	 && (defined(VINT8x1_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_splat(vec_int8 x)
+{
+	vint8x2 vec;
+	vec.dbl[0] = vint8x1_splat(x);
+	vec.dbl[1] = vint8x1_splat(x);
+	return vec;
+}
+# define VINT8x2_SPLAT_DEFINED
+#endif
+#if !defined(VINT8x2_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT8x1_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_load_aligned(const vec_int8 x[2])
+{
+	vint8x2 vec;
+	vec.dbl[0] = vint8x1_load_aligned(x);
+	vec.dbl[1] = vint8x1_load_aligned(x + 1);
+	return vec;
+}
+# define VINT8x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x2_LOAD_DEFINED) \
+	 && (defined(VINT8x1_LOAD_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_load(const vec_int8 x[2])
+{
+	vint8x2 vec;
+	vec.dbl[0] = vint8x1_load(x);
+	vec.dbl[1] = vint8x1_load(x + 1);
+	return vec;
+}
+# define VINT8x2_LOAD_DEFINED
+#endif
+#if !defined(VINT8x2_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT8x1_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint8x2_store_aligned(vint8x2 vec, vec_int8 x[2])
+{
+	vint8x1_store_aligned(vec.dbl[0], x);
+	vint8x1_store_aligned(vec.dbl[1], x + 1);
+}
+# define VINT8x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x2_STORE_DEFINED) \
+	 && (defined(VINT8x1_STORE_DEFINED))
+VEC_FUNC_IMPL void vint8x2_store(vint8x2 vec, vec_int8 x[2])
+{
+	vint8x1_store(vec.dbl[0], x);
+	vint8x1_store(vec.dbl[1], x + 1);
+}
+# define VINT8x2_STORE_DEFINED
+#endif
+#if !defined(VINT8x2_ADD_DEFINED) \
+	 && (defined(VINT8x1_ADD_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_add(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_ADD_DEFINED
+#endif
+#if !defined(VINT8x2_SUB_DEFINED) \
+	 && (defined(VINT8x1_SUB_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_sub(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_SUB_DEFINED
+#endif
+#if !defined(VINT8x2_MUL_DEFINED) \
+	 && (defined(VINT8x1_MUL_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_mul(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_MUL_DEFINED
+#endif
+#if !defined(VINT8x2_DIV_DEFINED) \
+	 && (defined(VINT8x1_DIV_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_div(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_DIV_DEFINED
+#endif
+#if !defined(VINT8x2_MOD_DEFINED) \
+	 && (defined(VINT8x1_MOD_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_mod(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_MOD_DEFINED
+#endif
+#if !defined(VINT8x2_AVG_DEFINED) \
+	 && (defined(VINT8x1_AVG_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_avg(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_AVG_DEFINED
+#endif
+#if !defined(VINT8x2_AND_DEFINED) \
+	 && (defined(VINT8x1_AND_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_and(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_AND_DEFINED
+#endif
+#if !defined(VINT8x2_OR_DEFINED) \
+	 && (defined(VINT8x1_OR_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_or(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_OR_DEFINED
+#endif
+#if !defined(VINT8x2_XOR_DEFINED) \
+	 && (defined(VINT8x1_XOR_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_xor(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_XOR_DEFINED
+#endif
+#if !defined(VINT8x2_NOT_DEFINED) \
+	 && (defined(VINT8x1_NOT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_not(vint8x2 vec)
+{
+	vec.dbl[0] = vint8x1_not(vec.dbl[0]);
+	vec.dbl[1] = vint8x1_not(vec.dbl[1]);
+	return vec;
+}
+# define VINT8x2_NOT_DEFINED
+#endif
+#if !defined(VINT8x2_CMPLT_DEFINED) \
+	 && (defined(VINT8x1_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmplt(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x2_CMPEQ_DEFINED) \
+	 && (defined(VINT8x1_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpeq(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x2_CMPGT_DEFINED) \
+	 && (defined(VINT8x1_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpgt(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x2_CMPLE_DEFINED) \
+	 && (defined(VINT8x1_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmple(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x2_CMPGE_DEFINED) \
+	 && (defined(VINT8x1_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpge(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x2_MIN_DEFINED) \
+	 && (defined(VINT8x1_MIN_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_min(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_MIN_DEFINED
+#endif
+#if !defined(VINT8x2_MAX_DEFINED) \
+	 && (defined(VINT8x1_MAX_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_max(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_MAX_DEFINED
+#endif
+#if !defined(VINT8x2_RSHIFT_DEFINED) \
+	 && (defined(VINT8x1_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_rshift(vint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_RSHIFT_DEFINED
+#endif
+#if !defined(VINT8x2_LRSHIFT_DEFINED) \
+	 && (defined(VINT8x1_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_lrshift(vint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT8x2_LSHIFT_DEFINED) \
+	 && (defined(VINT8x1_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x2 vint8x2_lshift(vint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vint8x1_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x1_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT8x2_LSHIFT_DEFINED
+#endif
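+/* vuint8x2: two vuint8x1 halves in .dbl */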
+#if !defined(VUINT8x2_SPLAT_DEFINED) \
+	 && (defined(VUINT8x1_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_splat(vec_uint8 x)
+{
+	vuint8x2 vec;
+	vec.dbl[0] = vuint8x1_splat(x);
+	vec.dbl[1] = vuint8x1_splat(x);
+	return vec;
+}
+# define VUINT8x2_SPLAT_DEFINED
+#endif
+#if !defined(VUINT8x2_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT8x1_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_load_aligned(const vec_uint8 x[2])
+{
+	vuint8x2 vec;
+	vec.dbl[0] = vuint8x1_load_aligned(x);
+	vec.dbl[1] = vuint8x1_load_aligned(x + 1);
+	return vec;
+}
+# define VUINT8x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x2_LOAD_DEFINED) \
+	 && (defined(VUINT8x1_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_load(const vec_uint8 x[2])
+{
+	vuint8x2 vec;
+	vec.dbl[0] = vuint8x1_load(x);
+	vec.dbl[1] = vuint8x1_load(x + 1);
+	return vec;
+}
+# define VUINT8x2_LOAD_DEFINED
+#endif
+#if !defined(VUINT8x2_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT8x1_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint8x2_store_aligned(vuint8x2 vec, vec_uint8 x[2])
+{
+	vuint8x1_store_aligned(vec.dbl[0], x);
+	vuint8x1_store_aligned(vec.dbl[1], x + 1);
+}
+# define VUINT8x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x2_STORE_DEFINED) \
+	 && (defined(VUINT8x1_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint8x2_store(vuint8x2 vec, vec_uint8 x[2])
+{
+	vuint8x1_store(vec.dbl[0], x);
+	vuint8x1_store(vec.dbl[1], x + 1);
+}
+# define VUINT8x2_STORE_DEFINED
+#endif
+#if !defined(VUINT8x2_ADD_DEFINED) \
+	 && (defined(VUINT8x1_ADD_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_add(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_ADD_DEFINED
+#endif
+#if !defined(VUINT8x2_SUB_DEFINED) \
+	 && (defined(VUINT8x1_SUB_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_sub(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_SUB_DEFINED
+#endif
+#if !defined(VUINT8x2_MUL_DEFINED) \
+	 && (defined(VUINT8x1_MUL_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_mul(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_MUL_DEFINED
+#endif
+#if !defined(VUINT8x2_DIV_DEFINED) \
+	 && (defined(VUINT8x1_DIV_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_div(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_DIV_DEFINED
+#endif
+#if !defined(VUINT8x2_MOD_DEFINED) \
+	 && (defined(VUINT8x1_MOD_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_mod(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_MOD_DEFINED
+#endif
+#if !defined(VUINT8x2_AVG_DEFINED) \
+	 && (defined(VUINT8x1_AVG_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_avg(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_AVG_DEFINED
+#endif
+#if !defined(VUINT8x2_AND_DEFINED) \
+	 && (defined(VUINT8x1_AND_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_and(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_AND_DEFINED
+#endif
+#if !defined(VUINT8x2_OR_DEFINED) \
+	 && (defined(VUINT8x1_OR_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_or(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_OR_DEFINED
+#endif
+#if !defined(VUINT8x2_XOR_DEFINED) \
+	 && (defined(VUINT8x1_XOR_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_xor(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_XOR_DEFINED
+#endif
+#if !defined(VUINT8x2_NOT_DEFINED) \
+	 && (defined(VUINT8x1_NOT_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_not(vuint8x2 vec)
+{
+	vec.dbl[0] = vuint8x1_not(vec.dbl[0]);
+	vec.dbl[1] = vuint8x1_not(vec.dbl[1]);
+	return vec;
+}
+# define VUINT8x2_NOT_DEFINED
+#endif
+#if !defined(VUINT8x2_CMPLT_DEFINED) \
+	 && (defined(VUINT8x1_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmplt(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_CMPLT_DEFINED
+#endif
+#if !defined(VUINT8x2_CMPEQ_DEFINED) \
+	 && (defined(VUINT8x1_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpeq(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT8x2_CMPGT_DEFINED) \
+	 && (defined(VUINT8x1_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpgt(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_CMPGT_DEFINED
+#endif
+#if !defined(VUINT8x2_CMPLE_DEFINED) \
+	 && (defined(VUINT8x1_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmple(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_CMPLE_DEFINED
+#endif
+#if !defined(VUINT8x2_CMPGE_DEFINED) \
+	 && (defined(VUINT8x1_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpge(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_CMPGE_DEFINED
+#endif
+#if !defined(VUINT8x2_MIN_DEFINED) \
+	 && (defined(VUINT8x1_MIN_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_min(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_MIN_DEFINED
+#endif
+#if !defined(VUINT8x2_MAX_DEFINED) \
+	 && (defined(VUINT8x1_MAX_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_max(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_MAX_DEFINED
+#endif
+#if !defined(VUINT8x2_RSHIFT_DEFINED) \
+	 && (defined(VUINT8x1_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_rshift(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x2_LRSHIFT_DEFINED) \
+	 && (defined(VUINT8x1_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_lrshift(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x2_LSHIFT_DEFINED) \
+	 && (defined(VUINT8x1_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_lshift(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.dbl[0] = vuint8x1_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x1_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT8x2_LSHIFT_DEFINED
+#endif
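+/* vint8x4: two vint8x2 halves in .dbl */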
+#if !defined(VINT8x4_SPLAT_DEFINED) \
+	 && (defined(VINT8x2_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_splat(vec_int8 x)
+{
+	vint8x4 vec;
+	vec.dbl[0] = vint8x2_splat(x);
+	vec.dbl[1] = vint8x2_splat(x);
+	return vec;
+}
 # define VINT8x4_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT8x4_LOAD_ALIGNED_DEFINED) && defined(VINT8x2_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT8x2_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_load_aligned(const vec_int8 x[4])
+{
+	vint8x4 vec;
+	vec.dbl[0] = vint8x2_load_aligned(x);
+	vec.dbl[1] = vint8x2_load_aligned(x + 2);
+	return vec;
+}
 # define VINT8x4_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT8x4_LOAD_DEFINED) && defined(VINT8x2_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_LOAD_DEFINED) \
+	 && (defined(VINT8x2_LOAD_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_load(const vec_int8 x[4])
+{
+	vint8x4 vec;
+	vec.dbl[0] = vint8x2_load(x);
+	vec.dbl[1] = vint8x2_load(x + 2);
+	return vec;
+}
 # define VINT8x4_LOAD_DEFINED
 #endif
-
-#if !defined(VINT8x4_STORE_ALIGNED_DEFINED) && defined(VINT8x2_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT8x2_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint8x4_store_aligned(vint8x4 vec, vec_int8 x[4])
+{
+	vint8x2_store_aligned(vec.dbl[0], x);
+	vint8x2_store_aligned(vec.dbl[1], x + 2);
+}
 # define VINT8x4_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT8x4_STORE_DEFINED) && defined(VINT8x2_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_STORE_DEFINED) \
+	 && (defined(VINT8x2_STORE_DEFINED))
+VEC_FUNC_IMPL void vint8x4_store(vint8x4 vec, vec_int8 x[4])
+{
+	vint8x2_store(vec.dbl[0], x);
+	vint8x2_store(vec.dbl[1], x + 2);
+}
 # define VINT8x4_STORE_DEFINED
 #endif
-
-#if !defined(VINT8x4_ADD_DEFINED) && defined(VINT8x2_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_ADD_DEFINED) \
+	 && (defined(VINT8x2_ADD_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_add(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_ADD_DEFINED
 #endif
-
-#if !defined(VINT8x4_SUB_DEFINED) && defined(VINT8x2_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_SUB_DEFINED) \
+	 && (defined(VINT8x2_SUB_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_sub(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_SUB_DEFINED
 #endif
-
-#if !defined(VINT8x4_MUL_DEFINED) && defined(VINT8x2_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_MUL_DEFINED) \
+	 && (defined(VINT8x2_MUL_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_mul(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_MUL_DEFINED
 #endif
-
-#if !defined(VINT8x4_DIV_DEFINED) && defined(VINT8x2_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_DIV_DEFINED) \
+	 && (defined(VINT8x2_DIV_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_div(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_DIV_DEFINED
 #endif
-
-#if !defined(VINT8x4_MOD_DEFINED) && defined(VINT8x2_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_MOD_DEFINED) \
+	 && (defined(VINT8x2_MOD_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_mod(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_MOD_DEFINED
 #endif
-
-#if !defined(VINT8x4_AVG_DEFINED) && defined(VINT8x2_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_AVG_DEFINED) \
+	 && (defined(VINT8x2_AVG_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_avg(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_AVG_DEFINED
 #endif
-
-#if !defined(VINT8x4_AND_DEFINED) && defined(VINT8x2_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_AND_DEFINED) \
+	 && (defined(VINT8x2_AND_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_and(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_AND_DEFINED
 #endif
-
-#if !defined(VINT8x4_OR_DEFINED) && defined(VINT8x2_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_OR_DEFINED) \
+	 && (defined(VINT8x2_OR_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_or(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_OR_DEFINED
 #endif
-
-#if !defined(VINT8x4_XOR_DEFINED) && defined(VINT8x2_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_XOR_DEFINED) \
+	 && (defined(VINT8x2_XOR_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_xor(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_XOR_DEFINED
 #endif
-
-#if !defined(VINT8x4_NOT_DEFINED) && defined(VINT8x2_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_NOT_DEFINED) \
+	 && (defined(VINT8x2_NOT_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_not(vint8x4 vec)
+{
+	vec.dbl[0] = vint8x2_not(vec.dbl[0]);
+	vec.dbl[1] = vint8x2_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT8x4_NOT_DEFINED
 #endif
-
-#if !defined(VINT8x4_CMPLT_DEFINED) && defined(VINT8x2_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_CMPLT_DEFINED) \
+	 && (defined(VINT8x2_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_cmplt(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT8x4_CMPEQ_DEFINED) && defined(VINT8x2_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_CMPEQ_DEFINED) \
+	 && (defined(VINT8x2_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_cmpeq(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT8x4_CMPGT_DEFINED) && defined(VINT8x2_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_CMPGT_DEFINED) \
+	 && (defined(VINT8x2_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_cmpgt(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT8x4_CMPLE_DEFINED) && defined(VINT8x2_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_CMPLE_DEFINED) \
+	 && (defined(VINT8x2_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_cmple(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT8x4_CMPGE_DEFINED) && defined(VINT8x2_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_CMPGE_DEFINED) \
+	 && (defined(VINT8x2_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_cmpge(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT8x4_MIN_DEFINED) && defined(VINT8x2_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_MIN_DEFINED) \
+	 && (defined(VINT8x2_MIN_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_min(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_MIN_DEFINED
 #endif
-
-#if !defined(VINT8x4_MAX_DEFINED) && defined(VINT8x2_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_MAX_DEFINED) \
+	 && (defined(VINT8x2_MAX_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_max(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_MAX_DEFINED
 #endif
-
-#if !defined(VINT8x4_RSHIFT_DEFINED) && defined(VINT8x2_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_RSHIFT_DEFINED) \
+	 && (defined(VINT8x2_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_rshift(vint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT8x4_LRSHIFT_DEFINED) && defined(VINT8x2_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_LRSHIFT_DEFINED) \
+	 && (defined(VINT8x2_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_lrshift(vint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT8x4_LSHIFT_DEFINED) && defined(VINT8x2_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 8, 4, 2)
+#if !defined(VINT8x4_LSHIFT_DEFINED) \
+	 && (defined(VINT8x2_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x4 vint8x4_lshift(vint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vint8x2_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x2_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x4_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint8x4 */
-
-#if !defined(VUINT8x4_SPLAT_DEFINED) && defined(VUINT8x2_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 8, 4, 2)
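+/* vuint8x4: two vuint8x2 halves in .dbl */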
+#if !defined(VUINT8x4_SPLAT_DEFINED) \
+	 && (defined(VUINT8x2_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_splat(vec_uint8 x)
+{
+	vuint8x4 vec;
+	vec.dbl[0] = vuint8x2_splat(x);
+	vec.dbl[1] = vuint8x2_splat(x);
+	return vec;
+}
 # define VUINT8x4_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT8x4_LOAD_ALIGNED_DEFINED) && defined(VUINT8x2_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 8, 4, 2)
+#if !defined(VUINT8x4_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT8x2_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_load_aligned(const vec_uint8 x[4])
+{
+	vuint8x4 vec;
+	vec.dbl[0] = vuint8x2_load_aligned(x);
+	vec.dbl[1] = vuint8x2_load_aligned(x + 2);
+	return vec;
+}
 # define VUINT8x4_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT8x4_LOAD_DEFINED) && defined(VUINT8x2_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 8, 4, 2)
+#if !defined(VUINT8x4_LOAD_DEFINED) \
+	 && (defined(VUINT8x2_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_load(const vec_uint8 x[4])
+{
+	vuint8x4 vec;
+	vec.dbl[0] = vuint8x2_load(x);
+	vec.dbl[1] = vuint8x2_load(x + 2);
+	return vec;
+}
 # define VUINT8x4_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT8x4_STORE_ALIGNED_DEFINED) && defined(VUINT8x2_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 8, 4, 2)
+#if !defined(VUINT8x4_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT8x2_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint8x4_store_aligned(vuint8x4 vec, vec_uint8 x[4])
+{
+	vuint8x2_store_aligned(vec.dbl[0], x);
+	vuint8x2_store_aligned(vec.dbl[1], x + 2);
+}
 # define VUINT8x4_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT8x4_STORE_DEFINED) && defined(VUINT8x2_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 8, 4, 2)
+#if !defined(VUINT8x4_STORE_DEFINED) \
+	 && (defined(VUINT8x2_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint8x4_store(vuint8x4 vec, vec_uint8 x[4])
+{
+	vuint8x2_store(vec.dbl[0], x);
+	vuint8x2_store(vec.dbl[1], x + 2);
+}
 # define VUINT8x4_STORE_DEFINED
 #endif
-
-#if !defined(VUINT8x4_ADD_DEFINED) && defined(VUINT8x2_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 8, 4, 2)
+#if !defined(VUINT8x4_ADD_DEFINED) \
+	 && (defined(VUINT8x2_ADD_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_add(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_ADD_DEFINED
 #endif
-
-#if !defined(VUINT8x4_SUB_DEFINED) && defined(VUINT8x2_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 8, 4, 2)
+#if !defined(VUINT8x4_SUB_DEFINED) \
+	 && (defined(VUINT8x2_SUB_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_sub(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_SUB_DEFINED
 #endif
-
-#if !defined(VUINT8x4_MUL_DEFINED) && defined(VUINT8x2_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 8, 4, 2)
+#if !defined(VUINT8x4_MUL_DEFINED) \
+	 && (defined(VUINT8x2_MUL_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_mul(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_MUL_DEFINED
 #endif
-
-#if !defined(VUINT8x4_DIV_DEFINED) && defined(VUINT8x2_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 8, 4, 2)
+#if !defined(VUINT8x4_DIV_DEFINED) \
+	 && (defined(VUINT8x2_DIV_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_div(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_DIV_DEFINED
 #endif
-
-#if !defined(VUINT8x4_MOD_DEFINED) && defined(VUINT8x2_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 8, 4, 2)
+#if !defined(VUINT8x4_MOD_DEFINED) \
+	 && (defined(VUINT8x2_MOD_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_mod(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_MOD_DEFINED
 #endif
-
-#if !defined(VUINT8x4_AVG_DEFINED) && defined(VUINT8x2_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 8, 4, 2)
+#if !defined(VUINT8x4_AVG_DEFINED) \
+	 && (defined(VUINT8x2_AVG_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_avg(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_AVG_DEFINED
 #endif
-
-#if !defined(VUINT8x4_AND_DEFINED) && defined(VUINT8x2_AND_DEFINED)
-VEC_DOUBLE_AND(u, 8, 4, 2)
+#if !defined(VUINT8x4_AND_DEFINED) \
+	 && (defined(VUINT8x2_AND_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_and(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_AND_DEFINED
 #endif
-
-#if !defined(VUINT8x4_OR_DEFINED) && defined(VUINT8x2_OR_DEFINED)
-VEC_DOUBLE_OR(u, 8, 4, 2)
+#if !defined(VUINT8x4_OR_DEFINED) \
+	 && (defined(VUINT8x2_OR_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_or(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_OR_DEFINED
 #endif
-
-#if !defined(VUINT8x4_XOR_DEFINED) && defined(VUINT8x2_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 8, 4, 2)
+#if !defined(VUINT8x4_XOR_DEFINED) \
+	 && (defined(VUINT8x2_XOR_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_xor(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_XOR_DEFINED
 #endif
-
-#if !defined(VUINT8x4_NOT_DEFINED) && defined(VUINT8x2_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 8, 4, 2)
+#if !defined(VUINT8x4_NOT_DEFINED) \
+	 && (defined(VUINT8x2_NOT_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_not(vuint8x4 vec)
+{
+	vec.dbl[0] = vuint8x2_not(vec.dbl[0]);
+	vec.dbl[1] = vuint8x2_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT8x4_NOT_DEFINED
 #endif
-
-#if !defined(VUINT8x4_CMPLT_DEFINED) && defined(VUINT8x2_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 8, 4, 2)
+#if !defined(VUINT8x4_CMPLT_DEFINED) \
+	 && (defined(VUINT8x2_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmplt(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT8x4_CMPEQ_DEFINED) && defined(VUINT8x2_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 8, 4, 2)
+#if !defined(VUINT8x4_CMPEQ_DEFINED) \
+	 && (defined(VUINT8x2_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpeq(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT8x4_CMPGT_DEFINED) && defined(VUINT8x2_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 8, 4, 2)
+#if !defined(VUINT8x4_CMPGT_DEFINED) \
+	 && (defined(VUINT8x2_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpgt(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT8x4_CMPLE_DEFINED) && defined(VUINT8x2_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 8, 4, 2)
+#if !defined(VUINT8x4_CMPLE_DEFINED) \
+	 && (defined(VUINT8x2_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmple(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT8x4_CMPGE_DEFINED) && defined(VUINT8x2_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 8, 4, 2)
+#if !defined(VUINT8x4_CMPGE_DEFINED) \
+	 && (defined(VUINT8x2_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpge(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT8x4_MIN_DEFINED) && defined(VUINT8x2_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 8, 4, 2)
+#if !defined(VUINT8x4_MIN_DEFINED) \
+	 && (defined(VUINT8x2_MIN_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_min(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_MIN_DEFINED
 #endif
-
-#if !defined(VUINT8x4_MAX_DEFINED) && defined(VUINT8x2_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 8, 4, 2)
+#if !defined(VUINT8x4_MAX_DEFINED) \
+	 && (defined(VUINT8x2_MAX_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_max(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_MAX_DEFINED
 #endif
-
-#if !defined(VUINT8x4_RSHIFT_DEFINED) && defined(VUINT8x2_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 8, 4, 2)
+#if !defined(VUINT8x4_RSHIFT_DEFINED) \
+	 && (defined(VUINT8x2_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_rshift(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT8x4_LRSHIFT_DEFINED) && defined(VUINT8x2_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 8, 4, 2)
+#if !defined(VUINT8x4_LRSHIFT_DEFINED) \
+	 && (defined(VUINT8x2_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_lrshift(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT8x4_LSHIFT_DEFINED) && defined(VUINT8x2_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 8, 4, 2)
+#if !defined(VUINT8x4_LSHIFT_DEFINED) \
+	 && (defined(VUINT8x2_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_lshift(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.dbl[0] = vuint8x2_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x2_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x4_LSHIFT_DEFINED
 #endif
-
-
-
-/* vuint8x8 */
-
-#if !defined(VINT8x8_SPLAT_DEFINED) && defined(VINT8x4_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 8, 8, 4)
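+/* vint8x8: two vint8x4 halves in .dbl */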
+#if !defined(VINT8x8_SPLAT_DEFINED) \
+	 && (defined(VINT8x4_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_splat(vec_int8 x)
+{
+	vint8x8 vec;
+	vec.dbl[0] = vint8x4_splat(x);
+	vec.dbl[1] = vint8x4_splat(x);
+	return vec;
+}
 # define VINT8x8_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT8x8_LOAD_ALIGNED_DEFINED) && defined(VINT8x4_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT8x4_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_load_aligned(const vec_int8 x[8])
+{
+	vint8x8 vec;
+	vec.dbl[0] = vint8x4_load_aligned(x);
+	vec.dbl[1] = vint8x4_load_aligned(x + 4);
+	return vec;
+}
 # define VINT8x8_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT8x8_LOAD_DEFINED) && defined(VINT8x4_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_LOAD_DEFINED) \
+	 && (defined(VINT8x4_LOAD_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_load(const vec_int8 x[8])
+{
+	vint8x8 vec;
+	vec.dbl[0] = vint8x4_load(x);
+	vec.dbl[1] = vint8x4_load(x + 4);
+	return vec;
+}
 # define VINT8x8_LOAD_DEFINED
 #endif
-
-#if !defined(VINT8x8_STORE_ALIGNED_DEFINED) && defined(VINT8x4_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT8x4_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint8x8_store_aligned(vint8x8 vec, vec_int8 x[8])
+{
+	vint8x4_store_aligned(vec.dbl[0], x);
+	vint8x4_store_aligned(vec.dbl[1], x + 4);
+}
 # define VINT8x8_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT8x8_STORE_DEFINED) && defined(VINT8x4_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_STORE_DEFINED) \
+	 && (defined(VINT8x4_STORE_DEFINED))
+VEC_FUNC_IMPL void vint8x8_store(vint8x8 vec, vec_int8 x[8])
+{
+	vint8x4_store(vec.dbl[0], x);
+	vint8x4_store(vec.dbl[1], x + 4);
+}
 # define VINT8x8_STORE_DEFINED
 #endif
-
-#if !defined(VINT8x8_ADD_DEFINED) && defined(VINT8x4_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_ADD_DEFINED) \
+	 && (defined(VINT8x4_ADD_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_add(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_ADD_DEFINED
 #endif
-
-#if !defined(VINT8x8_SUB_DEFINED) && defined(VINT8x4_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_SUB_DEFINED) \
+	 && (defined(VINT8x4_SUB_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_sub(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_SUB_DEFINED
 #endif
-
-#if !defined(VINT8x8_MUL_DEFINED) && defined(VINT8x4_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_MUL_DEFINED) \
+	 && (defined(VINT8x4_MUL_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_mul(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_MUL_DEFINED
 #endif
-
-#if !defined(VINT8x8_DIV_DEFINED) && defined(VINT8x4_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_DIV_DEFINED) \
+	 && (defined(VINT8x4_DIV_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_div(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_DIV_DEFINED
 #endif
-
-#if !defined(VINT8x8_MOD_DEFINED) && defined(VINT8x4_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_MOD_DEFINED) \
+	 && (defined(VINT8x4_MOD_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_mod(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_MOD_DEFINED
 #endif
-
-#if !defined(VINT8x8_AVG_DEFINED) && defined(VINT8x4_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_AVG_DEFINED) \
+	 && (defined(VINT8x4_AVG_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_avg(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_AVG_DEFINED
 #endif
-
-#if !defined(VINT8x8_AND_DEFINED) && defined(VINT8x4_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_AND_DEFINED) \
+	 && (defined(VINT8x4_AND_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_and(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_AND_DEFINED
 #endif
-
-#if !defined(VINT8x8_OR_DEFINED) && defined(VINT8x4_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_OR_DEFINED) \
+	 && (defined(VINT8x4_OR_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_or(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_OR_DEFINED
 #endif
-
-#if !defined(VINT8x8_XOR_DEFINED) && defined(VINT8x4_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_XOR_DEFINED) \
+	 && (defined(VINT8x4_XOR_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_xor(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_XOR_DEFINED
 #endif
-
-#if !defined(VINT8x8_NOT_DEFINED) && defined(VINT8x4_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_NOT_DEFINED) \
+	 && (defined(VINT8x4_NOT_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_not(vint8x8 vec)
+{
+	vec.dbl[0] = vint8x4_not(vec.dbl[0]);
+	vec.dbl[1] = vint8x4_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT8x8_NOT_DEFINED
 #endif
-
-#if !defined(VINT8x8_CMPLT_DEFINED) && defined(VINT8x4_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_CMPLT_DEFINED) \
+	 && (defined(VINT8x4_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_cmplt(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT8x8_CMPEQ_DEFINED) && defined(VINT8x4_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_CMPEQ_DEFINED) \
+	 && (defined(VINT8x4_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_cmpeq(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT8x8_CMPGT_DEFINED) && defined(VINT8x4_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_CMPGT_DEFINED) \
+	 && (defined(VINT8x4_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_cmpgt(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT8x8_CMPLE_DEFINED) && defined(VINT8x4_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_CMPLE_DEFINED) \
+	 && (defined(VINT8x4_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_cmple(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT8x8_CMPGE_DEFINED) && defined(VINT8x4_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_CMPGE_DEFINED) \
+	 && (defined(VINT8x4_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_cmpge(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT8x8_MIN_DEFINED) && defined(VINT8x4_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_MIN_DEFINED) \
+	 && (defined(VINT8x4_MIN_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_min(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_MIN_DEFINED
 #endif
-
-#if !defined(VINT8x8_MAX_DEFINED) && defined(VINT8x4_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_MAX_DEFINED) \
+	 && (defined(VINT8x4_MAX_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_max(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_MAX_DEFINED
 #endif
-
-#if !defined(VINT8x8_RSHIFT_DEFINED) && defined(VINT8x4_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_RSHIFT_DEFINED) \
+	 && (defined(VINT8x4_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_rshift(vint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT8x8_LRSHIFT_DEFINED) && defined(VINT8x4_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_LRSHIFT_DEFINED) \
+	 && (defined(VINT8x4_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_lrshift(vint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT8x8_LSHIFT_DEFINED) && defined(VINT8x4_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 8, 8, 4)
+#if !defined(VINT8x8_LSHIFT_DEFINED) \
+	 && (defined(VINT8x4_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x8 vint8x8_lshift(vint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vint8x4_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x4_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x8_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint8x8 */
-
-#if !defined(VUINT8x8_SPLAT_DEFINED) && defined(VUINT8x4_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 8, 8, 4)
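+/* vuint8x8: two vuint8x4 halves in .dbl */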
+#if !defined(VUINT8x8_SPLAT_DEFINED) \
+	 && (defined(VUINT8x4_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_splat(vec_uint8 x)
+{
+	vuint8x8 vec;
+	vec.dbl[0] = vuint8x4_splat(x);
+	vec.dbl[1] = vuint8x4_splat(x);
+	return vec;
+}
 # define VUINT8x8_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT8x8_LOAD_ALIGNED_DEFINED) && defined(VUINT8x4_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 8, 8, 4)
+#if !defined(VUINT8x8_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT8x4_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_load_aligned(const vec_uint8 x[8])
+{
+	vuint8x8 vec;
+	vec.dbl[0] = vuint8x4_load_aligned(x);
+	vec.dbl[1] = vuint8x4_load_aligned(x + 4);
+	return vec;
+}
 # define VUINT8x8_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT8x8_LOAD_DEFINED) && defined(VUINT8x4_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 8, 8, 4)
+#if !defined(VUINT8x8_LOAD_DEFINED) \
+	 && (defined(VUINT8x4_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_load(const vec_uint8 x[8])
+{
+	vuint8x8 vec;
+	vec.dbl[0] = vuint8x4_load(x);
+	vec.dbl[1] = vuint8x4_load(x + 4);
+	return vec;
+}
 # define VUINT8x8_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT8x8_STORE_ALIGNED_DEFINED) && defined(VUINT8x4_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 8, 8, 4)
+#if !defined(VUINT8x8_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT8x4_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint8x8_store_aligned(vuint8x8 vec, vec_uint8 x[8])
+{
+	vuint8x4_store_aligned(vec.dbl[0], x);
+	vuint8x4_store_aligned(vec.dbl[1], x + 4);
+}
 # define VUINT8x8_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT8x8_STORE_DEFINED) && defined(VUINT8x4_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 8, 8, 4)
+#if !defined(VUINT8x8_STORE_DEFINED) \
+	 && (defined(VUINT8x4_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint8x8_store(vuint8x8 vec, vec_uint8 x[8])
+{
+	vuint8x4_store(vec.dbl[0], x);
+	vuint8x4_store(vec.dbl[1], x + 4);
+}
 # define VUINT8x8_STORE_DEFINED
 #endif
-
-#if !defined(VUINT8x8_ADD_DEFINED) && defined(VUINT8x4_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 8, 8, 4)
+#if !defined(VUINT8x8_ADD_DEFINED) \
+	 && (defined(VUINT8x4_ADD_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_add(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_ADD_DEFINED
 #endif
-
-#if !defined(VUINT8x8_SUB_DEFINED) && defined(VUINT8x4_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 8, 8, 4)
+#if !defined(VUINT8x8_SUB_DEFINED) \
+	 && (defined(VUINT8x4_SUB_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_sub(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_SUB_DEFINED
 #endif
-
-#if !defined(VUINT8x8_MUL_DEFINED) && defined(VUINT8x4_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 8, 8, 4)
+#if !defined(VUINT8x8_MUL_DEFINED) \
+	 && (defined(VUINT8x4_MUL_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_mul(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_MUL_DEFINED
 #endif
-
-#if !defined(VUINT8x8_DIV_DEFINED) && defined(VUINT8x4_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 8, 8, 4)
+#if !defined(VUINT8x8_DIV_DEFINED) \
+	 && (defined(VUINT8x4_DIV_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_div(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_DIV_DEFINED
 #endif
-
-#if !defined(VUINT8x8_MOD_DEFINED) && defined(VUINT8x4_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 8, 8, 4)
+#if !defined(VUINT8x8_MOD_DEFINED) \
+	 && (defined(VUINT8x4_MOD_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_mod(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_MOD_DEFINED
 #endif
-
-#if !defined(VUINT8x8_AVG_DEFINED) && defined(VUINT8x4_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 8, 8, 4)
+#if !defined(VUINT8x8_AVG_DEFINED) \
+	 && (defined(VUINT8x4_AVG_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_avg(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_AVG_DEFINED
 #endif
-
-#if !defined(VUINT8x8_AND_DEFINED) && defined(VUINT8x4_AND_DEFINED)
-VEC_DOUBLE_AND(u, 8, 8, 4)
+#if !defined(VUINT8x8_AND_DEFINED) \
+	 && (defined(VUINT8x4_AND_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_and(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_AND_DEFINED
 #endif
-
-#if !defined(VUINT8x8_OR_DEFINED) && defined(VUINT8x4_OR_DEFINED)
-VEC_DOUBLE_OR(u, 8, 8, 4)
+#if !defined(VUINT8x8_OR_DEFINED) \
+	 && (defined(VUINT8x4_OR_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_or(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_OR_DEFINED
 #endif
-
-#if !defined(VUINT8x8_XOR_DEFINED) && defined(VUINT8x4_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 8, 8, 4)
+#if !defined(VUINT8x8_XOR_DEFINED) \
+	 && (defined(VUINT8x4_XOR_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_xor(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_XOR_DEFINED
 #endif
-
-#if !defined(VUINT8x8_NOT_DEFINED) && defined(VUINT8x4_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 8, 8, 4)
+#if !defined(VUINT8x8_NOT_DEFINED) \
+	 && (defined(VUINT8x4_NOT_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_not(vuint8x8 vec)
+{
+	vec.dbl[0] = vuint8x4_not(vec.dbl[0]);
+	vec.dbl[1] = vuint8x4_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT8x8_NOT_DEFINED
 #endif
-
-#if !defined(VUINT8x8_CMPLT_DEFINED) && defined(VUINT8x4_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 8, 8, 4)
+#if !defined(VUINT8x8_CMPLT_DEFINED) \
+	 && (defined(VUINT8x4_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmplt(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT8x8_CMPEQ_DEFINED) && defined(VUINT8x4_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 8, 8, 4)
+#if !defined(VUINT8x8_CMPEQ_DEFINED) \
+	 && (defined(VUINT8x4_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpeq(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT8x8_CMPGT_DEFINED) && defined(VUINT8x4_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 8, 8, 4)
+#if !defined(VUINT8x8_CMPGT_DEFINED) \
+	 && (defined(VUINT8x4_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpgt(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT8x8_CMPLE_DEFINED) && defined(VUINT8x4_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 8, 8, 4)
+#if !defined(VUINT8x8_CMPLE_DEFINED) \
+	 && (defined(VUINT8x4_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmple(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT8x8_CMPGE_DEFINED) && defined(VUINT8x4_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 8, 8, 4)
+#if !defined(VUINT8x8_CMPGE_DEFINED) \
+	 && (defined(VUINT8x4_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpge(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT8x8_MIN_DEFINED) && defined(VUINT8x4_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 8, 8, 4)
+#if !defined(VUINT8x8_MIN_DEFINED) \
+	 && (defined(VUINT8x4_MIN_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_min(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_MIN_DEFINED
 #endif
-
-#if !defined(VUINT8x8_MAX_DEFINED) && defined(VUINT8x4_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 8, 8, 4)
+#if !defined(VUINT8x8_MAX_DEFINED) \
+	 && (defined(VUINT8x4_MAX_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_max(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_MAX_DEFINED
 #endif
-
-#if !defined(VUINT8x8_RSHIFT_DEFINED) && defined(VUINT8x4_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 8, 8, 4)
+#if !defined(VUINT8x8_RSHIFT_DEFINED) \
+	 && (defined(VUINT8x4_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_rshift(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT8x8_LRSHIFT_DEFINED) && defined(VUINT8x4_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 8, 8, 4)
+#if !defined(VUINT8x8_LRSHIFT_DEFINED) \
+	 && (defined(VUINT8x4_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_lrshift(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT8x8_LSHIFT_DEFINED) && defined(VUINT8x4_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 8, 8, 4)
+#if !defined(VUINT8x8_LSHIFT_DEFINED) \
+	 && (defined(VUINT8x4_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_lshift(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.dbl[0] = vuint8x4_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x4_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x8_LSHIFT_DEFINED
 #endif
-
-
-
-/* vuint8x16 */
-
-#if !defined(VINT8x16_SPLAT_DEFINED) && defined(VINT8x8_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 8, 16, 8)
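+/* vint8x16: two vint8x8 halves in .dbl */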
+#if !defined(VINT8x16_SPLAT_DEFINED) \
+	 && (defined(VINT8x8_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_splat(vec_int8 x)
+{
+	vint8x16 vec;
+	vec.dbl[0] = vint8x8_splat(x);
+	vec.dbl[1] = vint8x8_splat(x);
+	return vec;
+}
 # define VINT8x16_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT8x16_LOAD_ALIGNED_DEFINED) && defined(VINT8x8_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT8x8_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_load_aligned(const vec_int8 x[16])
+{
+	vint8x16 vec;
+	vec.dbl[0] = vint8x8_load_aligned(x);
+	vec.dbl[1] = vint8x8_load_aligned(x + 8);
+	return vec;
+}
 # define VINT8x16_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT8x16_LOAD_DEFINED) && defined(VINT8x8_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_LOAD_DEFINED) \
+	 && (defined(VINT8x8_LOAD_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_load(const vec_int8 x[16])
+{
+	vint8x16 vec;
+	vec.dbl[0] = vint8x8_load(x);
+	vec.dbl[1] = vint8x8_load(x + 8);
+	return vec;
+}
 # define VINT8x16_LOAD_DEFINED
 #endif
-
-#if !defined(VINT8x16_STORE_ALIGNED_DEFINED) && defined(VINT8x8_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT8x8_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 x[16])
+{
+	vint8x8_store_aligned(vec.dbl[0], x);
+	vint8x8_store_aligned(vec.dbl[1], x + 8);
+}
 # define VINT8x16_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT8x16_STORE_DEFINED) && defined(VINT8x8_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_STORE_DEFINED) \
+	 && (defined(VINT8x8_STORE_DEFINED))
+VEC_FUNC_IMPL void vint8x16_store(vint8x16 vec, vec_int8 x[16])
+{
+	vint8x8_store(vec.dbl[0], x);
+	vint8x8_store(vec.dbl[1], x + 8);
+}
 # define VINT8x16_STORE_DEFINED
 #endif
-
-#if !defined(VINT8x16_ADD_DEFINED) && defined(VINT8x8_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_ADD_DEFINED) \
+	 && (defined(VINT8x8_ADD_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_add(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_ADD_DEFINED
 #endif
-
-#if !defined(VINT8x16_SUB_DEFINED) && defined(VINT8x8_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_SUB_DEFINED) \
+	 && (defined(VINT8x8_SUB_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_sub(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_SUB_DEFINED
 #endif
-
-#if !defined(VINT8x16_MUL_DEFINED) && defined(VINT8x8_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_MUL_DEFINED) \
+	 && (defined(VINT8x8_MUL_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_mul(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_MUL_DEFINED
 #endif
-
-#if !defined(VINT8x16_DIV_DEFINED) && defined(VINT8x8_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_DIV_DEFINED) \
+	 && (defined(VINT8x8_DIV_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_div(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_DIV_DEFINED
 #endif
-
-#if !defined(VINT8x16_MOD_DEFINED) && defined(VINT8x8_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_MOD_DEFINED) \
+	 && (defined(VINT8x8_MOD_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_mod(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_MOD_DEFINED
 #endif
-
-#if !defined(VINT8x16_AVG_DEFINED) && defined(VINT8x8_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_AVG_DEFINED) \
+	 && (defined(VINT8x8_AVG_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_AVG_DEFINED
 #endif
-
-#if !defined(VINT8x16_AND_DEFINED) && defined(VINT8x8_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_AND_DEFINED) \
+	 && (defined(VINT8x8_AND_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_AND_DEFINED
 #endif
-
-#if !defined(VINT8x16_OR_DEFINED) && defined(VINT8x8_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_OR_DEFINED) \
+	 && (defined(VINT8x8_OR_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_OR_DEFINED
 #endif
-
-#if !defined(VINT8x16_XOR_DEFINED) && defined(VINT8x8_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_XOR_DEFINED) \
+	 && (defined(VINT8x8_XOR_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_XOR_DEFINED
 #endif
-
-#if !defined(VINT8x16_NOT_DEFINED) && defined(VINT8x8_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_NOT_DEFINED) \
+	 && (defined(VINT8x8_NOT_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_not(vint8x16 vec)
+{
+	vec.dbl[0] = vint8x8_not(vec.dbl[0]);
+	vec.dbl[1] = vint8x8_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT8x16_NOT_DEFINED
 #endif
-
-#if !defined(VINT8x16_CMPLT_DEFINED) && defined(VINT8x8_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_CMPLT_DEFINED) \
+	 && (defined(VINT8x8_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT8x16_CMPEQ_DEFINED) && defined(VINT8x8_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_CMPEQ_DEFINED) \
+	 && (defined(VINT8x8_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT8x16_CMPGT_DEFINED) && defined(VINT8x8_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_CMPGT_DEFINED) \
+	 && (defined(VINT8x8_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT8x16_CMPLE_DEFINED) && defined(VINT8x8_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_CMPLE_DEFINED) \
+	 && (defined(VINT8x8_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_cmple(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT8x16_CMPGE_DEFINED) && defined(VINT8x8_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_CMPGE_DEFINED) \
+	 && (defined(VINT8x8_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpge(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT8x16_MIN_DEFINED) && defined(VINT8x8_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_MIN_DEFINED) \
+	 && (defined(VINT8x8_MIN_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_MIN_DEFINED
 #endif
-
-#if !defined(VINT8x16_MAX_DEFINED) && defined(VINT8x8_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_MAX_DEFINED) \
+	 && (defined(VINT8x8_MAX_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_MAX_DEFINED
 #endif
-
-#if !defined(VINT8x16_RSHIFT_DEFINED) && defined(VINT8x8_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_RSHIFT_DEFINED) \
+	 && (defined(VINT8x8_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_rshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT8x16_LRSHIFT_DEFINED) && defined(VINT8x8_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_LRSHIFT_DEFINED) \
+	 && (defined(VINT8x8_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT8x16_LSHIFT_DEFINED) && defined(VINT8x8_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 8, 16, 8)
+#if !defined(VINT8x16_LSHIFT_DEFINED) \
+	 && (defined(VINT8x8_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vint8x8_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x8_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x16_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint8x16 */
-
-#if !defined(VUINT8x16_SPLAT_DEFINED) && defined(VUINT8x8_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 8, 16, 8)
+#if !defined(VUINT8x16_SPLAT_DEFINED) \
+	 && (defined(VUINT8x8_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_splat(vec_uint8 x)
+{
+	vuint8x16 vec;
+	vec.dbl[0] = vuint8x8_splat(x);
+	vec.dbl[1] = vuint8x8_splat(x);
+	return vec;
+}
 # define VUINT8x16_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT8x16_LOAD_ALIGNED_DEFINED) && defined(VUINT8x8_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 8, 16, 8)
+#if !defined(VUINT8x16_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT8x8_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_load_aligned(const vec_uint8 x[16])
+{
+	vuint8x16 vec;
+	vec.dbl[0] = vuint8x8_load_aligned(x);
+	vec.dbl[1] = vuint8x8_load_aligned(x + 8);
+	return vec;
+}
 # define VUINT8x16_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT8x16_LOAD_DEFINED) && defined(VUINT8x8_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 8, 16, 8)
+#if !defined(VUINT8x16_LOAD_DEFINED) \
+	 && (defined(VUINT8x8_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 x[16])
+{
+	vuint8x16 vec;
+	vec.dbl[0] = vuint8x8_load(x);
+	vec.dbl[1] = vuint8x8_load(x + 8);
+	return vec;
+}
 # define VUINT8x16_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT8x16_STORE_ALIGNED_DEFINED) && defined(VUINT8x8_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 8, 16, 8)
+#if !defined(VUINT8x16_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT8x8_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 x[16])
+{
+	vuint8x8_store_aligned(vec.dbl[0], x);
+	vuint8x8_store_aligned(vec.dbl[1], x + 8);
+}
 # define VUINT8x16_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT8x16_STORE_DEFINED) && defined(VUINT8x8_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 8, 16, 8)
+#if !defined(VUINT8x16_STORE_DEFINED) \
+	 && (defined(VUINT8x8_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint8x16_store(vuint8x16 vec, vec_uint8 x[16])
+{
+	vuint8x8_store(vec.dbl[0], x);
+	vuint8x8_store(vec.dbl[1], x + 8);
+}
 # define VUINT8x16_STORE_DEFINED
 #endif
-
-#if !defined(VUINT8x16_ADD_DEFINED) && defined(VUINT8x8_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 8, 16, 8)
+#if !defined(VUINT8x16_ADD_DEFINED) \
+	 && (defined(VUINT8x8_ADD_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_add(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_ADD_DEFINED
 #endif
-
-#if !defined(VUINT8x16_SUB_DEFINED) && defined(VUINT8x8_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 8, 16, 8)
+#if !defined(VUINT8x16_SUB_DEFINED) \
+	 && (defined(VUINT8x8_SUB_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_sub(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_SUB_DEFINED
 #endif
-
-#if !defined(VUINT8x16_MUL_DEFINED) && defined(VUINT8x8_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 8, 16, 8)
+#if !defined(VUINT8x16_MUL_DEFINED) \
+	 && (defined(VUINT8x8_MUL_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_mul(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_MUL_DEFINED
 #endif
-
-#if !defined(VUINT8x16_DIV_DEFINED) && defined(VUINT8x8_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 8, 16, 8)
+#if !defined(VUINT8x16_DIV_DEFINED) \
+	 && (defined(VUINT8x8_DIV_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_div(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_DIV_DEFINED
 #endif
-
-#if !defined(VUINT8x16_MOD_DEFINED) && defined(VUINT8x8_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 8, 16, 8)
+#if !defined(VUINT8x16_MOD_DEFINED) \
+	 && (defined(VUINT8x8_MOD_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_mod(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_MOD_DEFINED
 #endif
-
-#if !defined(VUINT8x16_AVG_DEFINED) && defined(VUINT8x8_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 8, 16, 8)
+#if !defined(VUINT8x16_AVG_DEFINED) \
+	 && (defined(VUINT8x8_AVG_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_AVG_DEFINED
 #endif
-
-#if !defined(VUINT8x16_AND_DEFINED) && defined(VUINT8x8_AND_DEFINED)
-VEC_DOUBLE_AND(u, 8, 16, 8)
+#if !defined(VUINT8x16_AND_DEFINED) \
+	 && (defined(VUINT8x8_AND_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_and(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_AND_DEFINED
 #endif
-
-#if !defined(VUINT8x16_OR_DEFINED) && defined(VUINT8x8_OR_DEFINED)
-VEC_DOUBLE_OR(u, 8, 16, 8)
+#if !defined(VUINT8x16_OR_DEFINED) \
+	 && (defined(VUINT8x8_OR_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_or(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_OR_DEFINED
 #endif
-
-#if !defined(VUINT8x16_XOR_DEFINED) && defined(VUINT8x8_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 8, 16, 8)
+#if !defined(VUINT8x16_XOR_DEFINED) \
+	 && (defined(VUINT8x8_XOR_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_xor(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_XOR_DEFINED
 #endif
-
-#if !defined(VUINT8x16_NOT_DEFINED) && defined(VUINT8x8_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 8, 16, 8)
+#if !defined(VUINT8x16_NOT_DEFINED) \
+	 && (defined(VUINT8x8_NOT_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_not(vuint8x16 vec)
+{
+	vec.dbl[0] = vuint8x8_not(vec.dbl[0]);
+	vec.dbl[1] = vuint8x8_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT8x16_NOT_DEFINED
 #endif
-
-#if !defined(VUINT8x16_CMPLT_DEFINED) && defined(VUINT8x8_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 8, 16, 8)
+#if !defined(VUINT8x16_CMPLT_DEFINED) \
+	 && (defined(VUINT8x8_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmplt(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT8x16_CMPEQ_DEFINED) && defined(VUINT8x8_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 8, 16, 8)
+#if !defined(VUINT8x16_CMPEQ_DEFINED) \
+	 && (defined(VUINT8x8_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpeq(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT8x16_CMPGT_DEFINED) && defined(VUINT8x8_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 8, 16, 8)
+#if !defined(VUINT8x16_CMPGT_DEFINED) \
+	 && (defined(VUINT8x8_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpgt(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT8x16_CMPLE_DEFINED) && defined(VUINT8x8_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 8, 16, 8)
+#if !defined(VUINT8x16_CMPLE_DEFINED) \
+	 && (defined(VUINT8x8_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmple(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT8x16_CMPGE_DEFINED) && defined(VUINT8x8_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 8, 16, 8)
+#if !defined(VUINT8x16_CMPGE_DEFINED) \
+	 && (defined(VUINT8x8_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpge(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT8x16_MIN_DEFINED) && defined(VUINT8x8_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 8, 16, 8)
+#if !defined(VUINT8x16_MIN_DEFINED) \
+	 && (defined(VUINT8x8_MIN_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_min(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_MIN_DEFINED
 #endif
-
-#if !defined(VUINT8x16_MAX_DEFINED) && defined(VUINT8x8_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 8, 16, 8)
+#if !defined(VUINT8x16_MAX_DEFINED) \
+	 && (defined(VUINT8x8_MAX_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_max(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_MAX_DEFINED
 #endif
-
-#if !defined(VUINT8x16_RSHIFT_DEFINED) && defined(VUINT8x8_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 8, 16, 8)
+#if !defined(VUINT8x16_RSHIFT_DEFINED) \
+	 && (defined(VUINT8x8_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT8x16_LRSHIFT_DEFINED) && defined(VUINT8x8_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 8, 16, 8)
+#if !defined(VUINT8x16_LRSHIFT_DEFINED) \
+	 && (defined(VUINT8x8_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT8x16_LSHIFT_DEFINED) && defined(VUINT8x8_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 8, 16, 8)
+#if !defined(VUINT8x16_LSHIFT_DEFINED) \
+	 && (defined(VUINT8x8_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.dbl[0] = vuint8x8_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x8_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x16_LSHIFT_DEFINED
 #endif
-
-
-
-/* vuint8x32 */
-
-#if !defined(VINT8x32_SPLAT_DEFINED) && defined(VINT8x16_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_SPLAT_DEFINED) \
+	 && (defined(VINT8x16_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_splat(vec_int8 x)
+{
+	vint8x32 vec;
+	vec.dbl[0] = vint8x16_splat(x);
+	vec.dbl[1] = vint8x16_splat(x);
+	return vec;
+}
 # define VINT8x32_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT8x32_LOAD_ALIGNED_DEFINED) && defined(VINT8x16_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT8x16_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_load_aligned(const vec_int8 x[32])
+{
+	vint8x32 vec;
+	vec.dbl[0] = vint8x16_load_aligned(x);
+	vec.dbl[1] = vint8x16_load_aligned(x + 16);
+	return vec;
+}
 # define VINT8x32_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT8x32_LOAD_DEFINED) && defined(VINT8x16_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_LOAD_DEFINED) \
+	 && (defined(VINT8x16_LOAD_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_load(const vec_int8 x[32])
+{
+	vint8x32 vec;
+	vec.dbl[0] = vint8x16_load(x);
+	vec.dbl[1] = vint8x16_load(x + 16);
+	return vec;
+}
 # define VINT8x32_LOAD_DEFINED
 #endif
-
-#if !defined(VINT8x32_STORE_ALIGNED_DEFINED) && defined(VINT8x16_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT8x16_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint8x32_store_aligned(vint8x32 vec, vec_int8 x[32])
+{
+	vint8x16_store_aligned(vec.dbl[0], x);
+	vint8x16_store_aligned(vec.dbl[1], x + 16);
+}
 # define VINT8x32_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT8x32_STORE_DEFINED) && defined(VINT8x16_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_STORE_DEFINED) \
+	 && (defined(VINT8x16_STORE_DEFINED))
+VEC_FUNC_IMPL void vint8x32_store(vint8x32 vec, vec_int8 x[32])
+{
+	vint8x16_store(vec.dbl[0], x);
+	vint8x16_store(vec.dbl[1], x + 16);
+}
 # define VINT8x32_STORE_DEFINED
 #endif
-
-#if !defined(VINT8x32_ADD_DEFINED) && defined(VINT8x16_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_ADD_DEFINED) \
+	 && (defined(VINT8x16_ADD_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_add(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_ADD_DEFINED
 #endif
-
-#if !defined(VINT8x32_SUB_DEFINED) && defined(VINT8x16_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_SUB_DEFINED) \
+	 && (defined(VINT8x16_SUB_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_sub(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_SUB_DEFINED
 #endif
-
-#if !defined(VINT8x32_MUL_DEFINED) && defined(VINT8x16_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_MUL_DEFINED) \
+	 && (defined(VINT8x16_MUL_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_mul(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_MUL_DEFINED
 #endif
-
-#if !defined(VINT8x32_DIV_DEFINED) && defined(VINT8x16_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_DIV_DEFINED) \
+	 && (defined(VINT8x16_DIV_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_div(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_DIV_DEFINED
 #endif
-
-#if !defined(VINT8x32_MOD_DEFINED) && defined(VINT8x16_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_MOD_DEFINED) \
+	 && (defined(VINT8x16_MOD_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_mod(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_MOD_DEFINED
 #endif
-
-#if !defined(VINT8x32_AVG_DEFINED) && defined(VINT8x16_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_AVG_DEFINED) \
+	 && (defined(VINT8x16_AVG_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_avg(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_AVG_DEFINED
 #endif
-
-#if !defined(VINT8x32_AND_DEFINED) && defined(VINT8x16_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_AND_DEFINED) \
+	 && (defined(VINT8x16_AND_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_and(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_AND_DEFINED
 #endif
-
-#if !defined(VINT8x32_OR_DEFINED) && defined(VINT8x16_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_OR_DEFINED) \
+	 && (defined(VINT8x16_OR_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_or(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_OR_DEFINED
 #endif
-
-#if !defined(VINT8x32_XOR_DEFINED) && defined(VINT8x16_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_XOR_DEFINED) \
+	 && (defined(VINT8x16_XOR_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_xor(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_XOR_DEFINED
 #endif
-
-#if !defined(VINT8x32_NOT_DEFINED) && defined(VINT8x16_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_NOT_DEFINED) \
+	 && (defined(VINT8x16_NOT_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_not(vint8x32 vec)
+{
+	vec.dbl[0] = vint8x16_not(vec.dbl[0]);
+	vec.dbl[1] = vint8x16_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT8x32_NOT_DEFINED
 #endif
-
-#if !defined(VINT8x32_CMPLT_DEFINED) && defined(VINT8x16_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_CMPLT_DEFINED) \
+	 && (defined(VINT8x16_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_cmplt(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT8x32_CMPEQ_DEFINED) && defined(VINT8x16_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_CMPEQ_DEFINED) \
+	 && (defined(VINT8x16_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_cmpeq(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT8x32_CMPGT_DEFINED) && defined(VINT8x16_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_CMPGT_DEFINED) \
+	 && (defined(VINT8x16_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_cmpgt(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT8x32_CMPLE_DEFINED) && defined(VINT8x16_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_CMPLE_DEFINED) \
+	 && (defined(VINT8x16_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_cmple(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT8x32_CMPGE_DEFINED) && defined(VINT8x16_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_CMPGE_DEFINED) \
+	 && (defined(VINT8x16_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_cmpge(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT8x32_MIN_DEFINED) && defined(VINT8x16_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_MIN_DEFINED) \
+	 && (defined(VINT8x16_MIN_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_min(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_MIN_DEFINED
 #endif
-
-#if !defined(VINT8x32_MAX_DEFINED) && defined(VINT8x16_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_MAX_DEFINED) \
+	 && (defined(VINT8x16_MAX_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_max(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_MAX_DEFINED
 #endif
-
-#if !defined(VINT8x32_RSHIFT_DEFINED) && defined(VINT8x16_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_RSHIFT_DEFINED) \
+	 && (defined(VINT8x16_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_rshift(vint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT8x32_LRSHIFT_DEFINED) && defined(VINT8x16_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_LRSHIFT_DEFINED) \
+	 && (defined(VINT8x16_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_lrshift(vint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT8x32_LSHIFT_DEFINED) && defined(VINT8x16_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 8, 32, 16)
+#if !defined(VINT8x32_LSHIFT_DEFINED) \
+	 && (defined(VINT8x16_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x32 vint8x32_lshift(vint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vint8x16_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x16_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x32_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint8x32 */
-
-#if !defined(VUINT8x32_SPLAT_DEFINED) && defined(VUINT8x16_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 8, 32, 16)
+#if !defined(VUINT8x32_SPLAT_DEFINED) \
+	 && (defined(VUINT8x16_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_splat(vec_uint8 x)
+{
+	vuint8x32 vec;
+	vec.dbl[0] = vuint8x16_splat(x);
+	vec.dbl[1] = vuint8x16_splat(x);
+	return vec;
+}
 # define VUINT8x32_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT8x32_LOAD_ALIGNED_DEFINED) && defined(VUINT8x16_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 8, 32, 16)
+#if !defined(VUINT8x32_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT8x16_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_load_aligned(const vec_uint8 x[32])
+{
+	vuint8x32 vec;
+	vec.dbl[0] = vuint8x16_load_aligned(x);
+	vec.dbl[1] = vuint8x16_load_aligned(x + 16);
+	return vec;
+}
 # define VUINT8x32_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT8x32_LOAD_DEFINED) && defined(VUINT8x16_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 8, 32, 16)
+#if !defined(VUINT8x32_LOAD_DEFINED) \
+	 && (defined(VUINT8x16_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_load(const vec_uint8 x[32])
+{
+	vuint8x32 vec;
+	vec.dbl[0] = vuint8x16_load(x);
+	vec.dbl[1] = vuint8x16_load(x + 16);
+	return vec;
+}
 # define VUINT8x32_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT8x32_STORE_ALIGNED_DEFINED) && defined(VUINT8x16_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 8, 32, 16)
+#if !defined(VUINT8x32_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT8x16_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint8x32_store_aligned(vuint8x32 vec, vec_uint8 x[32])
+{
+	vuint8x16_store_aligned(vec.dbl[0], x);
+	vuint8x16_store_aligned(vec.dbl[1], x + 16);
+}
 # define VUINT8x32_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT8x32_STORE_DEFINED) && defined(VUINT8x16_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 8, 32, 16)
+#if !defined(VUINT8x32_STORE_DEFINED) \
+	 && (defined(VUINT8x16_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint8x32_store(vuint8x32 vec, vec_uint8 x[32])
+{
+	vuint8x16_store(vec.dbl[0], x);
+	vuint8x16_store(vec.dbl[1], x + 16);
+}
 # define VUINT8x32_STORE_DEFINED
 #endif
-
-#if !defined(VUINT8x32_ADD_DEFINED) && defined(VUINT8x16_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 8, 32, 16)
+#if !defined(VUINT8x32_ADD_DEFINED) \
+	 && (defined(VUINT8x16_ADD_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_add(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_ADD_DEFINED
 #endif
-
-#if !defined(VUINT8x32_SUB_DEFINED) && defined(VUINT8x16_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 8, 32, 16)
+#if !defined(VUINT8x32_SUB_DEFINED) \
+	 && (defined(VUINT8x16_SUB_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_sub(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_SUB_DEFINED
 #endif
-
-#if !defined(VUINT8x32_MUL_DEFINED) && defined(VUINT8x16_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 8, 32, 16)
+#if !defined(VUINT8x32_MUL_DEFINED) \
+	 && (defined(VUINT8x16_MUL_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_mul(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_MUL_DEFINED
 #endif
-
-#if !defined(VUINT8x32_DIV_DEFINED) && defined(VUINT8x16_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 8, 32, 16)
+#if !defined(VUINT8x32_DIV_DEFINED) \
+	 && (defined(VUINT8x16_DIV_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_div(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_DIV_DEFINED
 #endif
-
-#if !defined(VUINT8x32_MOD_DEFINED) && defined(VUINT8x16_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 8, 32, 16)
+#if !defined(VUINT8x32_MOD_DEFINED) \
+	 && (defined(VUINT8x16_MOD_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_mod(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_MOD_DEFINED
 #endif
-
-#if !defined(VUINT8x32_AVG_DEFINED) && defined(VUINT8x16_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 8, 32, 16)
+#if !defined(VUINT8x32_AVG_DEFINED) \
+	 && (defined(VUINT8x16_AVG_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_avg(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_AVG_DEFINED
 #endif
-
-#if !defined(VUINT8x32_AND_DEFINED) && defined(VUINT8x16_AND_DEFINED)
-VEC_DOUBLE_AND(u, 8, 32, 16)
+#if !defined(VUINT8x32_AND_DEFINED) \
+	 && (defined(VUINT8x16_AND_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_and(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_AND_DEFINED
 #endif
-
-#if !defined(VUINT8x32_OR_DEFINED) && defined(VUINT8x16_OR_DEFINED)
-VEC_DOUBLE_OR(u, 8, 32, 16)
+#if !defined(VUINT8x32_OR_DEFINED) \
+	 && (defined(VUINT8x16_OR_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_or(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_OR_DEFINED
 #endif
-
-#if !defined(VUINT8x32_XOR_DEFINED) && defined(VUINT8x16_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 8, 32, 16)
+#if !defined(VUINT8x32_XOR_DEFINED) \
+	 && (defined(VUINT8x16_XOR_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_xor(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_XOR_DEFINED
 #endif
-
-#if !defined(VUINT8x32_NOT_DEFINED) && defined(VUINT8x16_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 8, 32, 16)
+#if !defined(VUINT8x32_NOT_DEFINED) \
+	 && (defined(VUINT8x16_NOT_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_not(vuint8x32 vec)
+{
+	vec.dbl[0] = vuint8x16_not(vec.dbl[0]);
+	vec.dbl[1] = vuint8x16_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT8x32_NOT_DEFINED
 #endif
-
-#if !defined(VUINT8x32_CMPLT_DEFINED) && defined(VUINT8x16_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 8, 32, 16)
+#if !defined(VUINT8x32_CMPLT_DEFINED) \
+	 && (defined(VUINT8x16_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmplt(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT8x32_CMPEQ_DEFINED) && defined(VUINT8x16_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 8, 32, 16)
+#if !defined(VUINT8x32_CMPEQ_DEFINED) \
+	 && (defined(VUINT8x16_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpeq(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT8x32_CMPGT_DEFINED) && defined(VUINT8x16_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 8, 32, 16)
+#if !defined(VUINT8x32_CMPGT_DEFINED) \
+	 && (defined(VUINT8x16_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpgt(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT8x32_CMPLE_DEFINED) && defined(VUINT8x16_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 8, 32, 16)
+#if !defined(VUINT8x32_CMPLE_DEFINED) \
+	 && (defined(VUINT8x16_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmple(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT8x32_CMPGE_DEFINED) && defined(VUINT8x16_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 8, 32, 16)
+#if !defined(VUINT8x32_CMPGE_DEFINED) \
+	 && (defined(VUINT8x16_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpge(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT8x32_MIN_DEFINED) && defined(VUINT8x16_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 8, 32, 16)
+#if !defined(VUINT8x32_MIN_DEFINED) \
+	 && (defined(VUINT8x16_MIN_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_min(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_MIN_DEFINED
 #endif
-
-#if !defined(VUINT8x32_MAX_DEFINED) && defined(VUINT8x16_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 8, 32, 16)
+#if !defined(VUINT8x32_MAX_DEFINED) \
+	 && (defined(VUINT8x16_MAX_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_max(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_MAX_DEFINED
 #endif
-
-#if !defined(VUINT8x32_RSHIFT_DEFINED) && defined(VUINT8x16_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 8, 32, 16)
+#if !defined(VUINT8x32_RSHIFT_DEFINED) \
+	 && (defined(VUINT8x16_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_rshift(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT8x32_LRSHIFT_DEFINED) && defined(VUINT8x16_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 8, 32, 16)
+#if !defined(VUINT8x32_LRSHIFT_DEFINED) \
+	 && (defined(VUINT8x16_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_lrshift(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT8x32_LSHIFT_DEFINED) && defined(VUINT8x16_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 8, 32, 16)
+#if !defined(VUINT8x32_LSHIFT_DEFINED) \
+	 && (defined(VUINT8x16_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_lshift(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.dbl[0] = vuint8x16_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x16_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x32_LSHIFT_DEFINED
 #endif
-
-
-
-/* vuint8x64 */
-
-#if !defined(VINT8x64_SPLAT_DEFINED) && defined(VINT8x32_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_SPLAT_DEFINED) \
+	 && (defined(VINT8x32_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_splat(vec_int8 x)
+{
+	vint8x64 vec;
+	vec.dbl[0] = vint8x32_splat(x);
+	vec.dbl[1] = vint8x32_splat(x);
+	return vec;
+}
 # define VINT8x64_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT8x64_LOAD_ALIGNED_DEFINED) && defined(VINT8x32_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT8x32_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_load_aligned(const vec_int8 x[64])
+{
+	vint8x64 vec;
+	vec.dbl[0] = vint8x32_load_aligned(x);
+	vec.dbl[1] = vint8x32_load_aligned(x + 32);
+	return vec;
+}
 # define VINT8x64_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT8x64_LOAD_DEFINED) && defined(VINT8x32_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_LOAD_DEFINED) \
+	 && (defined(VINT8x32_LOAD_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_load(const vec_int8 x[64])
+{
+	vint8x64 vec;
+	vec.dbl[0] = vint8x32_load(x);
+	vec.dbl[1] = vint8x32_load(x + 32);
+	return vec;
+}
 # define VINT8x64_LOAD_DEFINED
 #endif
-
-#if !defined(VINT8x64_STORE_ALIGNED_DEFINED) && defined(VINT8x32_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT8x32_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint8x64_store_aligned(vint8x64 vec, vec_int8 x[64])
+{
+	vint8x32_store_aligned(vec.dbl[0], x);
+	vint8x32_store_aligned(vec.dbl[1], x + 32);
+}
 # define VINT8x64_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT8x64_STORE_DEFINED) && defined(VINT8x32_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_STORE_DEFINED) \
+	 && (defined(VINT8x32_STORE_DEFINED))
+VEC_FUNC_IMPL void vint8x64_store(vint8x64 vec, vec_int8 x[64])
+{
+	vint8x32_store(vec.dbl[0], x);
+	vint8x32_store(vec.dbl[1], x + 32);
+}
 # define VINT8x64_STORE_DEFINED
 #endif
-
-#if !defined(VINT8x64_ADD_DEFINED) && defined(VINT8x32_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_ADD_DEFINED) \
+	 && (defined(VINT8x32_ADD_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_add(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_ADD_DEFINED
 #endif
-
-#if !defined(VINT8x64_SUB_DEFINED) && defined(VINT8x32_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_SUB_DEFINED) \
+	 && (defined(VINT8x32_SUB_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_sub(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_SUB_DEFINED
 #endif
-
-#if !defined(VINT8x64_MUL_DEFINED) && defined(VINT8x32_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_MUL_DEFINED) \
+	 && (defined(VINT8x32_MUL_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_mul(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_MUL_DEFINED
 #endif
-
-#if !defined(VINT8x64_DIV_DEFINED) && defined(VINT8x32_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_DIV_DEFINED) \
+	 && (defined(VINT8x32_DIV_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_div(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_DIV_DEFINED
 #endif
-
-#if !defined(VINT8x64_MOD_DEFINED) && defined(VINT8x32_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_MOD_DEFINED) \
+	 && (defined(VINT8x32_MOD_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_mod(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_MOD_DEFINED
 #endif
-
-#if !defined(VINT8x64_AVG_DEFINED) && defined(VINT8x32_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_AVG_DEFINED) \
+	 && (defined(VINT8x32_AVG_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_avg(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_AVG_DEFINED
 #endif
-
-#if !defined(VINT8x64_AND_DEFINED) && defined(VINT8x32_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_AND_DEFINED) \
+	 && (defined(VINT8x32_AND_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_and(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_AND_DEFINED
 #endif
-
-#if !defined(VINT8x64_OR_DEFINED) && defined(VINT8x32_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_OR_DEFINED) \
+	 && (defined(VINT8x32_OR_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_or(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_OR_DEFINED
 #endif
-
-#if !defined(VINT8x64_XOR_DEFINED) && defined(VINT8x32_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_XOR_DEFINED) \
+	 && (defined(VINT8x32_XOR_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_xor(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_XOR_DEFINED
 #endif
-
-#if !defined(VINT8x64_NOT_DEFINED) && defined(VINT8x32_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_NOT_DEFINED) \
+	 && (defined(VINT8x32_NOT_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_not(vint8x64 vec)
+{
+	vec.dbl[0] = vint8x32_not(vec.dbl[0]);
+	vec.dbl[1] = vint8x32_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT8x64_NOT_DEFINED
 #endif
-
-#if !defined(VINT8x64_CMPLT_DEFINED) && defined(VINT8x32_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_CMPLT_DEFINED) \
+	 && (defined(VINT8x32_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_cmplt(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT8x64_CMPEQ_DEFINED) && defined(VINT8x32_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_CMPEQ_DEFINED) \
+	 && (defined(VINT8x32_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_cmpeq(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT8x64_CMPGT_DEFINED) && defined(VINT8x32_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_CMPGT_DEFINED) \
+	 && (defined(VINT8x32_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_cmpgt(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT8x64_CMPLE_DEFINED) && defined(VINT8x32_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_CMPLE_DEFINED) \
+	 && (defined(VINT8x32_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_cmple(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT8x64_CMPGE_DEFINED) && defined(VINT8x32_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_CMPGE_DEFINED) \
+	 && (defined(VINT8x32_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_cmpge(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT8x64_MIN_DEFINED) && defined(VINT8x32_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_MIN_DEFINED) \
+	 && (defined(VINT8x32_MIN_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_min(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_MIN_DEFINED
 #endif
-
-#if !defined(VINT8x64_MAX_DEFINED) && defined(VINT8x32_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_MAX_DEFINED) \
+	 && (defined(VINT8x32_MAX_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_max(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_MAX_DEFINED
 #endif
-
-#if !defined(VINT8x64_RSHIFT_DEFINED) && defined(VINT8x32_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_RSHIFT_DEFINED) \
+	 && (defined(VINT8x32_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_rshift(vint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT8x64_LRSHIFT_DEFINED) && defined(VINT8x32_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_LRSHIFT_DEFINED) \
+	 && (defined(VINT8x32_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_lrshift(vint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT8x64_LSHIFT_DEFINED) && defined(VINT8x32_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 8, 64, 32)
+#if !defined(VINT8x64_LSHIFT_DEFINED) \
+	 && (defined(VINT8x32_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint8x64 vint8x64_lshift(vint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vint8x32_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint8x32_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT8x64_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint8x64 */
-
-#if !defined(VUINT8x64_SPLAT_DEFINED) && defined(VUINT8x32_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 8, 64, 32)
+#if !defined(VUINT8x64_SPLAT_DEFINED) \
+	 && (defined(VUINT8x32_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_splat(vec_uint8 x)
+{
+	vuint8x64 vec;
+	vec.dbl[0] = vuint8x32_splat(x);
+	vec.dbl[1] = vuint8x32_splat(x);
+	return vec;
+}
 # define VUINT8x64_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT8x64_LOAD_ALIGNED_DEFINED) && defined(VUINT8x32_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 8, 64, 32)
+#if !defined(VUINT8x64_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT8x32_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_load_aligned(const vec_uint8 x[64])
+{
+	vuint8x64 vec;
+	vec.dbl[0] = vuint8x32_load_aligned(x);
+	vec.dbl[1] = vuint8x32_load_aligned(x + 32);
+	return vec;
+}
 # define VUINT8x64_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT8x64_LOAD_DEFINED) && defined(VUINT8x32_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 8, 64, 32)
+#if !defined(VUINT8x64_LOAD_DEFINED) \
+	 && (defined(VUINT8x32_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_load(const vec_uint8 x[64])
+{
+	vuint8x64 vec;
+	vec.dbl[0] = vuint8x32_load(x);
+	vec.dbl[1] = vuint8x32_load(x + 32);
+	return vec;
+}
 # define VUINT8x64_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT8x64_STORE_ALIGNED_DEFINED) && defined(VUINT8x32_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 8, 64, 32)
+#if !defined(VUINT8x64_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT8x32_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint8x64_store_aligned(vuint8x64 vec, vec_uint8 x[64])
+{
+	vuint8x32_store_aligned(vec.dbl[0], x);
+	vuint8x32_store_aligned(vec.dbl[1], x + 32);
+}
 # define VUINT8x64_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT8x64_STORE_DEFINED) && defined(VUINT8x32_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 8, 64, 32)
+#if !defined(VUINT8x64_STORE_DEFINED) \
+	 && (defined(VUINT8x32_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint8x64_store(vuint8x64 vec, vec_uint8 x[64])
+{
+	vuint8x32_store(vec.dbl[0], x);
+	vuint8x32_store(vec.dbl[1], x + 32);
+}
 # define VUINT8x64_STORE_DEFINED
 #endif
-
-#if !defined(VUINT8x64_ADD_DEFINED) && defined(VUINT8x32_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 8, 64, 32)
+#if !defined(VUINT8x64_ADD_DEFINED) \
+	 && (defined(VUINT8x32_ADD_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_add(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_ADD_DEFINED
 #endif
-
-#if !defined(VUINT8x64_SUB_DEFINED) && defined(VUINT8x32_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 8, 64, 32)
+#if !defined(VUINT8x64_SUB_DEFINED) \
+	 && (defined(VUINT8x32_SUB_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_sub(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_SUB_DEFINED
 #endif
-
-#if !defined(VUINT8x64_MUL_DEFINED) && defined(VUINT8x32_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 8, 64, 32)
+#if !defined(VUINT8x64_MUL_DEFINED) \
+	 && (defined(VUINT8x32_MUL_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_mul(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_MUL_DEFINED
 #endif
-
-#if !defined(VUINT8x64_DIV_DEFINED) && defined(VUINT8x32_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 8, 64, 32)
+#if !defined(VUINT8x64_DIV_DEFINED) \
+	 && (defined(VUINT8x32_DIV_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_div(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_DIV_DEFINED
 #endif
-
-#if !defined(VUINT8x64_MOD_DEFINED) && defined(VUINT8x32_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 8, 64, 32)
+#if !defined(VUINT8x64_MOD_DEFINED) \
+	 && (defined(VUINT8x32_MOD_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_mod(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_MOD_DEFINED
 #endif
-
-#if !defined(VUINT8x64_AVG_DEFINED) && defined(VUINT8x32_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 8, 64, 32)
+#if !defined(VUINT8x64_AVG_DEFINED) \
+	 && (defined(VUINT8x32_AVG_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_avg(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_AVG_DEFINED
 #endif
-
-#if !defined(VUINT8x64_AND_DEFINED) && defined(VUINT8x32_AND_DEFINED)
-VEC_DOUBLE_AND(u, 8, 64, 32)
+#if !defined(VUINT8x64_AND_DEFINED) \
+	 && (defined(VUINT8x32_AND_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_and(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_AND_DEFINED
 #endif
-
-#if !defined(VUINT8x64_OR_DEFINED) && defined(VUINT8x32_OR_DEFINED)
-VEC_DOUBLE_OR(u, 8, 64, 32)
+#if !defined(VUINT8x64_OR_DEFINED) \
+	 && (defined(VUINT8x32_OR_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_or(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_OR_DEFINED
 #endif
-
-#if !defined(VUINT8x64_XOR_DEFINED) && defined(VUINT8x32_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 8, 64, 32)
+#if !defined(VUINT8x64_XOR_DEFINED) \
+	 && (defined(VUINT8x32_XOR_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_xor(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_XOR_DEFINED
 #endif
-
-#if !defined(VUINT8x64_NOT_DEFINED) && defined(VUINT8x32_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 8, 64, 32)
+#if !defined(VUINT8x64_NOT_DEFINED) \
+	 && (defined(VUINT8x32_NOT_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_not(vuint8x64 vec)
+{
+	vec.dbl[0] = vuint8x32_not(vec.dbl[0]);
+	vec.dbl[1] = vuint8x32_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT8x64_NOT_DEFINED
 #endif
-
-#if !defined(VUINT8x64_CMPLT_DEFINED) && defined(VUINT8x32_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 8, 64, 32)
+#if !defined(VUINT8x64_CMPLT_DEFINED) \
+	 && (defined(VUINT8x32_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmplt(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT8x64_CMPEQ_DEFINED) && defined(VUINT8x32_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 8, 64, 32)
+#if !defined(VUINT8x64_CMPEQ_DEFINED) \
+	 && (defined(VUINT8x32_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpeq(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT8x64_CMPGT_DEFINED) && defined(VUINT8x32_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 8, 64, 32)
+#if !defined(VUINT8x64_CMPGT_DEFINED) \
+	 && (defined(VUINT8x32_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpgt(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT8x64_CMPLE_DEFINED) && defined(VUINT8x32_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 8, 64, 32)
+#if !defined(VUINT8x64_CMPLE_DEFINED) \
+	 && (defined(VUINT8x32_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmple(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT8x64_CMPGE_DEFINED) && defined(VUINT8x32_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 8, 64, 32)
+#if !defined(VUINT8x64_CMPGE_DEFINED) \
+	 && (defined(VUINT8x32_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpge(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT8x64_MIN_DEFINED) && defined(VUINT8x32_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 8, 64, 32)
+#if !defined(VUINT8x64_MIN_DEFINED) \
+	 && (defined(VUINT8x32_MIN_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_min(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_MIN_DEFINED
 #endif
-
-#if !defined(VUINT8x64_MAX_DEFINED) && defined(VUINT8x32_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 8, 64, 32)
+#if !defined(VUINT8x64_MAX_DEFINED) \
+	 && (defined(VUINT8x32_MAX_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_max(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_MAX_DEFINED
 #endif
-
-#if !defined(VUINT8x64_RSHIFT_DEFINED) && defined(VUINT8x32_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 8, 64, 32)
+#if !defined(VUINT8x64_RSHIFT_DEFINED) \
+	 && (defined(VUINT8x32_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_rshift(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT8x64_LRSHIFT_DEFINED) && defined(VUINT8x32_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 8, 64, 32)
+#if !defined(VUINT8x64_LRSHIFT_DEFINED) \
+	 && (defined(VUINT8x32_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_lrshift(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT8x64_LSHIFT_DEFINED) && defined(VUINT8x32_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 8, 64, 32)
+#if !defined(VUINT8x64_LSHIFT_DEFINED) \
+	 && (defined(VUINT8x32_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_lshift(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.dbl[0] = vuint8x32_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint8x32_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT8x64_LSHIFT_DEFINED
 #endif
-
-
-
-/* vuint16x4 */
-
-#if !defined(VINT16x4_SPLAT_DEFINED) && defined(VINT16x2_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 16, 4, 2)
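+/* vint16x2 */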
+#if !defined(VINT16x2_SPLAT_DEFINED) \
+	 && (defined(VINT16x1_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_splat(vec_int16 x)
+{
+	vint16x2 vec;
+	vec.dbl[0] = vint16x1_splat(x);
+	vec.dbl[1] = vint16x1_splat(x);
+	return vec;
+}
+# define VINT16x2_SPLAT_DEFINED
+#endif
+#if !defined(VINT16x2_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT16x1_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_load_aligned(const vec_int16 x[2])
+{
+	vint16x2 vec;
+	vec.dbl[0] = vint16x1_load_aligned(x);
+	vec.dbl[1] = vint16x1_load_aligned(x + 1);
+	return vec;
+}
+# define VINT16x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT16x2_LOAD_DEFINED) \
+	 && (defined(VINT16x1_LOAD_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_load(const vec_int16 x[2])
+{
+	vint16x2 vec;
+	vec.dbl[0] = vint16x1_load(x);
+	vec.dbl[1] = vint16x1_load(x + 1);
+	return vec;
+}
+# define VINT16x2_LOAD_DEFINED
+#endif
+#if !defined(VINT16x2_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT16x1_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint16x2_store_aligned(vint16x2 vec, vec_int16 x[2])
+{
+	vint16x1_store_aligned(vec.dbl[0], x);
+	vint16x1_store_aligned(vec.dbl[1], x + 1);
+}
+# define VINT16x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT16x2_STORE_DEFINED) \
+	 && (defined(VINT16x1_STORE_DEFINED))
+VEC_FUNC_IMPL void vint16x2_store(vint16x2 vec, vec_int16 x[2])
+{
+	vint16x1_store(vec.dbl[0], x);
+	vint16x1_store(vec.dbl[1], x + 1);
+}
+# define VINT16x2_STORE_DEFINED
+#endif
+#if !defined(VINT16x2_ADD_DEFINED) \
+	 && (defined(VINT16x1_ADD_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_add(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_ADD_DEFINED
+#endif
+#if !defined(VINT16x2_SUB_DEFINED) \
+	 && (defined(VINT16x1_SUB_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_sub(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_SUB_DEFINED
+#endif
+#if !defined(VINT16x2_MUL_DEFINED) \
+	 && (defined(VINT16x1_MUL_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_mul(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_MUL_DEFINED
+#endif
+#if !defined(VINT16x2_DIV_DEFINED) \
+	 && (defined(VINT16x1_DIV_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_div(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_DIV_DEFINED
+#endif
+#if !defined(VINT16x2_MOD_DEFINED) \
+	 && (defined(VINT16x1_MOD_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_mod(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_MOD_DEFINED
+#endif
+#if !defined(VINT16x2_AVG_DEFINED) \
+	 && (defined(VINT16x1_AVG_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_avg(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_AVG_DEFINED
+#endif
+#if !defined(VINT16x2_AND_DEFINED) \
+	 && (defined(VINT16x1_AND_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_and(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_AND_DEFINED
+#endif
+#if !defined(VINT16x2_OR_DEFINED) \
+	 && (defined(VINT16x1_OR_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_or(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_OR_DEFINED
+#endif
+#if !defined(VINT16x2_XOR_DEFINED) \
+	 && (defined(VINT16x1_XOR_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_xor(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_XOR_DEFINED
+#endif
+#if !defined(VINT16x2_NOT_DEFINED) \
+	 && (defined(VINT16x1_NOT_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_not(vint16x2 vec)
+{
+	vec.dbl[0] = vint16x1_not(vec.dbl[0]);
+	vec.dbl[1] = vint16x1_not(vec.dbl[1]);
+	return vec;
+}
+# define VINT16x2_NOT_DEFINED
+#endif
+#if !defined(VINT16x2_CMPLT_DEFINED) \
+	 && (defined(VINT16x1_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_cmplt(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_CMPLT_DEFINED
+#endif
+#if !defined(VINT16x2_CMPEQ_DEFINED) \
+	 && (defined(VINT16x1_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_cmpeq(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_CMPEQ_DEFINED
+#endif
+#if !defined(VINT16x2_CMPGT_DEFINED) \
+	 && (defined(VINT16x1_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_cmpgt(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_CMPGT_DEFINED
+#endif
+#if !defined(VINT16x2_CMPLE_DEFINED) \
+	 && (defined(VINT16x1_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_cmple(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_CMPLE_DEFINED
+#endif
+#if !defined(VINT16x2_CMPGE_DEFINED) \
+	 && (defined(VINT16x1_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_cmpge(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_CMPGE_DEFINED
+#endif
+#if !defined(VINT16x2_MIN_DEFINED) \
+	 && (defined(VINT16x1_MIN_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_min(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_MIN_DEFINED
+#endif
+#if !defined(VINT16x2_MAX_DEFINED) \
+	 && (defined(VINT16x1_MAX_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_max(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_MAX_DEFINED
+#endif
+#if !defined(VINT16x2_RSHIFT_DEFINED) \
+	 && (defined(VINT16x1_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_rshift(vint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_RSHIFT_DEFINED
+#endif
+#if !defined(VINT16x2_LRSHIFT_DEFINED) \
+	 && (defined(VINT16x1_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_lrshift(vint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT16x2_LSHIFT_DEFINED) \
+	 && (defined(VINT16x1_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x2 vint16x2_lshift(vint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vint16x1_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x1_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT16x2_LSHIFT_DEFINED
+#endif
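+/* vuint16x2 */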
+#if !defined(VUINT16x2_SPLAT_DEFINED) \
+	 && (defined(VUINT16x1_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_splat(vec_uint16 x)
+{
+	vuint16x2 vec;
+	vec.dbl[0] = vuint16x1_splat(x);
+	vec.dbl[1] = vuint16x1_splat(x);
+	return vec;
+}
+# define VUINT16x2_SPLAT_DEFINED
+#endif
+#if !defined(VUINT16x2_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT16x1_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_load_aligned(const vec_uint16 x[2])
+{
+	vuint16x2 vec;
+	vec.dbl[0] = vuint16x1_load_aligned(x);
+	vec.dbl[1] = vuint16x1_load_aligned(x + 1);
+	return vec;
+}
+# define VUINT16x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT16x2_LOAD_DEFINED) \
+	 && (defined(VUINT16x1_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_load(const vec_uint16 x[2])
+{
+	vuint16x2 vec;
+	vec.dbl[0] = vuint16x1_load(x);
+	vec.dbl[1] = vuint16x1_load(x + 1);
+	return vec;
+}
+# define VUINT16x2_LOAD_DEFINED
+#endif
+#if !defined(VUINT16x2_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT16x1_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint16x2_store_aligned(vuint16x2 vec, vec_uint16 x[2])
+{
+	vuint16x1_store_aligned(vec.dbl[0], x);
+	vuint16x1_store_aligned(vec.dbl[1], x + 1);
+}
+# define VUINT16x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT16x2_STORE_DEFINED) \
+	 && (defined(VUINT16x1_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint16x2_store(vuint16x2 vec, vec_uint16 x[2])
+{
+	vuint16x1_store(vec.dbl[0], x);
+	vuint16x1_store(vec.dbl[1], x + 1);
+}
+# define VUINT16x2_STORE_DEFINED
+#endif
+#if !defined(VUINT16x2_ADD_DEFINED) \
+	 && (defined(VUINT16x1_ADD_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_add(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_ADD_DEFINED
+#endif
+#if !defined(VUINT16x2_SUB_DEFINED) \
+	 && (defined(VUINT16x1_SUB_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_sub(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_SUB_DEFINED
+#endif
+#if !defined(VUINT16x2_MUL_DEFINED) \
+	 && (defined(VUINT16x1_MUL_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_mul(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_MUL_DEFINED
+#endif
+#if !defined(VUINT16x2_DIV_DEFINED) \
+	 && (defined(VUINT16x1_DIV_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_div(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_DIV_DEFINED
+#endif
+#if !defined(VUINT16x2_MOD_DEFINED) \
+	 && (defined(VUINT16x1_MOD_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_mod(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_MOD_DEFINED
+#endif
+#if !defined(VUINT16x2_AVG_DEFINED) \
+	 && (defined(VUINT16x1_AVG_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_avg(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_AVG_DEFINED
+#endif
+#if !defined(VUINT16x2_AND_DEFINED) \
+	 && (defined(VUINT16x1_AND_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_and(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_AND_DEFINED
+#endif
+#if !defined(VUINT16x2_OR_DEFINED) \
+	 && (defined(VUINT16x1_OR_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_or(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_OR_DEFINED
+#endif
+#if !defined(VUINT16x2_XOR_DEFINED) \
+	 && (defined(VUINT16x1_XOR_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_xor(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_XOR_DEFINED
+#endif
+#if !defined(VUINT16x2_NOT_DEFINED) \
+	 && (defined(VUINT16x1_NOT_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_not(vuint16x2 vec)
+{
+	vec.dbl[0] = vuint16x1_not(vec.dbl[0]);
+	vec.dbl[1] = vuint16x1_not(vec.dbl[1]);
+	return vec;
+}
+# define VUINT16x2_NOT_DEFINED
+#endif
+#if !defined(VUINT16x2_CMPLT_DEFINED) \
+	 && (defined(VUINT16x1_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmplt(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_CMPLT_DEFINED
+#endif
+#if !defined(VUINT16x2_CMPEQ_DEFINED) \
+	 && (defined(VUINT16x1_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpeq(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT16x2_CMPGT_DEFINED) \
+	 && (defined(VUINT16x1_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpgt(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_CMPGT_DEFINED
+#endif
+#if !defined(VUINT16x2_CMPLE_DEFINED) \
+	 && (defined(VUINT16x1_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmple(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_CMPLE_DEFINED
+#endif
+#if !defined(VUINT16x2_CMPGE_DEFINED) \
+	 && (defined(VUINT16x1_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpge(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_CMPGE_DEFINED
+#endif
+#if !defined(VUINT16x2_MIN_DEFINED) \
+	 && (defined(VUINT16x1_MIN_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_min(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_MIN_DEFINED
+#endif
+#if !defined(VUINT16x2_MAX_DEFINED) \
+	 && (defined(VUINT16x1_MAX_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_max(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_MAX_DEFINED
+#endif
+#if !defined(VUINT16x2_RSHIFT_DEFINED) \
+	 && (defined(VUINT16x1_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_rshift(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x2_LRSHIFT_DEFINED) \
+	 && (defined(VUINT16x1_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_lrshift(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x2_LSHIFT_DEFINED) \
+	 && (defined(VUINT16x1_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_lshift(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.dbl[0] = vuint16x1_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x1_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT16x2_LSHIFT_DEFINED
+#endif
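+/* vint16x4 */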
+#if !defined(VINT16x4_SPLAT_DEFINED) \
+	 && (defined(VINT16x2_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_splat(vec_int16 x)
+{
+	vint16x4 vec;
+	vec.dbl[0] = vint16x2_splat(x);
+	vec.dbl[1] = vint16x2_splat(x);
+	return vec;
+}
 # define VINT16x4_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT16x4_LOAD_ALIGNED_DEFINED) && defined(VINT16x2_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT16x2_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_load_aligned(const vec_int16 x[4])
+{
+	vint16x4 vec;
+	vec.dbl[0] = vint16x2_load_aligned(x);
+	vec.dbl[1] = vint16x2_load_aligned(x + 2);
+	return vec;
+}
 # define VINT16x4_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT16x4_LOAD_DEFINED) && defined(VINT16x2_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_LOAD_DEFINED) \
+	 && (defined(VINT16x2_LOAD_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_load(const vec_int16 x[4])
+{
+	vint16x4 vec;
+	vec.dbl[0] = vint16x2_load(x);
+	vec.dbl[1] = vint16x2_load(x + 2);
+	return vec;
+}
 # define VINT16x4_LOAD_DEFINED
 #endif
-
-#if !defined(VINT16x4_STORE_ALIGNED_DEFINED) && defined(VINT16x2_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT16x2_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint16x4_store_aligned(vint16x4 vec, vec_int16 x[4])
+{
+	vint16x2_store_aligned(vec.dbl[0], x);
+	vint16x2_store_aligned(vec.dbl[1], x + 2);
+}
 # define VINT16x4_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT16x4_STORE_DEFINED) && defined(VINT16x2_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_STORE_DEFINED) \
+	 && (defined(VINT16x2_STORE_DEFINED))
+VEC_FUNC_IMPL void vint16x4_store(vint16x4 vec, vec_int16 x[4])
+{
+	vint16x2_store(vec.dbl[0], x);
+	vint16x2_store(vec.dbl[1], x + 2);
+}
 # define VINT16x4_STORE_DEFINED
 #endif
-
-#if !defined(VINT16x4_ADD_DEFINED) && defined(VINT16x2_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_ADD_DEFINED) \
+	 && (defined(VINT16x2_ADD_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_add(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_ADD_DEFINED
 #endif
-
-#if !defined(VINT16x4_SUB_DEFINED) && defined(VINT16x2_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_SUB_DEFINED) \
+	 && (defined(VINT16x2_SUB_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_sub(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_SUB_DEFINED
 #endif
-
-#if !defined(VINT16x4_MUL_DEFINED) && defined(VINT16x2_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_MUL_DEFINED) \
+	 && (defined(VINT16x2_MUL_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_mul(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_MUL_DEFINED
 #endif
-
-#if !defined(VINT16x4_DIV_DEFINED) && defined(VINT16x2_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_DIV_DEFINED) \
+	 && (defined(VINT16x2_DIV_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_div(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_DIV_DEFINED
 #endif
-
-#if !defined(VINT16x4_MOD_DEFINED) && defined(VINT16x2_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_MOD_DEFINED) \
+	 && (defined(VINT16x2_MOD_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_mod(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_MOD_DEFINED
 #endif
-
-#if !defined(VINT16x4_AVG_DEFINED) && defined(VINT16x2_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_AVG_DEFINED) \
+	 && (defined(VINT16x2_AVG_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_avg(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_AVG_DEFINED
 #endif
-
-#if !defined(VINT16x4_AND_DEFINED) && defined(VINT16x2_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_AND_DEFINED) \
+	 && (defined(VINT16x2_AND_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_and(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_AND_DEFINED
 #endif
-
-#if !defined(VINT16x4_OR_DEFINED) && defined(VINT16x2_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_OR_DEFINED) \
+	 && (defined(VINT16x2_OR_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_or(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_OR_DEFINED
 #endif
-
-#if !defined(VINT16x4_XOR_DEFINED) && defined(VINT16x2_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_XOR_DEFINED) \
+	 && (defined(VINT16x2_XOR_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_xor(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_XOR_DEFINED
 #endif
-
-#if !defined(VINT16x4_NOT_DEFINED) && defined(VINT16x2_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_NOT_DEFINED) \
+	 && (defined(VINT16x2_NOT_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_not(vint16x4 vec)
+{
+	vec.dbl[0] = vint16x2_not(vec.dbl[0]);
+	vec.dbl[1] = vint16x2_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT16x4_NOT_DEFINED
 #endif
-
-#if !defined(VINT16x4_CMPLT_DEFINED) && defined(VINT16x2_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_CMPLT_DEFINED) \
+	 && (defined(VINT16x2_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_cmplt(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT16x4_CMPEQ_DEFINED) && defined(VINT16x2_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_CMPEQ_DEFINED) \
+	 && (defined(VINT16x2_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_cmpeq(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT16x4_CMPGT_DEFINED) && defined(VINT16x2_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_CMPGT_DEFINED) \
+	 && (defined(VINT16x2_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_cmpgt(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT16x4_CMPLE_DEFINED) && defined(VINT16x2_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_CMPLE_DEFINED) \
+	 && (defined(VINT16x2_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_cmple(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT16x4_CMPGE_DEFINED) && defined(VINT16x2_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_CMPGE_DEFINED) \
+	 && (defined(VINT16x2_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_cmpge(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT16x4_MIN_DEFINED) && defined(VINT16x2_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_MIN_DEFINED) \
+	 && (defined(VINT16x2_MIN_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_min(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_MIN_DEFINED
 #endif
-
-#if !defined(VINT16x4_MAX_DEFINED) && defined(VINT16x2_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_MAX_DEFINED) \
+	 && (defined(VINT16x2_MAX_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_max(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_MAX_DEFINED
 #endif
-
-#if !defined(VINT16x4_RSHIFT_DEFINED) && defined(VINT16x2_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_RSHIFT_DEFINED) \
+	 && (defined(VINT16x2_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_rshift(vint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT16x4_LRSHIFT_DEFINED) && defined(VINT16x2_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_LRSHIFT_DEFINED) \
+	 && (defined(VINT16x2_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_lrshift(vint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT16x4_LSHIFT_DEFINED) && defined(VINT16x2_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 16, 4, 2)
+#if !defined(VINT16x4_LSHIFT_DEFINED) \
+	 && (defined(VINT16x2_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x4 vint16x4_lshift(vint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vint16x2_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x2_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x4_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint16x4 */
-
-#if !defined(VUINT16x4_SPLAT_DEFINED) && defined(VUINT16x2_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 16, 4, 2)
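+/* vuint16x4 */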
+#if !defined(VUINT16x4_SPLAT_DEFINED) \
+	 && (defined(VUINT16x2_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_splat(vec_uint16 x)
+{
+	vuint16x4 vec;
+	vec.dbl[0] = vuint16x2_splat(x);
+	vec.dbl[1] = vuint16x2_splat(x);
+	return vec;
+}
 # define VUINT16x4_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT16x4_LOAD_ALIGNED_DEFINED) && defined(VUINT16x2_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 16, 4, 2)
+#if !defined(VUINT16x4_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT16x2_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_load_aligned(const vec_uint16 x[4])
+{
+	vuint16x4 vec;
+	vec.dbl[0] = vuint16x2_load_aligned(x);
+	vec.dbl[1] = vuint16x2_load_aligned(x + 2);
+	return vec;
+}
 # define VUINT16x4_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT16x4_LOAD_DEFINED) && defined(VUINT16x2_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 16, 4, 2)
+#if !defined(VUINT16x4_LOAD_DEFINED) \
+	 && (defined(VUINT16x2_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_load(const vec_uint16 x[4])
+{
+	vuint16x4 vec;
+	vec.dbl[0] = vuint16x2_load(x);
+	vec.dbl[1] = vuint16x2_load(x + 2);
+	return vec;
+}
 # define VUINT16x4_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT16x4_STORE_ALIGNED_DEFINED) && defined(VUINT16x2_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 16, 4, 2)
+#if !defined(VUINT16x4_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT16x2_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint16x4_store_aligned(vuint16x4 vec, vec_uint16 x[4])
+{
+	vuint16x2_store_aligned(vec.dbl[0], x);
+	vuint16x2_store_aligned(vec.dbl[1], x + 2);
+}
 # define VUINT16x4_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT16x4_STORE_DEFINED) && defined(VUINT16x2_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 16, 4, 2)
+#if !defined(VUINT16x4_STORE_DEFINED) \
+	 && (defined(VUINT16x2_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint16x4_store(vuint16x4 vec, vec_uint16 x[4])
+{
+	vuint16x2_store(vec.dbl[0], x);
+	vuint16x2_store(vec.dbl[1], x + 2);
+}
 # define VUINT16x4_STORE_DEFINED
 #endif
-
-#if !defined(VUINT16x4_ADD_DEFINED) && defined(VUINT16x2_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 16, 4, 2)
+#if !defined(VUINT16x4_ADD_DEFINED) \
+	 && (defined(VUINT16x2_ADD_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_add(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_ADD_DEFINED
 #endif
-
-#if !defined(VUINT16x4_SUB_DEFINED) && defined(VUINT16x2_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 16, 4, 2)
+#if !defined(VUINT16x4_SUB_DEFINED) \
+	 && (defined(VUINT16x2_SUB_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_sub(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_SUB_DEFINED
 #endif
-
-#if !defined(VUINT16x4_MUL_DEFINED) && defined(VUINT16x2_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 16, 4, 2)
+#if !defined(VUINT16x4_MUL_DEFINED) \
+	 && (defined(VUINT16x2_MUL_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_mul(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_MUL_DEFINED
 #endif
-
-#if !defined(VUINT16x4_DIV_DEFINED) && defined(VUINT16x2_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 16, 4, 2)
+#if !defined(VUINT16x4_DIV_DEFINED) \
+	 && (defined(VUINT16x2_DIV_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_div(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_DIV_DEFINED
 #endif
-
-#if !defined(VUINT16x4_MOD_DEFINED) && defined(VUINT16x2_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 16, 4, 2)
+#if !defined(VUINT16x4_MOD_DEFINED) \
+	 && (defined(VUINT16x2_MOD_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_mod(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_MOD_DEFINED
 #endif
-
-#if !defined(VUINT16x4_AVG_DEFINED) && defined(VUINT16x2_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 16, 4, 2)
+#if !defined(VUINT16x4_AVG_DEFINED) \
+	 && (defined(VUINT16x2_AVG_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_avg(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_AVG_DEFINED
 #endif
-
-#if !defined(VUINT16x4_AND_DEFINED) && defined(VUINT16x2_AND_DEFINED)
-VEC_DOUBLE_AND(u, 16, 4, 2)
+#if !defined(VUINT16x4_AND_DEFINED) \
+	 && (defined(VUINT16x2_AND_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_and(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_AND_DEFINED
 #endif
-
-#if !defined(VUINT16x4_OR_DEFINED) && defined(VUINT16x2_OR_DEFINED)
-VEC_DOUBLE_OR(u, 16, 4, 2)
+#if !defined(VUINT16x4_OR_DEFINED) \
+	 && (defined(VUINT16x2_OR_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_or(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_OR_DEFINED
 #endif
-
-#if !defined(VUINT16x4_XOR_DEFINED) && defined(VUINT16x2_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 16, 4, 2)
+#if !defined(VUINT16x4_XOR_DEFINED) \
+	 && (defined(VUINT16x2_XOR_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_xor(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_XOR_DEFINED
 #endif
-
-#if !defined(VUINT16x4_NOT_DEFINED) && defined(VUINT16x2_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 16, 4, 2)
+#if !defined(VUINT16x4_NOT_DEFINED) \
+	 && (defined(VUINT16x2_NOT_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_not(vuint16x4 vec)
+{
+	vec.dbl[0] = vuint16x2_not(vec.dbl[0]);
+	vec.dbl[1] = vuint16x2_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT16x4_NOT_DEFINED
 #endif
-
-#if !defined(VUINT16x4_CMPLT_DEFINED) && defined(VUINT16x2_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 16, 4, 2)
+#if !defined(VUINT16x4_CMPLT_DEFINED) \
+	 && (defined(VUINT16x2_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmplt(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT16x4_CMPEQ_DEFINED) && defined(VUINT16x2_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 16, 4, 2)
+#if !defined(VUINT16x4_CMPEQ_DEFINED) \
+	 && (defined(VUINT16x2_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpeq(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT16x4_CMPGT_DEFINED) && defined(VUINT16x2_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 16, 4, 2)
+#if !defined(VUINT16x4_CMPGT_DEFINED) \
+	 && (defined(VUINT16x2_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpgt(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT16x4_CMPLE_DEFINED) && defined(VUINT16x2_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 16, 4, 2)
+#if !defined(VUINT16x4_CMPLE_DEFINED) \
+	 && (defined(VUINT16x2_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmple(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT16x4_CMPGE_DEFINED) && defined(VUINT16x2_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 16, 4, 2)
+#if !defined(VUINT16x4_CMPGE_DEFINED) \
+	 && (defined(VUINT16x2_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpge(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT16x4_MIN_DEFINED) && defined(VUINT16x2_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 16, 4, 2)
+#if !defined(VUINT16x4_MIN_DEFINED) \
+	 && (defined(VUINT16x2_MIN_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_min(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_MIN_DEFINED
 #endif
-
-#if !defined(VUINT16x4_MAX_DEFINED) && defined(VUINT16x2_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 16, 4, 2)
+#if !defined(VUINT16x4_MAX_DEFINED) \
+	 && (defined(VUINT16x2_MAX_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_max(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_MAX_DEFINED
 #endif
-
-#if !defined(VUINT16x4_RSHIFT_DEFINED) && defined(VUINT16x2_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 16, 4, 2)
+#if !defined(VUINT16x4_RSHIFT_DEFINED) \
+	 && (defined(VUINT16x2_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_rshift(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT16x4_LRSHIFT_DEFINED) && defined(VUINT16x2_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 16, 4, 2)
+#if !defined(VUINT16x4_LRSHIFT_DEFINED) \
+	 && (defined(VUINT16x2_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_lrshift(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT16x4_LSHIFT_DEFINED) && defined(VUINT16x2_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 16, 4, 2)
+#if !defined(VUINT16x4_LSHIFT_DEFINED) \
+	 && (defined(VUINT16x2_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_lshift(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.dbl[0] = vuint16x2_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x2_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x4_LSHIFT_DEFINED
 #endif
-
-
-
-/* vuint16x8 */
-
-#if !defined(VINT16x8_SPLAT_DEFINED) && defined(VINT16x4_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 16, 8, 4)
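+/* vint16x8 */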
+#if !defined(VINT16x8_SPLAT_DEFINED) \
+	 && (defined(VINT16x4_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_splat(vec_int16 x)
+{
+	vint16x8 vec;
+	vec.dbl[0] = vint16x4_splat(x);
+	vec.dbl[1] = vint16x4_splat(x);
+	return vec;
+}
 # define VINT16x8_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT16x8_LOAD_ALIGNED_DEFINED) && defined(VINT16x4_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT16x4_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_load_aligned(const vec_int16 x[8])
+{
+	vint16x8 vec;
+	vec.dbl[0] = vint16x4_load_aligned(x);
+	vec.dbl[1] = vint16x4_load_aligned(x + 4);
+	return vec;
+}
 # define VINT16x8_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT16x8_LOAD_DEFINED) && defined(VINT16x4_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_LOAD_DEFINED) \
+	 && (defined(VINT16x4_LOAD_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_load(const vec_int16 x[8])
+{
+	vint16x8 vec;
+	vec.dbl[0] = vint16x4_load(x);
+	vec.dbl[1] = vint16x4_load(x + 4);
+	return vec;
+}
 # define VINT16x8_LOAD_DEFINED
 #endif
-
-#if !defined(VINT16x8_STORE_ALIGNED_DEFINED) && defined(VINT16x4_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT16x4_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 x[8])
+{
+	vint16x4_store_aligned(vec.dbl[0], x);
+	vint16x4_store_aligned(vec.dbl[1], x + 4);
+}
 # define VINT16x8_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT16x8_STORE_DEFINED) && defined(VINT16x4_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_STORE_DEFINED) \
+	 && (defined(VINT16x4_STORE_DEFINED))
+VEC_FUNC_IMPL void vint16x8_store(vint16x8 vec, vec_int16 x[8])
+{
+	vint16x4_store(vec.dbl[0], x);
+	vint16x4_store(vec.dbl[1], x + 4);
+}
 # define VINT16x8_STORE_DEFINED
 #endif
-
-#if !defined(VINT16x8_ADD_DEFINED) && defined(VINT16x4_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_ADD_DEFINED) \
+	 && (defined(VINT16x4_ADD_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_add(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_ADD_DEFINED
 #endif
-
-#if !defined(VINT16x8_SUB_DEFINED) && defined(VINT16x4_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_SUB_DEFINED) \
+	 && (defined(VINT16x4_SUB_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_sub(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_SUB_DEFINED
 #endif
-
-#if !defined(VINT16x8_MUL_DEFINED) && defined(VINT16x4_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_MUL_DEFINED) \
+	 && (defined(VINT16x4_MUL_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_mul(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_MUL_DEFINED
 #endif
-
-#if !defined(VINT16x8_DIV_DEFINED) && defined(VINT16x4_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_DIV_DEFINED) \
+	 && (defined(VINT16x4_DIV_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_div(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_DIV_DEFINED
 #endif
-
-#if !defined(VINT16x8_MOD_DEFINED) && defined(VINT16x4_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_MOD_DEFINED) \
+	 && (defined(VINT16x4_MOD_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_mod(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_MOD_DEFINED
 #endif
-
-#if !defined(VINT16x8_AVG_DEFINED) && defined(VINT16x4_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_AVG_DEFINED) \
+	 && (defined(VINT16x4_AVG_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_AVG_DEFINED
 #endif
-
-#if !defined(VINT16x8_AND_DEFINED) && defined(VINT16x4_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_AND_DEFINED) \
+	 && (defined(VINT16x4_AND_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_AND_DEFINED
 #endif
-
-#if !defined(VINT16x8_OR_DEFINED) && defined(VINT16x4_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_OR_DEFINED) \
+	 && (defined(VINT16x4_OR_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_OR_DEFINED
 #endif
-
-#if !defined(VINT16x8_XOR_DEFINED) && defined(VINT16x4_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_XOR_DEFINED) \
+	 && (defined(VINT16x4_XOR_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_XOR_DEFINED
 #endif
-
-#if !defined(VINT16x8_NOT_DEFINED) && defined(VINT16x4_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_NOT_DEFINED) \
+	 && (defined(VINT16x4_NOT_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_not(vint16x8 vec)
+{
+	vec.dbl[0] = vint16x4_not(vec.dbl[0]);
+	vec.dbl[1] = vint16x4_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT16x8_NOT_DEFINED
 #endif
-
-#if !defined(VINT16x8_CMPLT_DEFINED) && defined(VINT16x4_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_CMPLT_DEFINED) \
+	 && (defined(VINT16x4_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT16x8_CMPEQ_DEFINED) && defined(VINT16x4_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_CMPEQ_DEFINED) \
+	 && (defined(VINT16x4_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT16x8_CMPGT_DEFINED) && defined(VINT16x4_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_CMPGT_DEFINED) \
+	 && (defined(VINT16x4_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT16x8_CMPLE_DEFINED) && defined(VINT16x4_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_CMPLE_DEFINED) \
+	 && (defined(VINT16x4_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_cmple(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT16x8_CMPGE_DEFINED) && defined(VINT16x4_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_CMPGE_DEFINED) \
+	 && (defined(VINT16x4_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpge(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT16x8_MIN_DEFINED) && defined(VINT16x4_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_MIN_DEFINED) \
+	 && (defined(VINT16x4_MIN_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_MIN_DEFINED
 #endif
-
-#if !defined(VINT16x8_MAX_DEFINED) && defined(VINT16x4_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_MAX_DEFINED) \
+	 && (defined(VINT16x4_MAX_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_MAX_DEFINED
 #endif
-
-#if !defined(VINT16x8_RSHIFT_DEFINED) && defined(VINT16x4_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_RSHIFT_DEFINED) \
+	 && (defined(VINT16x4_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT16x8_LRSHIFT_DEFINED) && defined(VINT16x4_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_LRSHIFT_DEFINED) \
+	 && (defined(VINT16x4_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_lrshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT16x8_LSHIFT_DEFINED) && defined(VINT16x4_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 16, 8, 4)
+#if !defined(VINT16x8_LSHIFT_DEFINED) \
+	 && (defined(VINT16x4_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vint16x4_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x4_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x8_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint16x8 */
-
-#if !defined(VUINT16x8_SPLAT_DEFINED) && defined(VUINT16x4_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 16, 8, 4)
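+/* vuint16x8 */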
+#if !defined(VUINT16x8_SPLAT_DEFINED) \
+	 && (defined(VUINT16x4_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_splat(vec_uint16 x)
+{
+	vuint16x8 vec;
+	vec.dbl[0] = vuint16x4_splat(x);
+	vec.dbl[1] = vuint16x4_splat(x);
+	return vec;
+}
 # define VUINT16x8_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT16x8_LOAD_ALIGNED_DEFINED) && defined(VUINT16x4_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 16, 8, 4)
+#if !defined(VUINT16x8_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT16x4_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_load_aligned(const vec_uint16 x[8])
+{
+	vuint16x8 vec;
+	vec.dbl[0] = vuint16x4_load_aligned(x);
+	vec.dbl[1] = vuint16x4_load_aligned(x + 4);
+	return vec;
+}
 # define VUINT16x8_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT16x8_LOAD_DEFINED) && defined(VUINT16x4_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 16, 8, 4)
+#if !defined(VUINT16x8_LOAD_DEFINED) \
+	 && (defined(VUINT16x4_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_load(const vec_uint16 x[8])
+{
+	vuint16x8 vec;
+	vec.dbl[0] = vuint16x4_load(x);
+	vec.dbl[1] = vuint16x4_load(x + 4);
+	return vec;
+}
 # define VUINT16x8_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT16x8_STORE_ALIGNED_DEFINED) && defined(VUINT16x4_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 16, 8, 4)
+#if !defined(VUINT16x8_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT16x4_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 x[8])
+{
+	vuint16x4_store_aligned(vec.dbl[0], x);
+	vuint16x4_store_aligned(vec.dbl[1], x + 4);
+}
 # define VUINT16x8_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT16x8_STORE_DEFINED) && defined(VUINT16x4_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 16, 8, 4)
+#if !defined(VUINT16x8_STORE_DEFINED) \
+	 && (defined(VUINT16x4_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint16x8_store(vuint16x8 vec, vec_uint16 x[8])
+{
+	vuint16x4_store(vec.dbl[0], x);
+	vuint16x4_store(vec.dbl[1], x + 4);
+}
 # define VUINT16x8_STORE_DEFINED
 #endif
-
-#if !defined(VUINT16x8_ADD_DEFINED) && defined(VUINT16x4_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 16, 8, 4)
+#if !defined(VUINT16x8_ADD_DEFINED) \
+	 && (defined(VUINT16x4_ADD_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_add(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_ADD_DEFINED
 #endif
-
-#if !defined(VUINT16x8_SUB_DEFINED) && defined(VUINT16x4_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 16, 8, 4)
+#if !defined(VUINT16x8_SUB_DEFINED) \
+	 && (defined(VUINT16x4_SUB_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_sub(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_SUB_DEFINED
 #endif
-
-#if !defined(VUINT16x8_MUL_DEFINED) && defined(VUINT16x4_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 16, 8, 4)
+#if !defined(VUINT16x8_MUL_DEFINED) \
+	 && (defined(VUINT16x4_MUL_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_mul(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_MUL_DEFINED
 #endif
-
-#if !defined(VUINT16x8_DIV_DEFINED) && defined(VUINT16x4_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 16, 8, 4)
+#if !defined(VUINT16x8_DIV_DEFINED) \
+	 && (defined(VUINT16x4_DIV_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_div(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_DIV_DEFINED
 #endif
-
-#if !defined(VUINT16x8_MOD_DEFINED) && defined(VUINT16x4_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 16, 8, 4)
+#if !defined(VUINT16x8_MOD_DEFINED) \
+	 && (defined(VUINT16x4_MOD_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_mod(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_MOD_DEFINED
 #endif
-
-#if !defined(VUINT16x8_AVG_DEFINED) && defined(VUINT16x4_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 16, 8, 4)
+#if !defined(VUINT16x8_AVG_DEFINED) \
+	 && (defined(VUINT16x4_AVG_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_AVG_DEFINED
 #endif
-
-#if !defined(VUINT16x8_AND_DEFINED) && defined(VUINT16x4_AND_DEFINED)
-VEC_DOUBLE_AND(u, 16, 8, 4)
+#if !defined(VUINT16x8_AND_DEFINED) \
+	 && (defined(VUINT16x4_AND_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_and(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_AND_DEFINED
 #endif
-
-#if !defined(VUINT16x8_OR_DEFINED) && defined(VUINT16x4_OR_DEFINED)
-VEC_DOUBLE_OR(u, 16, 8, 4)
+#if !defined(VUINT16x8_OR_DEFINED) \
+	 && (defined(VUINT16x4_OR_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_or(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_OR_DEFINED
 #endif
-
-#if !defined(VUINT16x8_XOR_DEFINED) && defined(VUINT16x4_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 16, 8, 4)
+#if !defined(VUINT16x8_XOR_DEFINED) \
+	 && (defined(VUINT16x4_XOR_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_xor(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_XOR_DEFINED
 #endif
-
-#if !defined(VUINT16x8_NOT_DEFINED) && defined(VUINT16x4_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 16, 8, 4)
+#if !defined(VUINT16x8_NOT_DEFINED) \
+	 && (defined(VUINT16x4_NOT_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_not(vuint16x8 vec)
+{
+	vec.dbl[0] = vuint16x4_not(vec.dbl[0]);
+	vec.dbl[1] = vuint16x4_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT16x8_NOT_DEFINED
 #endif
-
-#if !defined(VUINT16x8_CMPLT_DEFINED) && defined(VUINT16x4_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 16, 8, 4)
+#if !defined(VUINT16x8_CMPLT_DEFINED) \
+	 && (defined(VUINT16x4_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmplt(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT16x8_CMPEQ_DEFINED) && defined(VUINT16x4_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 16, 8, 4)
+#if !defined(VUINT16x8_CMPEQ_DEFINED) \
+	 && (defined(VUINT16x4_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpeq(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT16x8_CMPGT_DEFINED) && defined(VUINT16x4_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 16, 8, 4)
+#if !defined(VUINT16x8_CMPGT_DEFINED) \
+	 && (defined(VUINT16x4_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpgt(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT16x8_CMPLE_DEFINED) && defined(VUINT16x4_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 16, 8, 4)
+#if !defined(VUINT16x8_CMPLE_DEFINED) \
+	 && (defined(VUINT16x4_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmple(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT16x8_CMPGE_DEFINED) && defined(VUINT16x4_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 16, 8, 4)
+#if !defined(VUINT16x8_CMPGE_DEFINED) \
+	 && (defined(VUINT16x4_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpge(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT16x8_MIN_DEFINED) && defined(VUINT16x4_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 16, 8, 4)
+#if !defined(VUINT16x8_MIN_DEFINED) \
+	 && (defined(VUINT16x4_MIN_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_min(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_MIN_DEFINED
 #endif
-
-#if !defined(VUINT16x8_MAX_DEFINED) && defined(VUINT16x4_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 16, 8, 4)
+#if !defined(VUINT16x8_MAX_DEFINED) \
+	 && (defined(VUINT16x4_MAX_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_max(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_MAX_DEFINED
 #endif
-
-#if !defined(VUINT16x8_RSHIFT_DEFINED) && defined(VUINT16x4_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 16, 8, 4)
+#if !defined(VUINT16x8_RSHIFT_DEFINED) \
+	 && (defined(VUINT16x4_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_rshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT16x8_LRSHIFT_DEFINED) && defined(VUINT16x4_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 16, 8, 4)
+#if !defined(VUINT16x8_LRSHIFT_DEFINED) \
+	 && (defined(VUINT16x4_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT16x8_LSHIFT_DEFINED) && defined(VUINT16x4_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 16, 8, 4)
+#if !defined(VUINT16x8_LSHIFT_DEFINED) \
+	 && (defined(VUINT16x4_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.dbl[0] = vuint16x4_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x4_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x8_LSHIFT_DEFINED
 #endif
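+/* 16x16 ops: doubled from the 16x8 ops above */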
-
-
-
-/* vuint16x16 */
-
-#if !defined(VINT16x16_SPLAT_DEFINED) && defined(VINT16x8_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_SPLAT_DEFINED) \
+	 && (defined(VINT16x8_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_splat(vec_int16 x)
+{
+	vint16x16 vec;
+	vec.dbl[0] = vint16x8_splat(x);
+	vec.dbl[1] = vint16x8_splat(x);
+	return vec;
+}
 # define VINT16x16_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT16x16_LOAD_ALIGNED_DEFINED) && defined(VINT16x8_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT16x8_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_load_aligned(const vec_int16 x[16])
+{
+	vint16x16 vec;
+	vec.dbl[0] = vint16x8_load_aligned(x);
+	vec.dbl[1] = vint16x8_load_aligned(x + 8);
+	return vec;
+}
 # define VINT16x16_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT16x16_LOAD_DEFINED) && defined(VINT16x8_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_LOAD_DEFINED) \
+	 && (defined(VINT16x8_LOAD_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_load(const vec_int16 x[16])
+{
+	vint16x16 vec;
+	vec.dbl[0] = vint16x8_load(x);
+	vec.dbl[1] = vint16x8_load(x + 8);
+	return vec;
+}
 # define VINT16x16_LOAD_DEFINED
 #endif
-
-#if !defined(VINT16x16_STORE_ALIGNED_DEFINED) && defined(VINT16x8_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT16x8_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint16x16_store_aligned(vint16x16 vec, vec_int16 x[16])
+{
+	vint16x8_store_aligned(vec.dbl[0], x);
+	vint16x8_store_aligned(vec.dbl[1], x + 8);
+}
 # define VINT16x16_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT16x16_STORE_DEFINED) && defined(VINT16x8_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_STORE_DEFINED) \
+	 && (defined(VINT16x8_STORE_DEFINED))
+VEC_FUNC_IMPL void vint16x16_store(vint16x16 vec, vec_int16 x[16])
+{
+	vint16x8_store(vec.dbl[0], x);
+	vint16x8_store(vec.dbl[1], x + 8);
+}
 # define VINT16x16_STORE_DEFINED
 #endif
-
-#if !defined(VINT16x16_ADD_DEFINED) && defined(VINT16x8_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_ADD_DEFINED) \
+	 && (defined(VINT16x8_ADD_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_add(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_ADD_DEFINED
 #endif
-
-#if !defined(VINT16x16_SUB_DEFINED) && defined(VINT16x8_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_SUB_DEFINED) \
+	 && (defined(VINT16x8_SUB_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_sub(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_SUB_DEFINED
 #endif
-
-#if !defined(VINT16x16_MUL_DEFINED) && defined(VINT16x8_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_MUL_DEFINED) \
+	 && (defined(VINT16x8_MUL_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_mul(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_MUL_DEFINED
 #endif
-
-#if !defined(VINT16x16_DIV_DEFINED) && defined(VINT16x8_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_DIV_DEFINED) \
+	 && (defined(VINT16x8_DIV_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_div(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_DIV_DEFINED
 #endif
-
-#if !defined(VINT16x16_MOD_DEFINED) && defined(VINT16x8_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_MOD_DEFINED) \
+	 && (defined(VINT16x8_MOD_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_mod(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_MOD_DEFINED
 #endif
-
-#if !defined(VINT16x16_AVG_DEFINED) && defined(VINT16x8_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_AVG_DEFINED) \
+	 && (defined(VINT16x8_AVG_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_avg(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_AVG_DEFINED
 #endif
-
-#if !defined(VINT16x16_AND_DEFINED) && defined(VINT16x8_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_AND_DEFINED) \
+	 && (defined(VINT16x8_AND_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_and(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_AND_DEFINED
 #endif
-
-#if !defined(VINT16x16_OR_DEFINED) && defined(VINT16x8_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_OR_DEFINED) \
+	 && (defined(VINT16x8_OR_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_or(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_OR_DEFINED
 #endif
-
-#if !defined(VINT16x16_XOR_DEFINED) && defined(VINT16x8_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_XOR_DEFINED) \
+	 && (defined(VINT16x8_XOR_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_xor(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_XOR_DEFINED
 #endif
-
-#if !defined(VINT16x16_NOT_DEFINED) && defined(VINT16x8_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_NOT_DEFINED) \
+	 && (defined(VINT16x8_NOT_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_not(vint16x16 vec)
+{
+	vec.dbl[0] = vint16x8_not(vec.dbl[0]);
+	vec.dbl[1] = vint16x8_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT16x16_NOT_DEFINED
 #endif
-
-#if !defined(VINT16x16_CMPLT_DEFINED) && defined(VINT16x8_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_CMPLT_DEFINED) \
+	 && (defined(VINT16x8_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_cmplt(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT16x16_CMPEQ_DEFINED) && defined(VINT16x8_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_CMPEQ_DEFINED) \
+	 && (defined(VINT16x8_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_cmpeq(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT16x16_CMPGT_DEFINED) && defined(VINT16x8_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_CMPGT_DEFINED) \
+	 && (defined(VINT16x8_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_cmpgt(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT16x16_CMPLE_DEFINED) && defined(VINT16x8_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_CMPLE_DEFINED) \
+	 && (defined(VINT16x8_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_cmple(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT16x16_CMPGE_DEFINED) && defined(VINT16x8_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_CMPGE_DEFINED) \
+	 && (defined(VINT16x8_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_cmpge(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT16x16_MIN_DEFINED) && defined(VINT16x8_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_MIN_DEFINED) \
+	 && (defined(VINT16x8_MIN_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_min(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_MIN_DEFINED
 #endif
-
-#if !defined(VINT16x16_MAX_DEFINED) && defined(VINT16x8_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_MAX_DEFINED) \
+	 && (defined(VINT16x8_MAX_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_max(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_MAX_DEFINED
 #endif
-
-#if !defined(VINT16x16_RSHIFT_DEFINED) && defined(VINT16x8_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_RSHIFT_DEFINED) \
+	 && (defined(VINT16x8_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_rshift(vint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT16x16_LRSHIFT_DEFINED) && defined(VINT16x8_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_LRSHIFT_DEFINED) \
+	 && (defined(VINT16x8_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_lrshift(vint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT16x16_LSHIFT_DEFINED) && defined(VINT16x8_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 16, 16, 8)
+#if !defined(VINT16x16_LSHIFT_DEFINED) \
+	 && (defined(VINT16x8_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x16 vint16x16_lshift(vint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vint16x8_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x8_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x16_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint16x16 */
-
-#if !defined(VUINT16x16_SPLAT_DEFINED) && defined(VUINT16x8_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 16, 16, 8)
+#if !defined(VUINT16x16_SPLAT_DEFINED) \
+	 && (defined(VUINT16x8_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_splat(vec_uint16 x)
+{
+	vuint16x16 vec;
+	vec.dbl[0] = vuint16x8_splat(x);
+	vec.dbl[1] = vuint16x8_splat(x);
+	return vec;
+}
 # define VUINT16x16_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT16x16_LOAD_ALIGNED_DEFINED) && defined(VUINT16x8_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 16, 16, 8)
+#if !defined(VUINT16x16_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT16x8_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_load_aligned(const vec_uint16 x[16])
+{
+	vuint16x16 vec;
+	vec.dbl[0] = vuint16x8_load_aligned(x);
+	vec.dbl[1] = vuint16x8_load_aligned(x + 8);
+	return vec;
+}
 # define VUINT16x16_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT16x16_LOAD_DEFINED) && defined(VUINT16x8_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 16, 16, 8)
+#if !defined(VUINT16x16_LOAD_DEFINED) \
+	 && (defined(VUINT16x8_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_load(const vec_uint16 x[16])
+{
+	vuint16x16 vec;
+	vec.dbl[0] = vuint16x8_load(x);
+	vec.dbl[1] = vuint16x8_load(x + 8);
+	return vec;
+}
 # define VUINT16x16_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT16x16_STORE_ALIGNED_DEFINED) && defined(VUINT16x8_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 16, 16, 8)
+#if !defined(VUINT16x16_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT16x8_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint16x16_store_aligned(vuint16x16 vec, vec_uint16 x[16])
+{
+	vuint16x8_store_aligned(vec.dbl[0], x);
+	vuint16x8_store_aligned(vec.dbl[1], x + 8);
+}
 # define VUINT16x16_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT16x16_STORE_DEFINED) && defined(VUINT16x8_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 16, 16, 8)
+#if !defined(VUINT16x16_STORE_DEFINED) \
+	 && (defined(VUINT16x8_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint16x16_store(vuint16x16 vec, vec_uint16 x[16])
+{
+	vuint16x8_store(vec.dbl[0], x);
+	vuint16x8_store(vec.dbl[1], x + 8);
+}
 # define VUINT16x16_STORE_DEFINED
 #endif
-
-#if !defined(VUINT16x16_ADD_DEFINED) && defined(VUINT16x8_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 16, 16, 8)
+#if !defined(VUINT16x16_ADD_DEFINED) \
+	 && (defined(VUINT16x8_ADD_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_add(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_ADD_DEFINED
 #endif
-
-#if !defined(VUINT16x16_SUB_DEFINED) && defined(VUINT16x8_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 16, 16, 8)
+#if !defined(VUINT16x16_SUB_DEFINED) \
+	 && (defined(VUINT16x8_SUB_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_sub(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_SUB_DEFINED
 #endif
-
-#if !defined(VUINT16x16_MUL_DEFINED) && defined(VUINT16x8_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 16, 16, 8)
+#if !defined(VUINT16x16_MUL_DEFINED) \
+	 && (defined(VUINT16x8_MUL_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_mul(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_MUL_DEFINED
 #endif
-
-#if !defined(VUINT16x16_DIV_DEFINED) && defined(VUINT16x8_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 16, 16, 8)
+#if !defined(VUINT16x16_DIV_DEFINED) \
+	 && (defined(VUINT16x8_DIV_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_div(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_DIV_DEFINED
 #endif
-
-#if !defined(VUINT16x16_MOD_DEFINED) && defined(VUINT16x8_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 16, 16, 8)
+#if !defined(VUINT16x16_MOD_DEFINED) \
+	 && (defined(VUINT16x8_MOD_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_mod(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_MOD_DEFINED
 #endif
-
-#if !defined(VUINT16x16_AVG_DEFINED) && defined(VUINT16x8_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 16, 16, 8)
+#if !defined(VUINT16x16_AVG_DEFINED) \
+	 && (defined(VUINT16x8_AVG_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_avg(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_AVG_DEFINED
 #endif
-
-#if !defined(VUINT16x16_AND_DEFINED) && defined(VUINT16x8_AND_DEFINED)
-VEC_DOUBLE_AND(u, 16, 16, 8)
+#if !defined(VUINT16x16_AND_DEFINED) \
+	 && (defined(VUINT16x8_AND_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_and(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_AND_DEFINED
 #endif
-
-#if !defined(VUINT16x16_OR_DEFINED) && defined(VUINT16x8_OR_DEFINED)
-VEC_DOUBLE_OR(u, 16, 16, 8)
+#if !defined(VUINT16x16_OR_DEFINED) \
+	 && (defined(VUINT16x8_OR_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_or(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_OR_DEFINED
 #endif
-
-#if !defined(VUINT16x16_XOR_DEFINED) && defined(VUINT16x8_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 16, 16, 8)
+#if !defined(VUINT16x16_XOR_DEFINED) \
+	 && (defined(VUINT16x8_XOR_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_xor(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_XOR_DEFINED
 #endif
-
-#if !defined(VUINT16x16_NOT_DEFINED) && defined(VUINT16x8_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 16, 16, 8)
+#if !defined(VUINT16x16_NOT_DEFINED) \
+	 && (defined(VUINT16x8_NOT_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_not(vuint16x16 vec)
+{
+	vec.dbl[0] = vuint16x8_not(vec.dbl[0]);
+	vec.dbl[1] = vuint16x8_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT16x16_NOT_DEFINED
 #endif
-
-#if !defined(VUINT16x16_CMPLT_DEFINED) && defined(VUINT16x8_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 16, 16, 8)
+#if !defined(VUINT16x16_CMPLT_DEFINED) \
+	 && (defined(VUINT16x8_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmplt(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT16x16_CMPEQ_DEFINED) && defined(VUINT16x8_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 16, 16, 8)
+#if !defined(VUINT16x16_CMPEQ_DEFINED) \
+	 && (defined(VUINT16x8_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpeq(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT16x16_CMPGT_DEFINED) && defined(VUINT16x8_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 16, 16, 8)
+#if !defined(VUINT16x16_CMPGT_DEFINED) \
+	 && (defined(VUINT16x8_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpgt(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT16x16_CMPLE_DEFINED) && defined(VUINT16x8_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 16, 16, 8)
+#if !defined(VUINT16x16_CMPLE_DEFINED) \
+	 && (defined(VUINT16x8_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmple(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT16x16_CMPGE_DEFINED) && defined(VUINT16x8_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 16, 16, 8)
+#if !defined(VUINT16x16_CMPGE_DEFINED) \
+	 && (defined(VUINT16x8_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpge(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT16x16_MIN_DEFINED) && defined(VUINT16x8_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 16, 16, 8)
+#if !defined(VUINT16x16_MIN_DEFINED) \
+	 && (defined(VUINT16x8_MIN_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_min(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_MIN_DEFINED
 #endif
-
-#if !defined(VUINT16x16_MAX_DEFINED) && defined(VUINT16x8_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 16, 16, 8)
+#if !defined(VUINT16x16_MAX_DEFINED) \
+	 && (defined(VUINT16x8_MAX_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_max(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_MAX_DEFINED
 #endif
-
-#if !defined(VUINT16x16_RSHIFT_DEFINED) && defined(VUINT16x8_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 16, 16, 8)
+#if !defined(VUINT16x16_RSHIFT_DEFINED) \
+	 && (defined(VUINT16x8_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_rshift(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT16x16_LRSHIFT_DEFINED) && defined(VUINT16x8_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 16, 16, 8)
+#if !defined(VUINT16x16_LRSHIFT_DEFINED) \
+	 && (defined(VUINT16x8_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_lrshift(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT16x16_LSHIFT_DEFINED) && defined(VUINT16x8_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 16, 16, 8)
+#if !defined(VUINT16x16_LSHIFT_DEFINED) \
+	 && (defined(VUINT16x8_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_lshift(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.dbl[0] = vuint16x8_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x8_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x16_LSHIFT_DEFINED
 #endif
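+/* 16x32 ops: doubled from the 16x16 ops above */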
-
-
-
-/* vuint16x32 */
-
-#if !defined(VINT16x32_SPLAT_DEFINED) && defined(VINT16x16_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_SPLAT_DEFINED) \
+	 && (defined(VINT16x16_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_splat(vec_int16 x)
+{
+	vint16x32 vec;
+	vec.dbl[0] = vint16x16_splat(x);
+	vec.dbl[1] = vint16x16_splat(x);
+	return vec;
+}
 # define VINT16x32_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT16x32_LOAD_ALIGNED_DEFINED) && defined(VINT16x16_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT16x16_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_load_aligned(const vec_int16 x[32])
+{
+	vint16x32 vec;
+	vec.dbl[0] = vint16x16_load_aligned(x);
+	vec.dbl[1] = vint16x16_load_aligned(x + 16);
+	return vec;
+}
 # define VINT16x32_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT16x32_LOAD_DEFINED) && defined(VINT16x16_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_LOAD_DEFINED) \
+	 && (defined(VINT16x16_LOAD_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_load(const vec_int16 x[32])
+{
+	vint16x32 vec;
+	vec.dbl[0] = vint16x16_load(x);
+	vec.dbl[1] = vint16x16_load(x + 16);
+	return vec;
+}
 # define VINT16x32_LOAD_DEFINED
 #endif
-
-#if !defined(VINT16x32_STORE_ALIGNED_DEFINED) && defined(VINT16x16_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT16x16_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint16x32_store_aligned(vint16x32 vec, vec_int16 x[32])
+{
+	vint16x16_store_aligned(vec.dbl[0], x);
+	vint16x16_store_aligned(vec.dbl[1], x + 16);
+}
 # define VINT16x32_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT16x32_STORE_DEFINED) && defined(VINT16x16_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_STORE_DEFINED) \
+	 && (defined(VINT16x16_STORE_DEFINED))
+VEC_FUNC_IMPL void vint16x32_store(vint16x32 vec, vec_int16 x[32])
+{
+	vint16x16_store(vec.dbl[0], x);
+	vint16x16_store(vec.dbl[1], x + 16);
+}
 # define VINT16x32_STORE_DEFINED
 #endif
-
-#if !defined(VINT16x32_ADD_DEFINED) && defined(VINT16x16_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_ADD_DEFINED) \
+	 && (defined(VINT16x16_ADD_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_add(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_ADD_DEFINED
 #endif
-
-#if !defined(VINT16x32_SUB_DEFINED) && defined(VINT16x16_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_SUB_DEFINED) \
+	 && (defined(VINT16x16_SUB_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_sub(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_SUB_DEFINED
 #endif
-
-#if !defined(VINT16x32_MUL_DEFINED) && defined(VINT16x16_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_MUL_DEFINED) \
+	 && (defined(VINT16x16_MUL_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_mul(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_MUL_DEFINED
 #endif
-
-#if !defined(VINT16x32_DIV_DEFINED) && defined(VINT16x16_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_DIV_DEFINED) \
+	 && (defined(VINT16x16_DIV_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_div(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_DIV_DEFINED
 #endif
-
-#if !defined(VINT16x32_MOD_DEFINED) && defined(VINT16x16_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_MOD_DEFINED) \
+	 && (defined(VINT16x16_MOD_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_mod(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_MOD_DEFINED
 #endif
-
-#if !defined(VINT16x32_AVG_DEFINED) && defined(VINT16x16_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_AVG_DEFINED) \
+	 && (defined(VINT16x16_AVG_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_avg(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_AVG_DEFINED
 #endif
-
-#if !defined(VINT16x32_AND_DEFINED) && defined(VINT16x16_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_AND_DEFINED) \
+	 && (defined(VINT16x16_AND_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_and(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_AND_DEFINED
 #endif
-
-#if !defined(VINT16x32_OR_DEFINED) && defined(VINT16x16_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_OR_DEFINED) \
+	 && (defined(VINT16x16_OR_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_or(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_OR_DEFINED
 #endif
-
-#if !defined(VINT16x32_XOR_DEFINED) && defined(VINT16x16_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_XOR_DEFINED) \
+	 && (defined(VINT16x16_XOR_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_xor(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_XOR_DEFINED
 #endif
-
-#if !defined(VINT16x32_NOT_DEFINED) && defined(VINT16x16_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_NOT_DEFINED) \
+	 && (defined(VINT16x16_NOT_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_not(vint16x32 vec)
+{
+	vec.dbl[0] = vint16x16_not(vec.dbl[0]);
+	vec.dbl[1] = vint16x16_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT16x32_NOT_DEFINED
 #endif
-
-#if !defined(VINT16x32_CMPLT_DEFINED) && defined(VINT16x16_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_CMPLT_DEFINED) \
+	 && (defined(VINT16x16_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_cmplt(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT16x32_CMPEQ_DEFINED) && defined(VINT16x16_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_CMPEQ_DEFINED) \
+	 && (defined(VINT16x16_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_cmpeq(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT16x32_CMPGT_DEFINED) && defined(VINT16x16_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_CMPGT_DEFINED) \
+	 && (defined(VINT16x16_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_cmpgt(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT16x32_CMPLE_DEFINED) && defined(VINT16x16_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_CMPLE_DEFINED) \
+	 && (defined(VINT16x16_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_cmple(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT16x32_CMPGE_DEFINED) && defined(VINT16x16_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_CMPGE_DEFINED) \
+	 && (defined(VINT16x16_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_cmpge(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT16x32_MIN_DEFINED) && defined(VINT16x16_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_MIN_DEFINED) \
+	 && (defined(VINT16x16_MIN_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_min(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_MIN_DEFINED
 #endif
-
-#if !defined(VINT16x32_MAX_DEFINED) && defined(VINT16x16_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_MAX_DEFINED) \
+	 && (defined(VINT16x16_MAX_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_max(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_MAX_DEFINED
 #endif
-
-#if !defined(VINT16x32_RSHIFT_DEFINED) && defined(VINT16x16_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_RSHIFT_DEFINED) \
+	 && (defined(VINT16x16_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_rshift(vint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT16x32_LRSHIFT_DEFINED) && defined(VINT16x16_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_LRSHIFT_DEFINED) \
+	 && (defined(VINT16x16_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_lrshift(vint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT16x32_LSHIFT_DEFINED) && defined(VINT16x16_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 16, 32, 16)
+#if !defined(VINT16x32_LSHIFT_DEFINED) \
+	 && (defined(VINT16x16_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint16x32 vint16x32_lshift(vint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vint16x16_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint16x16_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT16x32_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint16x32 */
-
-#if !defined(VUINT16x32_SPLAT_DEFINED) && defined(VUINT16x16_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 16, 32, 16)
+#if !defined(VUINT16x32_SPLAT_DEFINED) \
+	 && (defined(VUINT16x16_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_splat(vec_uint16 x)
+{
+	vuint16x32 vec;
+	vec.dbl[0] = vuint16x16_splat(x);
+	vec.dbl[1] = vuint16x16_splat(x);
+	return vec;
+}
 # define VUINT16x32_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT16x32_LOAD_ALIGNED_DEFINED) && defined(VUINT16x16_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 16, 32, 16)
+#if !defined(VUINT16x32_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT16x16_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_load_aligned(const vec_uint16 x[32])
+{
+	vuint16x32 vec;
+	vec.dbl[0] = vuint16x16_load_aligned(x);
+	vec.dbl[1] = vuint16x16_load_aligned(x + 16);
+	return vec;
+}
 # define VUINT16x32_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT16x32_LOAD_DEFINED) && defined(VUINT16x16_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 16, 32, 16)
+#if !defined(VUINT16x32_LOAD_DEFINED) \
+	 && (defined(VUINT16x16_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_load(const vec_uint16 x[32])
+{
+	vuint16x32 vec;
+	vec.dbl[0] = vuint16x16_load(x);
+	vec.dbl[1] = vuint16x16_load(x + 16);
+	return vec;
+}
 # define VUINT16x32_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT16x32_STORE_ALIGNED_DEFINED) && defined(VUINT16x16_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 16, 32, 16)
+#if !defined(VUINT16x32_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT16x16_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint16x32_store_aligned(vuint16x32 vec, vec_uint16 x[32])
+{
+	vuint16x16_store_aligned(vec.dbl[0], x);
+	vuint16x16_store_aligned(vec.dbl[1], x + 16);
+}
 # define VUINT16x32_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT16x32_STORE_DEFINED) && defined(VUINT16x16_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 16, 32, 16)
+#if !defined(VUINT16x32_STORE_DEFINED) \
+	 && (defined(VUINT16x16_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint16x32_store(vuint16x32 vec, vec_uint16 x[32])
+{
+	vuint16x16_store(vec.dbl[0], x);
+	vuint16x16_store(vec.dbl[1], x + 16);
+}
 # define VUINT16x32_STORE_DEFINED
 #endif
-
-#if !defined(VUINT16x32_ADD_DEFINED) && defined(VUINT16x16_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 16, 32, 16)
+#if !defined(VUINT16x32_ADD_DEFINED) \
+	 && (defined(VUINT16x16_ADD_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_add(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_ADD_DEFINED
 #endif
-
-#if !defined(VUINT16x32_SUB_DEFINED) && defined(VUINT16x16_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 16, 32, 16)
+#if !defined(VUINT16x32_SUB_DEFINED) \
+	 && (defined(VUINT16x16_SUB_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_sub(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_SUB_DEFINED
 #endif
-
-#if !defined(VUINT16x32_MUL_DEFINED) && defined(VUINT16x16_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 16, 32, 16)
+#if !defined(VUINT16x32_MUL_DEFINED) \
+	 && (defined(VUINT16x16_MUL_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_mul(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_MUL_DEFINED
 #endif
-
-#if !defined(VUINT16x32_DIV_DEFINED) && defined(VUINT16x16_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 16, 32, 16)
+#if !defined(VUINT16x32_DIV_DEFINED) \
+	 && (defined(VUINT16x16_DIV_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_div(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_DIV_DEFINED
 #endif
-
-#if !defined(VUINT16x32_MOD_DEFINED) && defined(VUINT16x16_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 16, 32, 16)
+#if !defined(VUINT16x32_MOD_DEFINED) \
+	 && (defined(VUINT16x16_MOD_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_mod(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_MOD_DEFINED
 #endif
-
-#if !defined(VUINT16x32_AVG_DEFINED) && defined(VUINT16x16_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 16, 32, 16)
+#if !defined(VUINT16x32_AVG_DEFINED) \
+	 && (defined(VUINT16x16_AVG_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_avg(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_AVG_DEFINED
 #endif
-
-#if !defined(VUINT16x32_AND_DEFINED) && defined(VUINT16x16_AND_DEFINED)
-VEC_DOUBLE_AND(u, 16, 32, 16)
+#if !defined(VUINT16x32_AND_DEFINED) \
+	 && (defined(VUINT16x16_AND_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_and(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_AND_DEFINED
 #endif
-
-#if !defined(VUINT16x32_OR_DEFINED) && defined(VUINT16x16_OR_DEFINED)
-VEC_DOUBLE_OR(u, 16, 32, 16)
+#if !defined(VUINT16x32_OR_DEFINED) \
+	 && (defined(VUINT16x16_OR_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_or(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_OR_DEFINED
 #endif
-
-#if !defined(VUINT16x32_XOR_DEFINED) && defined(VUINT16x16_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 16, 32, 16)
+#if !defined(VUINT16x32_XOR_DEFINED) \
+	 && (defined(VUINT16x16_XOR_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_xor(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_XOR_DEFINED
 #endif
-
-#if !defined(VUINT16x32_NOT_DEFINED) && defined(VUINT16x16_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 16, 32, 16)
+#if !defined(VUINT16x32_NOT_DEFINED) \
+	 && (defined(VUINT16x16_NOT_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_not(vuint16x32 vec)
+{
+	vec.dbl[0] = vuint16x16_not(vec.dbl[0]);
+	vec.dbl[1] = vuint16x16_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT16x32_NOT_DEFINED
 #endif
-
-#if !defined(VUINT16x32_CMPLT_DEFINED) && defined(VUINT16x16_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 16, 32, 16)
+#if !defined(VUINT16x32_CMPLT_DEFINED) \
+	 && (defined(VUINT16x16_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmplt(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT16x32_CMPEQ_DEFINED) && defined(VUINT16x16_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 16, 32, 16)
+#if !defined(VUINT16x32_CMPEQ_DEFINED) \
+	 && (defined(VUINT16x16_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpeq(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT16x32_CMPGT_DEFINED) && defined(VUINT16x16_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 16, 32, 16)
+#if !defined(VUINT16x32_CMPGT_DEFINED) \
+	 && (defined(VUINT16x16_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpgt(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT16x32_CMPLE_DEFINED) && defined(VUINT16x16_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 16, 32, 16)
+#if !defined(VUINT16x32_CMPLE_DEFINED) \
+	 && (defined(VUINT16x16_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmple(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT16x32_CMPGE_DEFINED) && defined(VUINT16x16_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 16, 32, 16)
+#if !defined(VUINT16x32_CMPGE_DEFINED) \
+	 && (defined(VUINT16x16_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpge(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT16x32_MIN_DEFINED) && defined(VUINT16x16_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 16, 32, 16)
+#if !defined(VUINT16x32_MIN_DEFINED) \
+	 && (defined(VUINT16x16_MIN_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_min(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_MIN_DEFINED
 #endif
-
-#if !defined(VUINT16x32_MAX_DEFINED) && defined(VUINT16x16_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 16, 32, 16)
+#if !defined(VUINT16x32_MAX_DEFINED) \
+	 && (defined(VUINT16x16_MAX_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_max(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_MAX_DEFINED
 #endif
-
-#if !defined(VUINT16x32_RSHIFT_DEFINED) && defined(VUINT16x16_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 16, 32, 16)
+#if !defined(VUINT16x32_RSHIFT_DEFINED) \
+	 && (defined(VUINT16x16_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_rshift(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT16x32_LRSHIFT_DEFINED) && defined(VUINT16x16_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 16, 32, 16)
+#if !defined(VUINT16x32_LRSHIFT_DEFINED) \
+	 && (defined(VUINT16x16_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_lrshift(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT16x32_LSHIFT_DEFINED) && defined(VUINT16x16_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 16, 32, 16)
+#if !defined(VUINT16x32_LSHIFT_DEFINED) \
+	 && (defined(VUINT16x16_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_lshift(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.dbl[0] = vuint16x16_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint16x16_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT16x32_LSHIFT_DEFINED
 #endif
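+/* 32x2 ops: doubled from the 32x1 ops */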
-
-
-
-/* vuint32x4 */
-
-#if !defined(VINT32x4_SPLAT_DEFINED) && defined(VINT32x2_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x2_SPLAT_DEFINED) \
+	 && (defined(VINT32x1_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_splat(vec_int32 x)
+{
+	vint32x2 vec;
+	vec.dbl[0] = vint32x1_splat(x);
+	vec.dbl[1] = vint32x1_splat(x);
+	return vec;
+}
+# define VINT32x2_SPLAT_DEFINED
+#endif
+#if !defined(VINT32x2_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT32x1_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_load_aligned(const vec_int32 x[2])
+{
+	vint32x2 vec;
+	vec.dbl[0] = vint32x1_load_aligned(x);
+	vec.dbl[1] = vint32x1_load_aligned(x + 1);
+	return vec;
+}
+# define VINT32x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT32x2_LOAD_DEFINED) \
+	 && (defined(VINT32x1_LOAD_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_load(const vec_int32 x[2])
+{
+	vint32x2 vec;
+	vec.dbl[0] = vint32x1_load(x);
+	vec.dbl[1] = vint32x1_load(x + 1);
+	return vec;
+}
+# define VINT32x2_LOAD_DEFINED
+#endif
+#if !defined(VINT32x2_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT32x1_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint32x2_store_aligned(vint32x2 vec, vec_int32 x[2])
+{
+	vint32x1_store_aligned(vec.dbl[0], x);
+	vint32x1_store_aligned(vec.dbl[1], x + 1);
+}
+# define VINT32x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT32x2_STORE_DEFINED) \
+	 && (defined(VINT32x1_STORE_DEFINED))
+VEC_FUNC_IMPL void vint32x2_store(vint32x2 vec, vec_int32 x[2])
+{
+	vint32x1_store(vec.dbl[0], x);
+	vint32x1_store(vec.dbl[1], x + 1);
+}
+# define VINT32x2_STORE_DEFINED
+#endif
+#if !defined(VINT32x2_ADD_DEFINED) \
+	 && (defined(VINT32x1_ADD_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_add(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_ADD_DEFINED
+#endif
+#if !defined(VINT32x2_SUB_DEFINED) \
+	 && (defined(VINT32x1_SUB_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_sub(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_SUB_DEFINED
+#endif
+#if !defined(VINT32x2_MUL_DEFINED) \
+	 && (defined(VINT32x1_MUL_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_mul(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_MUL_DEFINED
+#endif
+#if !defined(VINT32x2_DIV_DEFINED) \
+	 && (defined(VINT32x1_DIV_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_div(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_DIV_DEFINED
+#endif
+#if !defined(VINT32x2_MOD_DEFINED) \
+	 && (defined(VINT32x1_MOD_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_mod(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_MOD_DEFINED
+#endif
+#if !defined(VINT32x2_AVG_DEFINED) \
+	 && (defined(VINT32x1_AVG_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_avg(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_AVG_DEFINED
+#endif
+#if !defined(VINT32x2_AND_DEFINED) \
+	 && (defined(VINT32x1_AND_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_and(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_AND_DEFINED
+#endif
+#if !defined(VINT32x2_OR_DEFINED) \
+	 && (defined(VINT32x1_OR_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_or(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_OR_DEFINED
+#endif
+#if !defined(VINT32x2_XOR_DEFINED) \
+	 && (defined(VINT32x1_XOR_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_xor(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_XOR_DEFINED
+#endif
+#if !defined(VINT32x2_NOT_DEFINED) \
+	 && (defined(VINT32x1_NOT_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_not(vint32x2 vec)
+{
+	vec.dbl[0] = vint32x1_not(vec.dbl[0]);
+	vec.dbl[1] = vint32x1_not(vec.dbl[1]);
+	return vec;
+}
+# define VINT32x2_NOT_DEFINED
+#endif
+#if !defined(VINT32x2_CMPLT_DEFINED) \
+	 && (defined(VINT32x1_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_cmplt(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_CMPLT_DEFINED
+#endif
+#if !defined(VINT32x2_CMPEQ_DEFINED) \
+	 && (defined(VINT32x1_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpeq(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_CMPEQ_DEFINED
+#endif
+#if !defined(VINT32x2_CMPGT_DEFINED) \
+	 && (defined(VINT32x1_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpgt(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_CMPGT_DEFINED
+#endif
+#if !defined(VINT32x2_CMPLE_DEFINED) \
+	 && (defined(VINT32x1_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_cmple(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_CMPLE_DEFINED
+#endif
+#if !defined(VINT32x2_CMPGE_DEFINED) \
+	 && (defined(VINT32x1_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpge(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_CMPGE_DEFINED
+#endif
+#if !defined(VINT32x2_MIN_DEFINED) \
+	 && (defined(VINT32x1_MIN_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_min(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_MIN_DEFINED
+#endif
+#if !defined(VINT32x2_MAX_DEFINED) \
+	 && (defined(VINT32x1_MAX_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_max(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_MAX_DEFINED
+#endif
+#if !defined(VINT32x2_RSHIFT_DEFINED) \
+	 && (defined(VINT32x1_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_rshift(vint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_RSHIFT_DEFINED
+#endif
+#if !defined(VINT32x2_LRSHIFT_DEFINED) \
+	 && (defined(VINT32x1_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_lrshift(vint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT32x2_LSHIFT_DEFINED) \
+	 && (defined(VINT32x1_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint32x2 vint32x2_lshift(vint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vint32x1_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x1_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT32x2_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x2_SPLAT_DEFINED) \
+	 && (defined(VUINT32x1_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_splat(vec_uint32 x)
+{
+	vuint32x2 vec;
+	vec.dbl[0] = vuint32x1_splat(x);
+	vec.dbl[1] = vuint32x1_splat(x);
+	return vec;
+}
+# define VUINT32x2_SPLAT_DEFINED
+#endif
+#if !defined(VUINT32x2_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT32x1_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_load_aligned(const vec_uint32 x[2])
+{
+	vuint32x2 vec;
+	vec.dbl[0] = vuint32x1_load_aligned(x);
+	vec.dbl[1] = vuint32x1_load_aligned(x + 1);
+	return vec;
+}
+# define VUINT32x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT32x2_LOAD_DEFINED) \
+	 && (defined(VUINT32x1_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_load(const vec_uint32 x[2])
+{
+	vuint32x2 vec;
+	vec.dbl[0] = vuint32x1_load(x);
+	vec.dbl[1] = vuint32x1_load(x + 1);
+	return vec;
+}
+# define VUINT32x2_LOAD_DEFINED
+#endif
+#if !defined(VUINT32x2_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT32x1_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint32x2_store_aligned(vuint32x2 vec, vec_uint32 x[2])
+{
+	vuint32x1_store_aligned(vec.dbl[0], x);
+	vuint32x1_store_aligned(vec.dbl[1], x + 1);
+}
+# define VUINT32x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT32x2_STORE_DEFINED) \
+	 && (defined(VUINT32x1_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint32x2_store(vuint32x2 vec, vec_uint32 x[2])
+{
+	vuint32x1_store(vec.dbl[0], x);
+	vuint32x1_store(vec.dbl[1], x + 1);
+}
+# define VUINT32x2_STORE_DEFINED
+#endif
+#if !defined(VUINT32x2_ADD_DEFINED) \
+	 && (defined(VUINT32x1_ADD_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_add(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_ADD_DEFINED
+#endif
+#if !defined(VUINT32x2_SUB_DEFINED) \
+	 && (defined(VUINT32x1_SUB_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_sub(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_SUB_DEFINED
+#endif
+#if !defined(VUINT32x2_MUL_DEFINED) \
+	 && (defined(VUINT32x1_MUL_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_mul(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_MUL_DEFINED
+#endif
+#if !defined(VUINT32x2_DIV_DEFINED) \
+	 && (defined(VUINT32x1_DIV_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_div(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_DIV_DEFINED
+#endif
+#if !defined(VUINT32x2_MOD_DEFINED) \
+	 && (defined(VUINT32x1_MOD_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_mod(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_MOD_DEFINED
+#endif
+#if !defined(VUINT32x2_AVG_DEFINED) \
+	 && (defined(VUINT32x1_AVG_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_avg(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_AVG_DEFINED
+#endif
+#if !defined(VUINT32x2_AND_DEFINED) \
+	 && (defined(VUINT32x1_AND_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_and(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_AND_DEFINED
+#endif
+#if !defined(VUINT32x2_OR_DEFINED) \
+	 && (defined(VUINT32x1_OR_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_or(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_OR_DEFINED
+#endif
+#if !defined(VUINT32x2_XOR_DEFINED) \
+	 && (defined(VUINT32x1_XOR_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_xor(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_XOR_DEFINED
+#endif
+#if !defined(VUINT32x2_NOT_DEFINED) \
+	 && (defined(VUINT32x1_NOT_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_not(vuint32x2 vec)
+{
+	vec.dbl[0] = vuint32x1_not(vec.dbl[0]);
+	vec.dbl[1] = vuint32x1_not(vec.dbl[1]);
+	return vec;
+}
+# define VUINT32x2_NOT_DEFINED
+#endif
+#if !defined(VUINT32x2_CMPLT_DEFINED) \
+	 && (defined(VUINT32x1_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmplt(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_CMPLT_DEFINED
+#endif
+#if !defined(VUINT32x2_CMPEQ_DEFINED) \
+	 && (defined(VUINT32x1_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpeq(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT32x2_CMPGT_DEFINED) \
+	 && (defined(VUINT32x1_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpgt(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_CMPGT_DEFINED
+#endif
+#if !defined(VUINT32x2_CMPLE_DEFINED) \
+	 && (defined(VUINT32x1_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmple(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_CMPLE_DEFINED
+#endif
+#if !defined(VUINT32x2_CMPGE_DEFINED) \
+	 && (defined(VUINT32x1_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpge(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_CMPGE_DEFINED
+#endif
+#if !defined(VUINT32x2_MIN_DEFINED) \
+	 && (defined(VUINT32x1_MIN_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_min(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_MIN_DEFINED
+#endif
+#if !defined(VUINT32x2_MAX_DEFINED) \
+	 && (defined(VUINT32x1_MAX_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_max(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_MAX_DEFINED
+#endif
+#if !defined(VUINT32x2_RSHIFT_DEFINED) \
+	 && (defined(VUINT32x1_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_rshift(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x2_LRSHIFT_DEFINED) \
+	 && (defined(VUINT32x1_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_lrshift(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x2_LSHIFT_DEFINED) \
+	 && (defined(VUINT32x1_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_lshift(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.dbl[0] = vuint32x1_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x1_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT32x2_LSHIFT_DEFINED
+#endif
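+/* The wider types below (vint32x4/vuint32x4, and likewise the x8 and
+ * x16 widths further down) are built from two half-width vectors held
+ * in the .dbl[0] and .dbl[1] members; each operation simply delegates
+ * to the corresponding half-width function on each half. */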
+#if !defined(VINT32x4_SPLAT_DEFINED) \
+	 && (defined(VINT32x2_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_splat(vec_int32 x)
+{
+	vint32x4 vec;
+	vec.dbl[0] = vint32x2_splat(x);
+	vec.dbl[1] = vint32x2_splat(x);
+	return vec;
+}
 # define VINT32x4_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT32x4_LOAD_ALIGNED_DEFINED) && defined(VINT32x2_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT32x2_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_load_aligned(const vec_int32 x[4])
+{
+	vint32x4 vec;
+	vec.dbl[0] = vint32x2_load_aligned(x);
+	vec.dbl[1] = vint32x2_load_aligned(x + 2);
+	return vec;
+}
 # define VINT32x4_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT32x4_LOAD_DEFINED) && defined(VINT32x2_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_LOAD_DEFINED) \
+	 && (defined(VINT32x2_LOAD_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_load(const vec_int32 x[4])
+{
+	vint32x4 vec;
+	vec.dbl[0] = vint32x2_load(x);
+	vec.dbl[1] = vint32x2_load(x + 2);
+	return vec;
+}
 # define VINT32x4_LOAD_DEFINED
 #endif
-
-#if !defined(VINT32x4_STORE_ALIGNED_DEFINED) && defined(VINT32x2_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT32x2_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 x[4])
+{
+	vint32x2_store_aligned(vec.dbl[0], x);
+	vint32x2_store_aligned(vec.dbl[1], x + 2);
+}
 # define VINT32x4_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT32x4_STORE_DEFINED) && defined(VINT32x2_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_STORE_DEFINED) \
+	 && (defined(VINT32x2_STORE_DEFINED))
+VEC_FUNC_IMPL void vint32x4_store(vint32x4 vec, vec_int32 x[4])
+{
+	vint32x2_store(vec.dbl[0], x);
+	vint32x2_store(vec.dbl[1], x + 2);
+}
 # define VINT32x4_STORE_DEFINED
 #endif
-
-#if !defined(VINT32x4_ADD_DEFINED) && defined(VINT32x2_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_ADD_DEFINED) \
+	 && (defined(VINT32x2_ADD_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_ADD_DEFINED
 #endif
-
-#if !defined(VINT32x4_SUB_DEFINED) && defined(VINT32x2_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_SUB_DEFINED) \
+	 && (defined(VINT32x2_SUB_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_sub(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_SUB_DEFINED
 #endif
-
-#if !defined(VINT32x4_MUL_DEFINED) && defined(VINT32x2_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_MUL_DEFINED) \
+	 && (defined(VINT32x2_MUL_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_mul(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_MUL_DEFINED
 #endif
-
-#if !defined(VINT32x4_DIV_DEFINED) && defined(VINT32x2_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_DIV_DEFINED) \
+	 && (defined(VINT32x2_DIV_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_div(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_DIV_DEFINED
 #endif
-
-#if !defined(VINT32x4_MOD_DEFINED) && defined(VINT32x2_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_MOD_DEFINED) \
+	 && (defined(VINT32x2_MOD_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_mod(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_MOD_DEFINED
 #endif
-
-#if !defined(VINT32x4_AVG_DEFINED) && defined(VINT32x2_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_AVG_DEFINED) \
+	 && (defined(VINT32x2_AVG_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_AVG_DEFINED
 #endif
-
-#if !defined(VINT32x4_AND_DEFINED) && defined(VINT32x2_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_AND_DEFINED) \
+	 && (defined(VINT32x2_AND_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_AND_DEFINED
 #endif
-
-#if !defined(VINT32x4_OR_DEFINED) && defined(VINT32x2_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_OR_DEFINED) \
+	 && (defined(VINT32x2_OR_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_OR_DEFINED
 #endif
-
-#if !defined(VINT32x4_XOR_DEFINED) && defined(VINT32x2_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_XOR_DEFINED) \
+	 && (defined(VINT32x2_XOR_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_XOR_DEFINED
 #endif
-
-#if !defined(VINT32x4_NOT_DEFINED) && defined(VINT32x2_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_NOT_DEFINED) \
+	 && (defined(VINT32x2_NOT_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_not(vint32x4 vec)
+{
+	vec.dbl[0] = vint32x2_not(vec.dbl[0]);
+	vec.dbl[1] = vint32x2_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT32x4_NOT_DEFINED
 #endif
-
-#if !defined(VINT32x4_CMPLT_DEFINED) && defined(VINT32x2_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_CMPLT_DEFINED) \
+	 && (defined(VINT32x2_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT32x4_CMPEQ_DEFINED) && defined(VINT32x2_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_CMPEQ_DEFINED) \
+	 && (defined(VINT32x2_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT32x4_CMPGT_DEFINED) && defined(VINT32x2_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_CMPGT_DEFINED) \
+	 && (defined(VINT32x2_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT32x4_CMPLE_DEFINED) && defined(VINT32x2_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_CMPLE_DEFINED) \
+	 && (defined(VINT32x2_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_cmple(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT32x4_CMPGE_DEFINED) && defined(VINT32x2_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_CMPGE_DEFINED) \
+	 && (defined(VINT32x2_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpge(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT32x4_MIN_DEFINED) && defined(VINT32x2_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_MIN_DEFINED) \
+	 && (defined(VINT32x2_MIN_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_MIN_DEFINED
 #endif
-
-#if !defined(VINT32x4_MAX_DEFINED) && defined(VINT32x2_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_MAX_DEFINED) \
+	 && (defined(VINT32x2_MAX_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_MAX_DEFINED
 #endif
-
-#if !defined(VINT32x4_RSHIFT_DEFINED) && defined(VINT32x2_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_RSHIFT_DEFINED) \
+	 && (defined(VINT32x2_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT32x4_LRSHIFT_DEFINED) && defined(VINT32x2_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_LRSHIFT_DEFINED) \
+	 && (defined(VINT32x2_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT32x4_LSHIFT_DEFINED) && defined(VINT32x2_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 32, 4, 2)
+#if !defined(VINT32x4_LSHIFT_DEFINED) \
+	 && (defined(VINT32x2_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vint32x2_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x2_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x4_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint32x4 */
-
-#if !defined(VUINT32x4_SPLAT_DEFINED) && defined(VUINT32x2_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 32, 4, 2)
+#if !defined(VUINT32x4_SPLAT_DEFINED) \
+	 && (defined(VUINT32x2_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_splat(vec_uint32 x)
+{
+	vuint32x4 vec;
+	vec.dbl[0] = vuint32x2_splat(x);
+	vec.dbl[1] = vuint32x2_splat(x);
+	return vec;
+}
 # define VUINT32x4_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT32x4_LOAD_ALIGNED_DEFINED) && defined(VUINT32x2_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 32, 4, 2)
+#if !defined(VUINT32x4_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT32x2_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_load_aligned(const vec_uint32 x[4])
+{
+	vuint32x4 vec;
+	vec.dbl[0] = vuint32x2_load_aligned(x);
+	vec.dbl[1] = vuint32x2_load_aligned(x + 2);
+	return vec;
+}
 # define VUINT32x4_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT32x4_LOAD_DEFINED) && defined(VUINT32x2_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 32, 4, 2)
+#if !defined(VUINT32x4_LOAD_DEFINED) \
+	 && (defined(VUINT32x2_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_load(const vec_uint32 x[4])
+{
+	vuint32x4 vec;
+	vec.dbl[0] = vuint32x2_load(x);
+	vec.dbl[1] = vuint32x2_load(x + 2);
+	return vec;
+}
 # define VUINT32x4_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT32x4_STORE_ALIGNED_DEFINED) && defined(VUINT32x2_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 32, 4, 2)
+#if !defined(VUINT32x4_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT32x2_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 x[4])
+{
+	vuint32x2_store_aligned(vec.dbl[0], x);
+	vuint32x2_store_aligned(vec.dbl[1], x + 2);
+}
 # define VUINT32x4_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT32x4_STORE_DEFINED) && defined(VUINT32x2_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 32, 4, 2)
+#if !defined(VUINT32x4_STORE_DEFINED) \
+	 && (defined(VUINT32x2_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint32x4_store(vuint32x4 vec, vec_uint32 x[4])
+{
+	vuint32x2_store(vec.dbl[0], x);
+	vuint32x2_store(vec.dbl[1], x + 2);
+}
 # define VUINT32x4_STORE_DEFINED
 #endif
-
-#if !defined(VUINT32x4_ADD_DEFINED) && defined(VUINT32x2_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 32, 4, 2)
+#if !defined(VUINT32x4_ADD_DEFINED) \
+	 && (defined(VUINT32x2_ADD_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_add(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_ADD_DEFINED
 #endif
-
-#if !defined(VUINT32x4_SUB_DEFINED) && defined(VUINT32x2_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 32, 4, 2)
+#if !defined(VUINT32x4_SUB_DEFINED) \
+	 && (defined(VUINT32x2_SUB_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_sub(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_SUB_DEFINED
 #endif
-
-#if !defined(VUINT32x4_MUL_DEFINED) && defined(VUINT32x2_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 32, 4, 2)
+#if !defined(VUINT32x4_MUL_DEFINED) \
+	 && (defined(VUINT32x2_MUL_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_mul(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_MUL_DEFINED
 #endif
-
-#if !defined(VUINT32x4_DIV_DEFINED) && defined(VUINT32x2_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 32, 4, 2)
+#if !defined(VUINT32x4_DIV_DEFINED) \
+	 && (defined(VUINT32x2_DIV_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_div(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_DIV_DEFINED
 #endif
-
-#if !defined(VUINT32x4_MOD_DEFINED) && defined(VUINT32x2_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 32, 4, 2)
+#if !defined(VUINT32x4_MOD_DEFINED) \
+	 && (defined(VUINT32x2_MOD_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_mod(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_MOD_DEFINED
 #endif
-
-#if !defined(VUINT32x4_AVG_DEFINED) && defined(VUINT32x2_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 32, 4, 2)
+#if !defined(VUINT32x4_AVG_DEFINED) \
+	 && (defined(VUINT32x2_AVG_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_AVG_DEFINED
 #endif
-
-#if !defined(VUINT32x4_AND_DEFINED) && defined(VUINT32x2_AND_DEFINED)
-VEC_DOUBLE_AND(u, 32, 4, 2)
+#if !defined(VUINT32x4_AND_DEFINED) \
+	 && (defined(VUINT32x2_AND_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_and(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_AND_DEFINED
 #endif
-
-#if !defined(VUINT32x4_OR_DEFINED) && defined(VUINT32x2_OR_DEFINED)
-VEC_DOUBLE_OR(u, 32, 4, 2)
+#if !defined(VUINT32x4_OR_DEFINED) \
+	 && (defined(VUINT32x2_OR_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_or(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_OR_DEFINED
 #endif
-
-#if !defined(VUINT32x4_XOR_DEFINED) && defined(VUINT32x2_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 32, 4, 2)
+#if !defined(VUINT32x4_XOR_DEFINED) \
+	 && (defined(VUINT32x2_XOR_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_xor(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_XOR_DEFINED
 #endif
-
-#if !defined(VUINT32x4_NOT_DEFINED) && defined(VUINT32x2_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 32, 4, 2)
+#if !defined(VUINT32x4_NOT_DEFINED) \
+	 && (defined(VUINT32x2_NOT_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_not(vuint32x4 vec)
+{
+	vec.dbl[0] = vuint32x2_not(vec.dbl[0]);
+	vec.dbl[1] = vuint32x2_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT32x4_NOT_DEFINED
 #endif
-
-#if !defined(VUINT32x4_CMPLT_DEFINED) && defined(VUINT32x2_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 32, 4, 2)
+#if !defined(VUINT32x4_CMPLT_DEFINED) \
+	 && (defined(VUINT32x2_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmplt(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT32x4_CMPEQ_DEFINED) && defined(VUINT32x2_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 32, 4, 2)
+#if !defined(VUINT32x4_CMPEQ_DEFINED) \
+	 && (defined(VUINT32x2_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpeq(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT32x4_CMPGT_DEFINED) && defined(VUINT32x2_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 32, 4, 2)
+#if !defined(VUINT32x4_CMPGT_DEFINED) \
+	 && (defined(VUINT32x2_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpgt(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT32x4_CMPLE_DEFINED) && defined(VUINT32x2_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 32, 4, 2)
+#if !defined(VUINT32x4_CMPLE_DEFINED) \
+	 && (defined(VUINT32x2_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmple(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT32x4_CMPGE_DEFINED) && defined(VUINT32x2_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 32, 4, 2)
+#if !defined(VUINT32x4_CMPGE_DEFINED) \
+	 && (defined(VUINT32x2_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpge(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT32x4_MIN_DEFINED) && defined(VUINT32x2_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 32, 4, 2)
+#if !defined(VUINT32x4_MIN_DEFINED) \
+	 && (defined(VUINT32x2_MIN_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_min(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_MIN_DEFINED
 #endif
-
-#if !defined(VUINT32x4_MAX_DEFINED) && defined(VUINT32x2_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 32, 4, 2)
+#if !defined(VUINT32x4_MAX_DEFINED) \
+	 && (defined(VUINT32x2_MAX_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_max(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_MAX_DEFINED
 #endif
-
-#if !defined(VUINT32x4_RSHIFT_DEFINED) && defined(VUINT32x2_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 32, 4, 2)
+#if !defined(VUINT32x4_RSHIFT_DEFINED) \
+	 && (defined(VUINT32x2_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT32x4_LRSHIFT_DEFINED) && defined(VUINT32x2_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 32, 4, 2)
+#if !defined(VUINT32x4_LRSHIFT_DEFINED) \
+	 && (defined(VUINT32x2_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT32x4_LSHIFT_DEFINED) && defined(VUINT32x2_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 32, 4, 2)
+#if !defined(VUINT32x4_LSHIFT_DEFINED) \
+	 && (defined(VUINT32x2_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.dbl[0] = vuint32x2_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x2_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x4_LSHIFT_DEFINED
 #endif
-
-
-
-/* vuint32x8 */
-
-#if !defined(VINT32x8_SPLAT_DEFINED) && defined(VINT32x4_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_SPLAT_DEFINED) \
+	 && (defined(VINT32x4_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_splat(vec_int32 x)
+{
+	vint32x8 vec;
+	vec.dbl[0] = vint32x4_splat(x);
+	vec.dbl[1] = vint32x4_splat(x);
+	return vec;
+}
 # define VINT32x8_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT32x8_LOAD_ALIGNED_DEFINED) && defined(VINT32x4_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT32x4_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_load_aligned(const vec_int32 x[8])
+{
+	vint32x8 vec;
+	vec.dbl[0] = vint32x4_load_aligned(x);
+	vec.dbl[1] = vint32x4_load_aligned(x + 4);
+	return vec;
+}
 # define VINT32x8_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT32x8_LOAD_DEFINED) && defined(VINT32x4_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_LOAD_DEFINED) \
+	 && (defined(VINT32x4_LOAD_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_load(const vec_int32 x[8])
+{
+	vint32x8 vec;
+	vec.dbl[0] = vint32x4_load(x);
+	vec.dbl[1] = vint32x4_load(x + 4);
+	return vec;
+}
 # define VINT32x8_LOAD_DEFINED
 #endif
-
-#if !defined(VINT32x8_STORE_ALIGNED_DEFINED) && defined(VINT32x4_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT32x4_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint32x8_store_aligned(vint32x8 vec, vec_int32 x[8])
+{
+	vint32x4_store_aligned(vec.dbl[0], x);
+	vint32x4_store_aligned(vec.dbl[1], x + 4);
+}
 # define VINT32x8_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT32x8_STORE_DEFINED) && defined(VINT32x4_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_STORE_DEFINED) \
+	 && (defined(VINT32x4_STORE_DEFINED))
+VEC_FUNC_IMPL void vint32x8_store(vint32x8 vec, vec_int32 x[8])
+{
+	vint32x4_store(vec.dbl[0], x);
+	vint32x4_store(vec.dbl[1], x + 4);
+}
 # define VINT32x8_STORE_DEFINED
 #endif
-
-#if !defined(VINT32x8_ADD_DEFINED) && defined(VINT32x4_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_ADD_DEFINED) \
+	 && (defined(VINT32x4_ADD_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_add(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_ADD_DEFINED
 #endif
-
-#if !defined(VINT32x8_SUB_DEFINED) && defined(VINT32x4_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_SUB_DEFINED) \
+	 && (defined(VINT32x4_SUB_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_sub(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_SUB_DEFINED
 #endif
-
-#if !defined(VINT32x8_MUL_DEFINED) && defined(VINT32x4_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_MUL_DEFINED) \
+	 && (defined(VINT32x4_MUL_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_mul(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_MUL_DEFINED
 #endif
-
-#if !defined(VINT32x8_DIV_DEFINED) && defined(VINT32x4_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_DIV_DEFINED) \
+	 && (defined(VINT32x4_DIV_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_div(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_DIV_DEFINED
 #endif
-
-#if !defined(VINT32x8_MOD_DEFINED) && defined(VINT32x4_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_MOD_DEFINED) \
+	 && (defined(VINT32x4_MOD_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_mod(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_MOD_DEFINED
 #endif
-
-#if !defined(VINT32x8_AVG_DEFINED) && defined(VINT32x4_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_AVG_DEFINED) \
+	 && (defined(VINT32x4_AVG_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_avg(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_AVG_DEFINED
 #endif
-
-#if !defined(VINT32x8_AND_DEFINED) && defined(VINT32x4_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_AND_DEFINED) \
+	 && (defined(VINT32x4_AND_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_and(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_AND_DEFINED
 #endif
-
-#if !defined(VINT32x8_OR_DEFINED) && defined(VINT32x4_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_OR_DEFINED) \
+	 && (defined(VINT32x4_OR_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_or(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_OR_DEFINED
 #endif
-
-#if !defined(VINT32x8_XOR_DEFINED) && defined(VINT32x4_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_XOR_DEFINED) \
+	 && (defined(VINT32x4_XOR_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_xor(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_XOR_DEFINED
 #endif
-
-#if !defined(VINT32x8_NOT_DEFINED) && defined(VINT32x4_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_NOT_DEFINED) \
+	 && (defined(VINT32x4_NOT_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_not(vint32x8 vec)
+{
+	vec.dbl[0] = vint32x4_not(vec.dbl[0]);
+	vec.dbl[1] = vint32x4_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT32x8_NOT_DEFINED
 #endif
-
-#if !defined(VINT32x8_CMPLT_DEFINED) && defined(VINT32x4_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_CMPLT_DEFINED) \
+	 && (defined(VINT32x4_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_cmplt(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT32x8_CMPEQ_DEFINED) && defined(VINT32x4_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_CMPEQ_DEFINED) \
+	 && (defined(VINT32x4_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_cmpeq(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT32x8_CMPGT_DEFINED) && defined(VINT32x4_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_CMPGT_DEFINED) \
+	 && (defined(VINT32x4_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_cmpgt(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT32x8_CMPLE_DEFINED) && defined(VINT32x4_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_CMPLE_DEFINED) \
+	 && (defined(VINT32x4_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_cmple(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT32x8_CMPGE_DEFINED) && defined(VINT32x4_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_CMPGE_DEFINED) \
+	 && (defined(VINT32x4_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_cmpge(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT32x8_MIN_DEFINED) && defined(VINT32x4_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_MIN_DEFINED) \
+	 && (defined(VINT32x4_MIN_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_min(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_MIN_DEFINED
 #endif
-
-#if !defined(VINT32x8_MAX_DEFINED) && defined(VINT32x4_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_MAX_DEFINED) \
+	 && (defined(VINT32x4_MAX_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_max(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_MAX_DEFINED
 #endif
-
-#if !defined(VINT32x8_RSHIFT_DEFINED) && defined(VINT32x4_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_RSHIFT_DEFINED) \
+	 && (defined(VINT32x4_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_rshift(vint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT32x8_LRSHIFT_DEFINED) && defined(VINT32x4_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_LRSHIFT_DEFINED) \
+	 && (defined(VINT32x4_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_lrshift(vint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT32x8_LSHIFT_DEFINED) && defined(VINT32x4_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 32, 8, 4)
+#if !defined(VINT32x8_LSHIFT_DEFINED) \
+	 && (defined(VINT32x4_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint32x8 vint32x8_lshift(vint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vint32x4_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x4_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x8_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint32x8 */
-
-#if !defined(VUINT32x8_SPLAT_DEFINED) && defined(VUINT32x4_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 32, 8, 4)
+#if !defined(VUINT32x8_SPLAT_DEFINED) \
+	 && (defined(VUINT32x4_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_splat(vec_uint32 x)
+{
+	vuint32x8 vec;
+	vec.dbl[0] = vuint32x4_splat(x);
+	vec.dbl[1] = vuint32x4_splat(x);
+	return vec;
+}
 # define VUINT32x8_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT32x8_LOAD_ALIGNED_DEFINED) && defined(VUINT32x4_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 32, 8, 4)
+#if !defined(VUINT32x8_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT32x4_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_load_aligned(const vec_uint32 x[8])
+{
+	vuint32x8 vec;
+	vec.dbl[0] = vuint32x4_load_aligned(x);
+	vec.dbl[1] = vuint32x4_load_aligned(x + 4);
+	return vec;
+}
 # define VUINT32x8_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT32x8_LOAD_DEFINED) && defined(VUINT32x4_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 32, 8, 4)
+#if !defined(VUINT32x8_LOAD_DEFINED) \
+	 && (defined(VUINT32x4_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_load(const vec_uint32 x[8])
+{
+	vuint32x8 vec;
+	vec.dbl[0] = vuint32x4_load(x);
+	vec.dbl[1] = vuint32x4_load(x + 4);
+	return vec;
+}
 # define VUINT32x8_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT32x8_STORE_ALIGNED_DEFINED) && defined(VUINT32x4_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 32, 8, 4)
+#if !defined(VUINT32x8_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT32x4_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint32x8_store_aligned(vuint32x8 vec, vec_uint32 x[8])
+{
+	vuint32x4_store_aligned(vec.dbl[0], x);
+	vuint32x4_store_aligned(vec.dbl[1], x + 4);
+}
 # define VUINT32x8_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT32x8_STORE_DEFINED) && defined(VUINT32x4_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 32, 8, 4)
+#if !defined(VUINT32x8_STORE_DEFINED) \
+	 && (defined(VUINT32x4_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint32x8_store(vuint32x8 vec, vec_uint32 x[8])
+{
+	vuint32x4_store(vec.dbl[0], x);
+	vuint32x4_store(vec.dbl[1], x + 4);
+}
 # define VUINT32x8_STORE_DEFINED
 #endif
-
-#if !defined(VUINT32x8_ADD_DEFINED) && defined(VUINT32x4_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 32, 8, 4)
+#if !defined(VUINT32x8_ADD_DEFINED) \
+	 && (defined(VUINT32x4_ADD_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_add(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_ADD_DEFINED
 #endif
-
-#if !defined(VUINT32x8_SUB_DEFINED) && defined(VUINT32x4_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 32, 8, 4)
+#if !defined(VUINT32x8_SUB_DEFINED) \
+	 && (defined(VUINT32x4_SUB_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_sub(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_SUB_DEFINED
 #endif
-
-#if !defined(VUINT32x8_MUL_DEFINED) && defined(VUINT32x4_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 32, 8, 4)
+#if !defined(VUINT32x8_MUL_DEFINED) \
+	 && (defined(VUINT32x4_MUL_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_mul(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_MUL_DEFINED
 #endif
-
-#if !defined(VUINT32x8_DIV_DEFINED) && defined(VUINT32x4_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 32, 8, 4)
+#if !defined(VUINT32x8_DIV_DEFINED) \
+	 && (defined(VUINT32x4_DIV_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_div(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_DIV_DEFINED
 #endif
-
-#if !defined(VUINT32x8_MOD_DEFINED) && defined(VUINT32x4_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 32, 8, 4)
+#if !defined(VUINT32x8_MOD_DEFINED) \
+	 && (defined(VUINT32x4_MOD_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_mod(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_MOD_DEFINED
 #endif
-
-#if !defined(VUINT32x8_AVG_DEFINED) && defined(VUINT32x4_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 32, 8, 4)
+#if !defined(VUINT32x8_AVG_DEFINED) \
+	 && (defined(VUINT32x4_AVG_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_avg(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_AVG_DEFINED
 #endif
-
-#if !defined(VUINT32x8_AND_DEFINED) && defined(VUINT32x4_AND_DEFINED)
-VEC_DOUBLE_AND(u, 32, 8, 4)
+#if !defined(VUINT32x8_AND_DEFINED) \
+	 && (defined(VUINT32x4_AND_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_and(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_AND_DEFINED
 #endif
-
-#if !defined(VUINT32x8_OR_DEFINED) && defined(VUINT32x4_OR_DEFINED)
-VEC_DOUBLE_OR(u, 32, 8, 4)
+#if !defined(VUINT32x8_OR_DEFINED) \
+	 && (defined(VUINT32x4_OR_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_or(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_OR_DEFINED
 #endif
-
-#if !defined(VUINT32x8_XOR_DEFINED) && defined(VUINT32x4_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 32, 8, 4)
+#if !defined(VUINT32x8_XOR_DEFINED) \
+	 && (defined(VUINT32x4_XOR_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_xor(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_XOR_DEFINED
 #endif
-
-#if !defined(VUINT32x8_NOT_DEFINED) && defined(VUINT32x4_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 32, 8, 4)
+#if !defined(VUINT32x8_NOT_DEFINED) \
+	 && (defined(VUINT32x4_NOT_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_not(vuint32x8 vec)
+{
+	vec.dbl[0] = vuint32x4_not(vec.dbl[0]);
+	vec.dbl[1] = vuint32x4_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT32x8_NOT_DEFINED
 #endif
-
-#if !defined(VUINT32x8_CMPLT_DEFINED) && defined(VUINT32x4_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 32, 8, 4)
+#if !defined(VUINT32x8_CMPLT_DEFINED) \
+	 && (defined(VUINT32x4_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmplt(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT32x8_CMPEQ_DEFINED) && defined(VUINT32x4_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 32, 8, 4)
+#if !defined(VUINT32x8_CMPEQ_DEFINED) \
+	 && (defined(VUINT32x4_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpeq(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT32x8_CMPGT_DEFINED) && defined(VUINT32x4_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 32, 8, 4)
+#if !defined(VUINT32x8_CMPGT_DEFINED) \
+	 && (defined(VUINT32x4_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpgt(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT32x8_CMPLE_DEFINED) && defined(VUINT32x4_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 32, 8, 4)
+#if !defined(VUINT32x8_CMPLE_DEFINED) \
+	 && (defined(VUINT32x4_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmple(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT32x8_CMPGE_DEFINED) && defined(VUINT32x4_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 32, 8, 4)
+#if !defined(VUINT32x8_CMPGE_DEFINED) \
+	 && (defined(VUINT32x4_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpge(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT32x8_MIN_DEFINED) && defined(VUINT32x4_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 32, 8, 4)
+#if !defined(VUINT32x8_MIN_DEFINED) \
+	 && (defined(VUINT32x4_MIN_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_min(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_MIN_DEFINED
 #endif
-
-#if !defined(VUINT32x8_MAX_DEFINED) && defined(VUINT32x4_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 32, 8, 4)
+#if !defined(VUINT32x8_MAX_DEFINED) \
+	 && (defined(VUINT32x4_MAX_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_max(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_MAX_DEFINED
 #endif
-
-#if !defined(VUINT32x8_RSHIFT_DEFINED) && defined(VUINT32x4_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 32, 8, 4)
+#if !defined(VUINT32x8_RSHIFT_DEFINED) \
+	 && (defined(VUINT32x4_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_rshift(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT32x8_LRSHIFT_DEFINED) && defined(VUINT32x4_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 32, 8, 4)
+#if !defined(VUINT32x8_LRSHIFT_DEFINED) \
+	 && (defined(VUINT32x4_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_lrshift(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT32x8_LSHIFT_DEFINED) && defined(VUINT32x4_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 32, 8, 4)
+#if !defined(VUINT32x8_LSHIFT_DEFINED) \
+	 && (defined(VUINT32x4_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_lshift(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.dbl[0] = vuint32x4_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x4_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x8_LSHIFT_DEFINED
 #endif
-
-
-
-/* vuint32x16 */
-
-#if !defined(VINT32x16_SPLAT_DEFINED) && defined(VINT32x8_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_SPLAT_DEFINED) \
+	 && (defined(VINT32x8_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_splat(vec_int32 x)
+{
+	vint32x16 vec;
+	vec.dbl[0] = vint32x8_splat(x);
+	vec.dbl[1] = vint32x8_splat(x);
+	return vec;
+}
 # define VINT32x16_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT32x16_LOAD_ALIGNED_DEFINED) && defined(VINT32x8_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT32x8_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_load_aligned(const vec_int32 x[16])
+{
+	vint32x16 vec;
+	vec.dbl[0] = vint32x8_load_aligned(x);
+	vec.dbl[1] = vint32x8_load_aligned(x + 8);
+	return vec;
+}
 # define VINT32x16_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT32x16_LOAD_DEFINED) && defined(VINT32x8_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_LOAD_DEFINED) \
+	 && (defined(VINT32x8_LOAD_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_load(const vec_int32 x[16])
+{
+	vint32x16 vec;
+	vec.dbl[0] = vint32x8_load(x);
+	vec.dbl[1] = vint32x8_load(x + 8);
+	return vec;
+}
 # define VINT32x16_LOAD_DEFINED
 #endif
-
-#if !defined(VINT32x16_STORE_ALIGNED_DEFINED) && defined(VINT32x8_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT32x8_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint32x16_store_aligned(vint32x16 vec, vec_int32 x[16])
+{
+	vint32x8_store_aligned(vec.dbl[0], x);
+	vint32x8_store_aligned(vec.dbl[1], x + 8);
+}
 # define VINT32x16_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT32x16_STORE_DEFINED) && defined(VINT32x8_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_STORE_DEFINED) \
+	 && (defined(VINT32x8_STORE_DEFINED))
+VEC_FUNC_IMPL void vint32x16_store(vint32x16 vec, vec_int32 x[16])
+{
+	vint32x8_store(vec.dbl[0], x);
+	vint32x8_store(vec.dbl[1], x + 8);
+}
 # define VINT32x16_STORE_DEFINED
 #endif
-
-#if !defined(VINT32x16_ADD_DEFINED) && defined(VINT32x8_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_ADD_DEFINED) \
+	 && (defined(VINT32x8_ADD_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_add(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_ADD_DEFINED
 #endif
-
-#if !defined(VINT32x16_SUB_DEFINED) && defined(VINT32x8_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_SUB_DEFINED) \
+	 && (defined(VINT32x8_SUB_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_sub(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_SUB_DEFINED
 #endif
-
-#if !defined(VINT32x16_MUL_DEFINED) && defined(VINT32x8_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_MUL_DEFINED) \
+	 && (defined(VINT32x8_MUL_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_mul(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_MUL_DEFINED
 #endif
-
-#if !defined(VINT32x16_DIV_DEFINED) && defined(VINT32x8_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_DIV_DEFINED) \
+	 && (defined(VINT32x8_DIV_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_div(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_DIV_DEFINED
 #endif
-
-#if !defined(VINT32x16_MOD_DEFINED) && defined(VINT32x8_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_MOD_DEFINED) \
+	 && (defined(VINT32x8_MOD_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_mod(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_MOD_DEFINED
 #endif
-
-#if !defined(VINT32x16_AVG_DEFINED) && defined(VINT32x8_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_AVG_DEFINED) \
+	 && (defined(VINT32x8_AVG_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_avg(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_AVG_DEFINED
 #endif
-
-#if !defined(VINT32x16_AND_DEFINED) && defined(VINT32x8_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_AND_DEFINED) \
+	 && (defined(VINT32x8_AND_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_and(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_AND_DEFINED
 #endif
-
-#if !defined(VINT32x16_OR_DEFINED) && defined(VINT32x8_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_OR_DEFINED) \
+	 && (defined(VINT32x8_OR_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_or(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_OR_DEFINED
 #endif
-
-#if !defined(VINT32x16_XOR_DEFINED) && defined(VINT32x8_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_XOR_DEFINED) \
+	 && (defined(VINT32x8_XOR_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_xor(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_XOR_DEFINED
 #endif
-
-#if !defined(VINT32x16_NOT_DEFINED) && defined(VINT32x8_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_NOT_DEFINED) \
+	 && (defined(VINT32x8_NOT_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_not(vint32x16 vec)
+{
+	vec.dbl[0] = vint32x8_not(vec.dbl[0]);
+	vec.dbl[1] = vint32x8_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT32x16_NOT_DEFINED
 #endif
-
-#if !defined(VINT32x16_CMPLT_DEFINED) && defined(VINT32x8_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_CMPLT_DEFINED) \
+	 && (defined(VINT32x8_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_cmplt(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT32x16_CMPEQ_DEFINED) && defined(VINT32x8_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_CMPEQ_DEFINED) \
+	 && (defined(VINT32x8_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_cmpeq(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT32x16_CMPGT_DEFINED) && defined(VINT32x8_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_CMPGT_DEFINED) \
+	 && (defined(VINT32x8_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_cmpgt(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT32x16_CMPLE_DEFINED) && defined(VINT32x8_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_CMPLE_DEFINED) \
+	 && (defined(VINT32x8_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_cmple(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT32x16_CMPGE_DEFINED) && defined(VINT32x8_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_CMPGE_DEFINED) \
+	 && (defined(VINT32x8_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_cmpge(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT32x16_MIN_DEFINED) && defined(VINT32x8_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_MIN_DEFINED) \
+	 && (defined(VINT32x8_MIN_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_min(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_MIN_DEFINED
 #endif
-
-#if !defined(VINT32x16_MAX_DEFINED) && defined(VINT32x8_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_MAX_DEFINED) \
+	 && (defined(VINT32x8_MAX_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_max(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_MAX_DEFINED
 #endif
-
-#if !defined(VINT32x16_RSHIFT_DEFINED) && defined(VINT32x8_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_RSHIFT_DEFINED) \
+	 && (defined(VINT32x8_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_rshift(vint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT32x16_LRSHIFT_DEFINED) && defined(VINT32x8_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_LRSHIFT_DEFINED) \
+	 && (defined(VINT32x8_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_lrshift(vint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT32x16_LSHIFT_DEFINED) && defined(VINT32x8_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 32, 16, 8)
+#if !defined(VINT32x16_LSHIFT_DEFINED) \
+	 && (defined(VINT32x8_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint32x16 vint32x16_lshift(vint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vint32x8_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint32x8_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT32x16_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint32x16 */
-
-#if !defined(VUINT32x16_SPLAT_DEFINED) && defined(VUINT32x8_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 32, 16, 8)
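+/* vuint32x16: two vuint32x8 halves in .dbl[0]/.dbl[1] */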
+#if !defined(VUINT32x16_SPLAT_DEFINED) \
+	 && (defined(VUINT32x8_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_splat(vec_uint32 x)
+{
+	vuint32x16 vec;
+	vec.dbl[0] = vuint32x8_splat(x);
+	vec.dbl[1] = vuint32x8_splat(x);
+	return vec;
+}
 # define VUINT32x16_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT32x16_LOAD_ALIGNED_DEFINED) && defined(VUINT32x8_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 32, 16, 8)
+#if !defined(VUINT32x16_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT32x8_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_load_aligned(const vec_uint32 x[16])
+{
+	vuint32x16 vec;
+	vec.dbl[0] = vuint32x8_load_aligned(x);
+	vec.dbl[1] = vuint32x8_load_aligned(x + 8);
+	return vec;
+}
 # define VUINT32x16_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT32x16_LOAD_DEFINED) && defined(VUINT32x8_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 32, 16, 8)
+#if !defined(VUINT32x16_LOAD_DEFINED) \
+	 && (defined(VUINT32x8_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_load(const vec_uint32 x[16])
+{
+	vuint32x16 vec;
+	vec.dbl[0] = vuint32x8_load(x);
+	vec.dbl[1] = vuint32x8_load(x + 8);
+	return vec;
+}
 # define VUINT32x16_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT32x16_STORE_ALIGNED_DEFINED) && defined(VUINT32x8_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 32, 16, 8)
+#if !defined(VUINT32x16_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT32x8_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint32x16_store_aligned(vuint32x16 vec, vec_uint32 x[16])
+{
+	vuint32x8_store_aligned(vec.dbl[0], x);
+	vuint32x8_store_aligned(vec.dbl[1], x + 8);
+}
 # define VUINT32x16_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT32x16_STORE_DEFINED) && defined(VUINT32x8_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 32, 16, 8)
+#if !defined(VUINT32x16_STORE_DEFINED) \
+	 && (defined(VUINT32x8_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint32x16_store(vuint32x16 vec, vec_uint32 x[16])
+{
+	vuint32x8_store(vec.dbl[0], x);
+	vuint32x8_store(vec.dbl[1], x + 8);
+}
 # define VUINT32x16_STORE_DEFINED
 #endif
-
-#if !defined(VUINT32x16_ADD_DEFINED) && defined(VUINT32x8_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 32, 16, 8)
+#if !defined(VUINT32x16_ADD_DEFINED) \
+	 && (defined(VUINT32x8_ADD_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_add(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_ADD_DEFINED
 #endif
-
-#if !defined(VUINT32x16_SUB_DEFINED) && defined(VUINT32x8_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 32, 16, 8)
+#if !defined(VUINT32x16_SUB_DEFINED) \
+	 && (defined(VUINT32x8_SUB_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_sub(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_SUB_DEFINED
 #endif
-
-#if !defined(VUINT32x16_MUL_DEFINED) && defined(VUINT32x8_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 32, 16, 8)
+#if !defined(VUINT32x16_MUL_DEFINED) \
+	 && (defined(VUINT32x8_MUL_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_mul(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_MUL_DEFINED
 #endif
-
-#if !defined(VUINT32x16_DIV_DEFINED) && defined(VUINT32x8_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 32, 16, 8)
+#if !defined(VUINT32x16_DIV_DEFINED) \
+	 && (defined(VUINT32x8_DIV_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_div(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_DIV_DEFINED
 #endif
-
-#if !defined(VUINT32x16_MOD_DEFINED) && defined(VUINT32x8_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 32, 16, 8)
+#if !defined(VUINT32x16_MOD_DEFINED) \
+	 && (defined(VUINT32x8_MOD_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_mod(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_MOD_DEFINED
 #endif
-
-#if !defined(VUINT32x16_AVG_DEFINED) && defined(VUINT32x8_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 32, 16, 8)
+#if !defined(VUINT32x16_AVG_DEFINED) \
+	 && (defined(VUINT32x8_AVG_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_avg(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_AVG_DEFINED
 #endif
-
-#if !defined(VUINT32x16_AND_DEFINED) && defined(VUINT32x8_AND_DEFINED)
-VEC_DOUBLE_AND(u, 32, 16, 8)
+#if !defined(VUINT32x16_AND_DEFINED) \
+	 && (defined(VUINT32x8_AND_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_and(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_AND_DEFINED
 #endif
-
-#if !defined(VUINT32x16_OR_DEFINED) && defined(VUINT32x8_OR_DEFINED)
-VEC_DOUBLE_OR(u, 32, 16, 8)
+#if !defined(VUINT32x16_OR_DEFINED) \
+	 && (defined(VUINT32x8_OR_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_or(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_OR_DEFINED
 #endif
-
-#if !defined(VUINT32x16_XOR_DEFINED) && defined(VUINT32x8_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 32, 16, 8)
+#if !defined(VUINT32x16_XOR_DEFINED) \
+	 && (defined(VUINT32x8_XOR_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_xor(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_XOR_DEFINED
 #endif
-
-#if !defined(VUINT32x16_NOT_DEFINED) && defined(VUINT32x8_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 32, 16, 8)
+#if !defined(VUINT32x16_NOT_DEFINED) \
+	 && (defined(VUINT32x8_NOT_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_not(vuint32x16 vec)
+{
+	vec.dbl[0] = vuint32x8_not(vec.dbl[0]);
+	vec.dbl[1] = vuint32x8_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT32x16_NOT_DEFINED
 #endif
-
-#if !defined(VUINT32x16_CMPLT_DEFINED) && defined(VUINT32x8_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 32, 16, 8)
+#if !defined(VUINT32x16_CMPLT_DEFINED) \
+	 && (defined(VUINT32x8_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmplt(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT32x16_CMPEQ_DEFINED) && defined(VUINT32x8_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 32, 16, 8)
+#if !defined(VUINT32x16_CMPEQ_DEFINED) \
+	 && (defined(VUINT32x8_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpeq(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT32x16_CMPGT_DEFINED) && defined(VUINT32x8_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 32, 16, 8)
+#if !defined(VUINT32x16_CMPGT_DEFINED) \
+	 && (defined(VUINT32x8_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpgt(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT32x16_CMPLE_DEFINED) && defined(VUINT32x8_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 32, 16, 8)
+#if !defined(VUINT32x16_CMPLE_DEFINED) \
+	 && (defined(VUINT32x8_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmple(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT32x16_CMPGE_DEFINED) && defined(VUINT32x8_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 32, 16, 8)
+#if !defined(VUINT32x16_CMPGE_DEFINED) \
+	 && (defined(VUINT32x8_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpge(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT32x16_MIN_DEFINED) && defined(VUINT32x8_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 32, 16, 8)
+#if !defined(VUINT32x16_MIN_DEFINED) \
+	 && (defined(VUINT32x8_MIN_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_min(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_MIN_DEFINED
 #endif
-
-#if !defined(VUINT32x16_MAX_DEFINED) && defined(VUINT32x8_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 32, 16, 8)
+#if !defined(VUINT32x16_MAX_DEFINED) \
+	 && (defined(VUINT32x8_MAX_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_max(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_MAX_DEFINED
 #endif
-
-#if !defined(VUINT32x16_RSHIFT_DEFINED) && defined(VUINT32x8_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 32, 16, 8)
+#if !defined(VUINT32x16_RSHIFT_DEFINED) \
+	 && (defined(VUINT32x8_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_rshift(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT32x16_LRSHIFT_DEFINED) && defined(VUINT32x8_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 32, 16, 8)
+#if !defined(VUINT32x16_LRSHIFT_DEFINED) \
+	 && (defined(VUINT32x8_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_lrshift(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT32x16_LSHIFT_DEFINED) && defined(VUINT32x8_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 32, 16, 8)
+#if !defined(VUINT32x16_LSHIFT_DEFINED) \
+	 && (defined(VUINT32x8_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_lshift(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.dbl[0] = vuint32x8_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint32x8_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT32x16_LSHIFT_DEFINED
 #endif
-
-
-
-/* vuint64x4 */
-
-#if !defined(VINT64x4_SPLAT_DEFINED) && defined(VINT64x2_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 64, 4, 2)
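+/* vint64x2: same doubling scheme, built from two vint64x1 halves */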
+#if !defined(VINT64x2_SPLAT_DEFINED) \
+	 && (defined(VINT64x1_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_splat(vec_int64 x)
+{
+	vint64x2 vec;
+	vec.dbl[0] = vint64x1_splat(x);
+	vec.dbl[1] = vint64x1_splat(x);
+	return vec;
+}
+# define VINT64x2_SPLAT_DEFINED
+#endif
+#if !defined(VINT64x2_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT64x1_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_load_aligned(const vec_int64 x[2])
+{
+	vint64x2 vec;
+	vec.dbl[0] = vint64x1_load_aligned(x);
+	vec.dbl[1] = vint64x1_load_aligned(x + 1);
+	return vec;
+}
+# define VINT64x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT64x2_LOAD_DEFINED) \
+	 && (defined(VINT64x1_LOAD_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_load(const vec_int64 x[2])
+{
+	vint64x2 vec;
+	vec.dbl[0] = vint64x1_load(x);
+	vec.dbl[1] = vint64x1_load(x + 1);
+	return vec;
+}
+# define VINT64x2_LOAD_DEFINED
+#endif
+#if !defined(VINT64x2_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT64x1_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint64x2_store_aligned(vint64x2 vec, vec_int64 x[2])
+{
+	vint64x1_store_aligned(vec.dbl[0], x);
+	vint64x1_store_aligned(vec.dbl[1], x + 1);
+}
+# define VINT64x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT64x2_STORE_DEFINED) \
+	 && (defined(VINT64x1_STORE_DEFINED))
+VEC_FUNC_IMPL void vint64x2_store(vint64x2 vec, vec_int64 x[2])
+{
+	vint64x1_store(vec.dbl[0], x);
+	vint64x1_store(vec.dbl[1], x + 1);
+}
+# define VINT64x2_STORE_DEFINED
+#endif
+#if !defined(VINT64x2_ADD_DEFINED) \
+	 && (defined(VINT64x1_ADD_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_add(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_ADD_DEFINED
+#endif
+#if !defined(VINT64x2_SUB_DEFINED) \
+	 && (defined(VINT64x1_SUB_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_sub(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_SUB_DEFINED
+#endif
+#if !defined(VINT64x2_MUL_DEFINED) \
+	 && (defined(VINT64x1_MUL_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_mul(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_MUL_DEFINED
+#endif
+#if !defined(VINT64x2_DIV_DEFINED) \
+	 && (defined(VINT64x1_DIV_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_div(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_DIV_DEFINED
+#endif
+#if !defined(VINT64x2_MOD_DEFINED) \
+	 && (defined(VINT64x1_MOD_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_mod(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_MOD_DEFINED
+#endif
+#if !defined(VINT64x2_AVG_DEFINED) \
+	 && (defined(VINT64x1_AVG_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_avg(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_AVG_DEFINED
+#endif
+#if !defined(VINT64x2_AND_DEFINED) \
+	 && (defined(VINT64x1_AND_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_and(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_AND_DEFINED
+#endif
+#if !defined(VINT64x2_OR_DEFINED) \
+	 && (defined(VINT64x1_OR_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_or(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_OR_DEFINED
+#endif
+#if !defined(VINT64x2_XOR_DEFINED) \
+	 && (defined(VINT64x1_XOR_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_xor(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_XOR_DEFINED
+#endif
+#if !defined(VINT64x2_NOT_DEFINED) \
+	 && (defined(VINT64x1_NOT_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_not(vint64x2 vec)
+{
+	vec.dbl[0] = vint64x1_not(vec.dbl[0]);
+	vec.dbl[1] = vint64x1_not(vec.dbl[1]);
+	return vec;
+}
+# define VINT64x2_NOT_DEFINED
+#endif
+#if !defined(VINT64x2_CMPLT_DEFINED) \
+	 && (defined(VINT64x1_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_cmplt(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_CMPLT_DEFINED
+#endif
+#if !defined(VINT64x2_CMPEQ_DEFINED) \
+	 && (defined(VINT64x1_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_cmpeq(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_CMPEQ_DEFINED
+#endif
+#if !defined(VINT64x2_CMPGT_DEFINED) \
+	 && (defined(VINT64x1_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_cmpgt(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_CMPGT_DEFINED
+#endif
+#if !defined(VINT64x2_CMPLE_DEFINED) \
+	 && (defined(VINT64x1_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_cmple(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_CMPLE_DEFINED
+#endif
+#if !defined(VINT64x2_CMPGE_DEFINED) \
+	 && (defined(VINT64x1_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_cmpge(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_CMPGE_DEFINED
+#endif
+#if !defined(VINT64x2_MIN_DEFINED) \
+	 && (defined(VINT64x1_MIN_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_min(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_MIN_DEFINED
+#endif
+#if !defined(VINT64x2_MAX_DEFINED) \
+	 && (defined(VINT64x1_MAX_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_max(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_MAX_DEFINED
+#endif
+#if !defined(VINT64x2_RSHIFT_DEFINED) \
+	 && (defined(VINT64x1_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_rshift(vint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_RSHIFT_DEFINED
+#endif
+#if !defined(VINT64x2_LRSHIFT_DEFINED) \
+	 && (defined(VINT64x1_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_lrshift(vint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT64x2_LSHIFT_DEFINED) \
+	 && (defined(VINT64x1_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint64x2 vint64x2_lshift(vint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vint64x1_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x1_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VINT64x2_LSHIFT_DEFINED
+#endif
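+/* vuint64x2: two vuint64x1 halves in .dbl[0]/.dbl[1] */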
+#if !defined(VUINT64x2_SPLAT_DEFINED) \
+	 && (defined(VUINT64x1_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_splat(vec_uint64 x)
+{
+	vuint64x2 vec;
+	vec.dbl[0] = vuint64x1_splat(x);
+	vec.dbl[1] = vuint64x1_splat(x);
+	return vec;
+}
+# define VUINT64x2_SPLAT_DEFINED
+#endif
+#if !defined(VUINT64x2_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT64x1_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_load_aligned(const vec_uint64 x[2])
+{
+	vuint64x2 vec;
+	vec.dbl[0] = vuint64x1_load_aligned(x);
+	vec.dbl[1] = vuint64x1_load_aligned(x + 1);
+	return vec;
+}
+# define VUINT64x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT64x2_LOAD_DEFINED) \
+	 && (defined(VUINT64x1_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_load(const vec_uint64 x[2])
+{
+	vuint64x2 vec;
+	vec.dbl[0] = vuint64x1_load(x);
+	vec.dbl[1] = vuint64x1_load(x + 1);
+	return vec;
+}
+# define VUINT64x2_LOAD_DEFINED
+#endif
+#if !defined(VUINT64x2_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT64x1_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint64x2_store_aligned(vuint64x2 vec, vec_uint64 x[2])
+{
+	vuint64x1_store_aligned(vec.dbl[0], x);
+	vuint64x1_store_aligned(vec.dbl[1], x + 1);
+}
+# define VUINT64x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT64x2_STORE_DEFINED) \
+	 && (defined(VUINT64x1_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint64x2_store(vuint64x2 vec, vec_uint64 x[2])
+{
+	vuint64x1_store(vec.dbl[0], x);
+	vuint64x1_store(vec.dbl[1], x + 1);
+}
+# define VUINT64x2_STORE_DEFINED
+#endif
+#if !defined(VUINT64x2_ADD_DEFINED) \
+	 && (defined(VUINT64x1_ADD_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_add(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_ADD_DEFINED
+#endif
+#if !defined(VUINT64x2_SUB_DEFINED) \
+	 && (defined(VUINT64x1_SUB_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_sub(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_SUB_DEFINED
+#endif
+#if !defined(VUINT64x2_MUL_DEFINED) \
+	 && (defined(VUINT64x1_MUL_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_mul(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_MUL_DEFINED
+#endif
+#if !defined(VUINT64x2_DIV_DEFINED) \
+	 && (defined(VUINT64x1_DIV_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_div(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_DIV_DEFINED
+#endif
+#if !defined(VUINT64x2_MOD_DEFINED) \
+	 && (defined(VUINT64x1_MOD_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_mod(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_MOD_DEFINED
+#endif
+#if !defined(VUINT64x2_AVG_DEFINED) \
+	 && (defined(VUINT64x1_AVG_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_avg(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_AVG_DEFINED
+#endif
+#if !defined(VUINT64x2_AND_DEFINED) \
+	 && (defined(VUINT64x1_AND_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_and(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_AND_DEFINED
+#endif
+#if !defined(VUINT64x2_OR_DEFINED) \
+	 && (defined(VUINT64x1_OR_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_or(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_OR_DEFINED
+#endif
+#if !defined(VUINT64x2_XOR_DEFINED) \
+	 && (defined(VUINT64x1_XOR_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_xor(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_XOR_DEFINED
+#endif
+#if !defined(VUINT64x2_NOT_DEFINED) \
+	 && (defined(VUINT64x1_NOT_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_not(vuint64x2 vec)
+{
+	vec.dbl[0] = vuint64x1_not(vec.dbl[0]);
+	vec.dbl[1] = vuint64x1_not(vec.dbl[1]);
+	return vec;
+}
+# define VUINT64x2_NOT_DEFINED
+#endif
+#if !defined(VUINT64x2_CMPLT_DEFINED) \
+	 && (defined(VUINT64x1_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmplt(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_CMPLT_DEFINED
+#endif
+#if !defined(VUINT64x2_CMPEQ_DEFINED) \
+	 && (defined(VUINT64x1_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpeq(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT64x2_CMPGT_DEFINED) \
+	 && (defined(VUINT64x1_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpgt(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_CMPGT_DEFINED
+#endif
+#if !defined(VUINT64x2_CMPLE_DEFINED) \
+	 && (defined(VUINT64x1_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmple(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_CMPLE_DEFINED
+#endif
+#if !defined(VUINT64x2_CMPGE_DEFINED) \
+	 && (defined(VUINT64x1_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpge(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_CMPGE_DEFINED
+#endif
+#if !defined(VUINT64x2_MIN_DEFINED) \
+	 && (defined(VUINT64x1_MIN_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_min(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_MIN_DEFINED
+#endif
+#if !defined(VUINT64x2_MAX_DEFINED) \
+	 && (defined(VUINT64x1_MAX_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_max(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_MAX_DEFINED
+#endif
+#if !defined(VUINT64x2_RSHIFT_DEFINED) \
+	 && (defined(VUINT64x1_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_rshift(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x2_LRSHIFT_DEFINED) \
+	 && (defined(VUINT64x1_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_lrshift(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x2_LSHIFT_DEFINED) \
+	 && (defined(VUINT64x1_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_lshift(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.dbl[0] = vuint64x1_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x1_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VUINT64x2_LSHIFT_DEFINED
+#endif
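+/* vint64x4: two vint64x2 halves in .dbl[0]/.dbl[1] */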
+#if !defined(VINT64x4_SPLAT_DEFINED) \
+	 && (defined(VINT64x2_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_splat(vec_int64 x)
+{
+	vint64x4 vec;
+	vec.dbl[0] = vint64x2_splat(x);
+	vec.dbl[1] = vint64x2_splat(x);
+	return vec;
+}
 # define VINT64x4_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT64x4_LOAD_ALIGNED_DEFINED) && defined(VINT64x2_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT64x2_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_load_aligned(const vec_int64 x[4])
+{
+	vint64x4 vec;
+	vec.dbl[0] = vint64x2_load_aligned(x);
+	vec.dbl[1] = vint64x2_load_aligned(x + 2);
+	return vec;
+}
 # define VINT64x4_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT64x4_LOAD_DEFINED) && defined(VINT64x2_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_LOAD_DEFINED) \
+	 && (defined(VINT64x2_LOAD_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_load(const vec_int64 x[4])
+{
+	vint64x4 vec;
+	vec.dbl[0] = vint64x2_load(x);
+	vec.dbl[1] = vint64x2_load(x + 2);
+	return vec;
+}
 # define VINT64x4_LOAD_DEFINED
 #endif
-
-#if !defined(VINT64x4_STORE_ALIGNED_DEFINED) && defined(VINT64x2_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT64x2_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint64x4_store_aligned(vint64x4 vec, vec_int64 x[4])
+{
+	vint64x2_store_aligned(vec.dbl[0], x);
+	vint64x2_store_aligned(vec.dbl[1], x + 2);
+}
 # define VINT64x4_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT64x4_STORE_DEFINED) && defined(VINT64x2_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_STORE_DEFINED) \
+	 && (defined(VINT64x2_STORE_DEFINED))
+VEC_FUNC_IMPL void vint64x4_store(vint64x4 vec, vec_int64 x[4])
+{
+	vint64x2_store(vec.dbl[0], x);
+	vint64x2_store(vec.dbl[1], x + 2);
+}
 # define VINT64x4_STORE_DEFINED
 #endif
-
-#if !defined(VINT64x4_ADD_DEFINED) && defined(VINT64x2_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_ADD_DEFINED) \
+	 && (defined(VINT64x2_ADD_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_add(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_ADD_DEFINED
 #endif
-
-#if !defined(VINT64x4_SUB_DEFINED) && defined(VINT64x2_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_SUB_DEFINED) \
+	 && (defined(VINT64x2_SUB_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_sub(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_SUB_DEFINED
 #endif
-
-#if !defined(VINT64x4_MUL_DEFINED) && defined(VINT64x2_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_MUL_DEFINED) \
+	 && (defined(VINT64x2_MUL_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_mul(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_MUL_DEFINED
 #endif
-
-#if !defined(VINT64x4_DIV_DEFINED) && defined(VINT64x2_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_DIV_DEFINED) \
+	 && (defined(VINT64x2_DIV_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_div(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_DIV_DEFINED
 #endif
-
-#if !defined(VINT64x4_MOD_DEFINED) && defined(VINT64x2_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_MOD_DEFINED) \
+	 && (defined(VINT64x2_MOD_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_mod(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_MOD_DEFINED
 #endif
-
-#if !defined(VINT64x4_AVG_DEFINED) && defined(VINT64x2_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_AVG_DEFINED) \
+	 && (defined(VINT64x2_AVG_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_avg(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_AVG_DEFINED
 #endif
-
-#if !defined(VINT64x4_AND_DEFINED) && defined(VINT64x2_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_AND_DEFINED) \
+	 && (defined(VINT64x2_AND_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_and(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_AND_DEFINED
 #endif
-
-#if !defined(VINT64x4_OR_DEFINED) && defined(VINT64x2_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_OR_DEFINED) \
+	 && (defined(VINT64x2_OR_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_or(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_OR_DEFINED
 #endif
-
-#if !defined(VINT64x4_XOR_DEFINED) && defined(VINT64x2_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_XOR_DEFINED) \
+	 && (defined(VINT64x2_XOR_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_xor(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_XOR_DEFINED
 #endif
-
-#if !defined(VINT64x4_NOT_DEFINED) && defined(VINT64x2_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_NOT_DEFINED) \
+	 && (defined(VINT64x2_NOT_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_not(vint64x4 vec)
+{
+	vec.dbl[0] = vint64x2_not(vec.dbl[0]);
+	vec.dbl[1] = vint64x2_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT64x4_NOT_DEFINED
 #endif
-
-#if !defined(VINT64x4_CMPLT_DEFINED) && defined(VINT64x2_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_CMPLT_DEFINED) \
+	 && (defined(VINT64x2_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_cmplt(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT64x4_CMPEQ_DEFINED) && defined(VINT64x2_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_CMPEQ_DEFINED) \
+	 && (defined(VINT64x2_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_cmpeq(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT64x4_CMPGT_DEFINED) && defined(VINT64x2_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_CMPGT_DEFINED) \
+	 && (defined(VINT64x2_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_cmpgt(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT64x4_CMPLE_DEFINED) && defined(VINT64x2_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_CMPLE_DEFINED) \
+	 && (defined(VINT64x2_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_cmple(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT64x4_CMPGE_DEFINED) && defined(VINT64x2_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_CMPGE_DEFINED) \
+	 && (defined(VINT64x2_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_cmpge(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT64x4_MIN_DEFINED) && defined(VINT64x2_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_MIN_DEFINED) \
+	 && (defined(VINT64x2_MIN_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_min(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_MIN_DEFINED
 #endif
-
-#if !defined(VINT64x4_MAX_DEFINED) && defined(VINT64x2_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_MAX_DEFINED) \
+	 && (defined(VINT64x2_MAX_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_max(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_MAX_DEFINED
 #endif
-
-#if !defined(VINT64x4_RSHIFT_DEFINED) && defined(VINT64x2_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_RSHIFT_DEFINED) \
+	 && (defined(VINT64x2_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_rshift(vint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT64x4_LRSHIFT_DEFINED) && defined(VINT64x2_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_LRSHIFT_DEFINED) \
+	 && (defined(VINT64x2_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_lrshift(vint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT64x4_LSHIFT_DEFINED) && defined(VINT64x2_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 64, 4, 2)
+#if !defined(VINT64x4_LSHIFT_DEFINED) \
+	 && (defined(VINT64x2_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint64x4 vint64x4_lshift(vint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vint64x2_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x2_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x4_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint64x4 */
-
-#if !defined(VUINT64x4_SPLAT_DEFINED) && defined(VUINT64x2_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 64, 4, 2)
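+/* vuint64x4: two vuint64x2 halves in .dbl[0]/.dbl[1] */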
+#if !defined(VUINT64x4_SPLAT_DEFINED) \
+	 && (defined(VUINT64x2_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_splat(vec_uint64 x)
+{
+	vuint64x4 vec;
+	vec.dbl[0] = vuint64x2_splat(x);
+	vec.dbl[1] = vuint64x2_splat(x);
+	return vec;
+}
 # define VUINT64x4_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT64x4_LOAD_ALIGNED_DEFINED) && defined(VUINT64x2_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 64, 4, 2)
+#if !defined(VUINT64x4_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT64x2_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_load_aligned(const vec_uint64 x[4])
+{
+	vuint64x4 vec;
+	vec.dbl[0] = vuint64x2_load_aligned(x);
+	vec.dbl[1] = vuint64x2_load_aligned(x + 2);
+	return vec;
+}
 # define VUINT64x4_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT64x4_LOAD_DEFINED) && defined(VUINT64x2_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 64, 4, 2)
+#if !defined(VUINT64x4_LOAD_DEFINED) \
+	 && (defined(VUINT64x2_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_load(const vec_uint64 x[4])
+{
+	vuint64x4 vec;
+	vec.dbl[0] = vuint64x2_load(x);
+	vec.dbl[1] = vuint64x2_load(x + 2);
+	return vec;
+}
 # define VUINT64x4_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT64x4_STORE_ALIGNED_DEFINED) && defined(VUINT64x2_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 64, 4, 2)
+#if !defined(VUINT64x4_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT64x2_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint64x4_store_aligned(vuint64x4 vec, vec_uint64 x[4])
+{
+	vuint64x2_store_aligned(vec.dbl[0], x);
+	vuint64x2_store_aligned(vec.dbl[1], x + 2);
+}
 # define VUINT64x4_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT64x4_STORE_DEFINED) && defined(VUINT64x2_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 64, 4, 2)
+#if !defined(VUINT64x4_STORE_DEFINED) \
+	 && (defined(VUINT64x2_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint64x4_store(vuint64x4 vec, vec_uint64 x[4])
+{
+	vuint64x2_store(vec.dbl[0], x);
+	vuint64x2_store(vec.dbl[1], x + 2);
+}
 # define VUINT64x4_STORE_DEFINED
 #endif
-
-#if !defined(VUINT64x4_ADD_DEFINED) && defined(VUINT64x2_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 64, 4, 2)
+#if !defined(VUINT64x4_ADD_DEFINED) \
+	 && (defined(VUINT64x2_ADD_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_add(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_ADD_DEFINED
 #endif
-
-#if !defined(VUINT64x4_SUB_DEFINED) && defined(VUINT64x2_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 64, 4, 2)
+#if !defined(VUINT64x4_SUB_DEFINED) \
+	 && (defined(VUINT64x2_SUB_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_sub(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_SUB_DEFINED
 #endif
-
-#if !defined(VUINT64x4_MUL_DEFINED) && defined(VUINT64x2_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 64, 4, 2)
+#if !defined(VUINT64x4_MUL_DEFINED) \
+	 && (defined(VUINT64x2_MUL_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_mul(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_MUL_DEFINED
 #endif
-
-#if !defined(VUINT64x4_DIV_DEFINED) && defined(VUINT64x2_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 64, 4, 2)
+#if !defined(VUINT64x4_DIV_DEFINED) \
+	 && (defined(VUINT64x2_DIV_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_div(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_DIV_DEFINED
 #endif
-
-#if !defined(VUINT64x4_MOD_DEFINED) && defined(VUINT64x2_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 64, 4, 2)
+#if !defined(VUINT64x4_MOD_DEFINED) \
+	 && (defined(VUINT64x2_MOD_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_mod(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_MOD_DEFINED
 #endif
-
-#if !defined(VUINT64x4_AVG_DEFINED) && defined(VUINT64x2_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 64, 4, 2)
+#if !defined(VUINT64x4_AVG_DEFINED) \
+	 && (defined(VUINT64x2_AVG_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_avg(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_AVG_DEFINED
 #endif
-
-#if !defined(VUINT64x4_AND_DEFINED) && defined(VUINT64x2_AND_DEFINED)
-VEC_DOUBLE_AND(u, 64, 4, 2)
+#if !defined(VUINT64x4_AND_DEFINED) \
+	 && (defined(VUINT64x2_AND_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_and(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_AND_DEFINED
 #endif
-
-#if !defined(VUINT64x4_OR_DEFINED) && defined(VUINT64x2_OR_DEFINED)
-VEC_DOUBLE_OR(u, 64, 4, 2)
+#if !defined(VUINT64x4_OR_DEFINED) \
+	 && (defined(VUINT64x2_OR_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_or(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_OR_DEFINED
 #endif
-
-#if !defined(VUINT64x4_XOR_DEFINED) && defined(VUINT64x2_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 64, 4, 2)
+#if !defined(VUINT64x4_XOR_DEFINED) \
+	 && (defined(VUINT64x2_XOR_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_xor(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_XOR_DEFINED
 #endif
-
-#if !defined(VUINT64x4_NOT_DEFINED) && defined(VUINT64x2_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 64, 4, 2)
+#if !defined(VUINT64x4_NOT_DEFINED) \
+	 && (defined(VUINT64x2_NOT_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_not(vuint64x4 vec)
+{
+	vec.dbl[0] = vuint64x2_not(vec.dbl[0]);
+	vec.dbl[1] = vuint64x2_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT64x4_NOT_DEFINED
 #endif
-
-#if !defined(VUINT64x4_CMPLT_DEFINED) && defined(VUINT64x2_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 64, 4, 2)
+#if !defined(VUINT64x4_CMPLT_DEFINED) \
+	 && (defined(VUINT64x2_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmplt(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT64x4_CMPEQ_DEFINED) && defined(VUINT64x2_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 64, 4, 2)
+#if !defined(VUINT64x4_CMPEQ_DEFINED) \
+	 && (defined(VUINT64x2_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpeq(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT64x4_CMPGT_DEFINED) && defined(VUINT64x2_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 64, 4, 2)
+#if !defined(VUINT64x4_CMPGT_DEFINED) \
+	 && (defined(VUINT64x2_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpgt(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT64x4_CMPLE_DEFINED) && defined(VUINT64x2_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 64, 4, 2)
+#if !defined(VUINT64x4_CMPLE_DEFINED) \
+	 && (defined(VUINT64x2_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmple(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT64x4_CMPGE_DEFINED) && defined(VUINT64x2_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 64, 4, 2)
+#if !defined(VUINT64x4_CMPGE_DEFINED) \
+	 && (defined(VUINT64x2_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpge(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT64x4_MIN_DEFINED) && defined(VUINT64x2_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 64, 4, 2)
+#if !defined(VUINT64x4_MIN_DEFINED) \
+	 && (defined(VUINT64x2_MIN_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_min(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_MIN_DEFINED
 #endif
-
-#if !defined(VUINT64x4_MAX_DEFINED) && defined(VUINT64x2_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 64, 4, 2)
+#if !defined(VUINT64x4_MAX_DEFINED) \
+	 && (defined(VUINT64x2_MAX_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_max(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_MAX_DEFINED
 #endif
-
-#if !defined(VUINT64x4_RSHIFT_DEFINED) && defined(VUINT64x2_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 64, 4, 2)
+#if !defined(VUINT64x4_RSHIFT_DEFINED) \
+	 && (defined(VUINT64x2_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_rshift(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT64x4_LRSHIFT_DEFINED) && defined(VUINT64x2_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 64, 4, 2)
+#if !defined(VUINT64x4_LRSHIFT_DEFINED) \
+	 && (defined(VUINT64x2_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_lrshift(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT64x4_LSHIFT_DEFINED) && defined(VUINT64x2_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 64, 4, 2)
+#if !defined(VUINT64x4_LSHIFT_DEFINED) \
+	 && (defined(VUINT64x2_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_lshift(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.dbl[0] = vuint64x2_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x2_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x4_LSHIFT_DEFINED
 #endif
-
-
-
-/* vuint64x8 */
-
-#if !defined(VINT64x8_SPLAT_DEFINED) && defined(VINT64x4_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(/* nothing */, 64, 8, 4)
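+/* vint64x8 */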
+#if !defined(VINT64x8_SPLAT_DEFINED) \
+	 && (defined(VINT64x4_SPLAT_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_splat(vec_int64 x)
+{
+	vint64x8 vec;
+	vec.dbl[0] = vint64x4_splat(x);
+	vec.dbl[1] = vint64x4_splat(x);
+	return vec;
+}
 # define VINT64x8_SPLAT_DEFINED
 #endif
-
-#if !defined(VINT64x8_LOAD_ALIGNED_DEFINED) && defined(VINT64x4_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VINT64x4_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_load_aligned(const vec_int64 x[8])
+{
+	vint64x8 vec;
+	vec.dbl[0] = vint64x4_load_aligned(x);
+	vec.dbl[1] = vint64x4_load_aligned(x + 4);
+	return vec;
+}
 # define VINT64x8_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT64x8_LOAD_DEFINED) && defined(VINT64x4_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_LOAD_DEFINED) \
+	 && (defined(VINT64x4_LOAD_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_load(const vec_int64 x[8])
+{
+	vint64x8 vec;
+	vec.dbl[0] = vint64x4_load(x);
+	vec.dbl[1] = vint64x4_load(x + 4);
+	return vec;
+}
 # define VINT64x8_LOAD_DEFINED
 #endif
-
-#if !defined(VINT64x8_STORE_ALIGNED_DEFINED) && defined(VINT64x4_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_STORE_ALIGNED_DEFINED) \
+	 && (defined(VINT64x4_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vint64x8_store_aligned(vint64x8 vec, vec_int64 x[8])
+{
+	vint64x4_store_aligned(vec.dbl[0], x);
+	vint64x4_store_aligned(vec.dbl[1], x + 4);
+}
 # define VINT64x8_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VINT64x8_STORE_DEFINED) && defined(VINT64x4_STORE_DEFINED)
-VEC_DOUBLE_STORE(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_STORE_DEFINED) \
+	 && (defined(VINT64x4_STORE_DEFINED))
+VEC_FUNC_IMPL void vint64x8_store(vint64x8 vec, vec_int64 x[8])
+{
+	vint64x4_store(vec.dbl[0], x);
+	vint64x4_store(vec.dbl[1], x + 4);
+}
 # define VINT64x8_STORE_DEFINED
 #endif
-
-#if !defined(VINT64x8_ADD_DEFINED) && defined(VINT64x4_ADD_DEFINED)
-VEC_DOUBLE_ADD(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_ADD_DEFINED) \
+	 && (defined(VINT64x4_ADD_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_add(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_ADD_DEFINED
 #endif
-
-#if !defined(VINT64x8_SUB_DEFINED) && defined(VINT64x4_SUB_DEFINED)
-VEC_DOUBLE_SUB(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_SUB_DEFINED) \
+	 && (defined(VINT64x4_SUB_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_sub(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_SUB_DEFINED
 #endif
-
-#if !defined(VINT64x8_MUL_DEFINED) && defined(VINT64x4_MUL_DEFINED)
-VEC_DOUBLE_MUL(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_MUL_DEFINED) \
+	 && (defined(VINT64x4_MUL_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_mul(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_MUL_DEFINED
 #endif
-
-#if !defined(VINT64x8_DIV_DEFINED) && defined(VINT64x4_DIV_DEFINED)
-VEC_DOUBLE_DIV(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_DIV_DEFINED) \
+	 && (defined(VINT64x4_DIV_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_div(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_DIV_DEFINED
 #endif
-
-#if !defined(VINT64x8_MOD_DEFINED) && defined(VINT64x4_MOD_DEFINED)
-VEC_DOUBLE_MOD(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_MOD_DEFINED) \
+	 && (defined(VINT64x4_MOD_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_mod(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_MOD_DEFINED
 #endif
-
-#if !defined(VINT64x8_AVG_DEFINED) && defined(VINT64x4_AVG_DEFINED)
-VEC_DOUBLE_AVG(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_AVG_DEFINED) \
+	 && (defined(VINT64x4_AVG_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_avg(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_AVG_DEFINED
 #endif
-
-#if !defined(VINT64x8_AND_DEFINED) && defined(VINT64x4_AND_DEFINED)
-VEC_DOUBLE_AND(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_AND_DEFINED) \
+	 && (defined(VINT64x4_AND_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_and(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_AND_DEFINED
 #endif
-
-#if !defined(VINT64x8_OR_DEFINED) && defined(VINT64x4_OR_DEFINED)
-VEC_DOUBLE_OR(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_OR_DEFINED) \
+	 && (defined(VINT64x4_OR_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_or(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_OR_DEFINED
 #endif
-
-#if !defined(VINT64x8_XOR_DEFINED) && defined(VINT64x4_XOR_DEFINED)
-VEC_DOUBLE_XOR(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_XOR_DEFINED) \
+	 && (defined(VINT64x4_XOR_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_xor(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_XOR_DEFINED
 #endif
-
-#if !defined(VINT64x8_NOT_DEFINED) && defined(VINT64x4_NOT_DEFINED)
-VEC_DOUBLE_NOT(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_NOT_DEFINED) \
+	 && (defined(VINT64x4_NOT_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_not(vint64x8 vec)
+{
+	vec.dbl[0] = vint64x4_not(vec.dbl[0]);
+	vec.dbl[1] = vint64x4_not(vec.dbl[1]);
+	return vec;
+}
 # define VINT64x8_NOT_DEFINED
 #endif
-
-#if !defined(VINT64x8_CMPLT_DEFINED) && defined(VINT64x4_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_CMPLT_DEFINED) \
+	 && (defined(VINT64x4_CMPLT_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_cmplt(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_CMPLT_DEFINED
 #endif
-
-#if !defined(VINT64x8_CMPEQ_DEFINED) && defined(VINT64x4_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_CMPEQ_DEFINED) \
+	 && (defined(VINT64x4_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_cmpeq(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_CMPEQ_DEFINED
 #endif
-
-#if !defined(VINT64x8_CMPGT_DEFINED) && defined(VINT64x4_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_CMPGT_DEFINED) \
+	 && (defined(VINT64x4_CMPGT_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_cmpgt(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_CMPGT_DEFINED
 #endif
-
-#if !defined(VINT64x8_CMPLE_DEFINED) && defined(VINT64x4_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_CMPLE_DEFINED) \
+	 && (defined(VINT64x4_CMPLE_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_cmple(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_CMPLE_DEFINED
 #endif
-
-#if !defined(VINT64x8_CMPGE_DEFINED) && defined(VINT64x4_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_CMPGE_DEFINED) \
+	 && (defined(VINT64x4_CMPGE_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_cmpge(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_CMPGE_DEFINED
 #endif
-
-#if !defined(VINT64x8_MIN_DEFINED) && defined(VINT64x4_MIN_DEFINED)
-VEC_DOUBLE_MIN(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_MIN_DEFINED) \
+	 && (defined(VINT64x4_MIN_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_min(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_MIN_DEFINED
 #endif
-
-#if !defined(VINT64x8_MAX_DEFINED) && defined(VINT64x4_MAX_DEFINED)
-VEC_DOUBLE_MAX(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_MAX_DEFINED) \
+	 && (defined(VINT64x4_MAX_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_max(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_MAX_DEFINED
 #endif
-
-#if !defined(VINT64x8_RSHIFT_DEFINED) && defined(VINT64x4_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_RSHIFT_DEFINED) \
+	 && (defined(VINT64x4_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_rshift(vint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_RSHIFT_DEFINED
 #endif
-
-#if !defined(VINT64x8_LRSHIFT_DEFINED) && defined(VINT64x4_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_LRSHIFT_DEFINED) \
+	 && (defined(VINT64x4_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_lrshift(vint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VINT64x8_LSHIFT_DEFINED) && defined(VINT64x4_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(/* nothing */, 64, 8, 4)
+#if !defined(VINT64x8_LSHIFT_DEFINED) \
+	 && (defined(VINT64x4_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vint64x8 vint64x8_lshift(vint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vint64x4_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vint64x4_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VINT64x8_LSHIFT_DEFINED
 #endif
-
-
-
-/* vint64x8 */
-
-#if !defined(VUINT64x8_SPLAT_DEFINED) && defined(VUINT64x4_SPLAT_DEFINED)
-VEC_DOUBLE_SPLAT(u, 64, 8, 4)
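+/* vuint64x8 */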
+#if !defined(VUINT64x8_SPLAT_DEFINED) \
+	 && (defined(VUINT64x4_SPLAT_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_splat(vec_uint64 x)
+{
+	vuint64x8 vec;
+	vec.dbl[0] = vuint64x4_splat(x);
+	vec.dbl[1] = vuint64x4_splat(x);
+	return vec;
+}
 # define VUINT64x8_SPLAT_DEFINED
 #endif
-
-#if !defined(VUINT64x8_LOAD_ALIGNED_DEFINED) && defined(VUINT64x4_LOAD_ALIGNED_DEFINED)
-VEC_DOUBLE_LOAD_ALIGNED(u, 64, 8, 4)
+#if !defined(VUINT64x8_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VUINT64x4_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_load_aligned(const vec_uint64 x[8])
+{
+	vuint64x8 vec;
+	vec.dbl[0] = vuint64x4_load_aligned(x);
+	vec.dbl[1] = vuint64x4_load_aligned(x + 4);
+	return vec;
+}
 # define VUINT64x8_LOAD_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT64x8_LOAD_DEFINED) && defined(VUINT64x4_LOAD_DEFINED)
-VEC_DOUBLE_LOAD(u, 64, 8, 4)
+#if !defined(VUINT64x8_LOAD_DEFINED) \
+	 && (defined(VUINT64x4_LOAD_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_load(const vec_uint64 x[8])
+{
+	vuint64x8 vec;
+	vec.dbl[0] = vuint64x4_load(x);
+	vec.dbl[1] = vuint64x4_load(x + 4);
+	return vec;
+}
 # define VUINT64x8_LOAD_DEFINED
 #endif
-
-#if !defined(VUINT64x8_STORE_ALIGNED_DEFINED) && defined(VUINT64x4_STORE_ALIGNED_DEFINED)
-VEC_DOUBLE_STORE_ALIGNED(u, 64, 8, 4)
+#if !defined(VUINT64x8_STORE_ALIGNED_DEFINED) \
+	 && (defined(VUINT64x4_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vuint64x8_store_aligned(vuint64x8 vec, vec_uint64 x[8])
+{
+	vuint64x4_store_aligned(vec.dbl[0], x);
+	vuint64x4_store_aligned(vec.dbl[1], x + 4);
+}
 # define VUINT64x8_STORE_ALIGNED_DEFINED
 #endif
-
-#if !defined(VUINT64x8_STORE_DEFINED) && defined(VUINT64x4_STORE_DEFINED)
-VEC_DOUBLE_STORE(u, 64, 8, 4)
+#if !defined(VUINT64x8_STORE_DEFINED) \
+	 && (defined(VUINT64x4_STORE_DEFINED))
+VEC_FUNC_IMPL void vuint64x8_store(vuint64x8 vec, vec_uint64 x[8])
+{
+	vuint64x4_store(vec.dbl[0], x);
+	vuint64x4_store(vec.dbl[1], x + 4);
+}
 # define VUINT64x8_STORE_DEFINED
 #endif
-
-#if !defined(VUINT64x8_ADD_DEFINED) && defined(VUINT64x4_ADD_DEFINED)
-VEC_DOUBLE_ADD(u, 64, 8, 4)
+#if !defined(VUINT64x8_ADD_DEFINED) \
+	 && (defined(VUINT64x4_ADD_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_add(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_ADD_DEFINED
 #endif
-
-#if !defined(VUINT64x8_SUB_DEFINED) && defined(VUINT64x4_SUB_DEFINED)
-VEC_DOUBLE_SUB(u, 64, 8, 4)
+#if !defined(VUINT64x8_SUB_DEFINED) \
+	 && (defined(VUINT64x4_SUB_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_sub(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_SUB_DEFINED
 #endif
-
-#if !defined(VUINT64x8_MUL_DEFINED) && defined(VUINT64x4_MUL_DEFINED)
-VEC_DOUBLE_MUL(u, 64, 8, 4)
+#if !defined(VUINT64x8_MUL_DEFINED) \
+	 && (defined(VUINT64x4_MUL_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_mul(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_MUL_DEFINED
 #endif
-
-#if !defined(VUINT64x8_DIV_DEFINED) && defined(VUINT64x4_DIV_DEFINED)
-VEC_DOUBLE_DIV(u, 64, 8, 4)
+#if !defined(VUINT64x8_DIV_DEFINED) \
+	 && (defined(VUINT64x4_DIV_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_div(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_DIV_DEFINED
 #endif
-
-#if !defined(VUINT64x8_MOD_DEFINED) && defined(VUINT64x4_MOD_DEFINED)
-VEC_DOUBLE_MOD(u, 64, 8, 4)
+#if !defined(VUINT64x8_MOD_DEFINED) \
+	 && (defined(VUINT64x4_MOD_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_mod(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_MOD_DEFINED
 #endif
-
-#if !defined(VUINT64x8_AVG_DEFINED) && defined(VUINT64x4_AVG_DEFINED)
-VEC_DOUBLE_AVG(u, 64, 8, 4)
+#if !defined(VUINT64x8_AVG_DEFINED) \
+	 && (defined(VUINT64x4_AVG_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_avg(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_AVG_DEFINED
 #endif
-
-#if !defined(VUINT64x8_AND_DEFINED) && defined(VUINT64x4_AND_DEFINED)
-VEC_DOUBLE_AND(u, 64, 8, 4)
+#if !defined(VUINT64x8_AND_DEFINED) \
+	 && (defined(VUINT64x4_AND_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_and(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_and(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_and(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_AND_DEFINED
 #endif
-
-#if !defined(VUINT64x8_OR_DEFINED) && defined(VUINT64x4_OR_DEFINED)
-VEC_DOUBLE_OR(u, 64, 8, 4)
+#if !defined(VUINT64x8_OR_DEFINED) \
+	 && (defined(VUINT64x4_OR_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_or(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_or(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_or(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_OR_DEFINED
 #endif
-
-#if !defined(VUINT64x8_XOR_DEFINED) && defined(VUINT64x4_XOR_DEFINED)
-VEC_DOUBLE_XOR(u, 64, 8, 4)
+#if !defined(VUINT64x8_XOR_DEFINED) \
+	 && (defined(VUINT64x4_XOR_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_xor(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_xor(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_xor(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_XOR_DEFINED
 #endif
-
-#if !defined(VUINT64x8_NOT_DEFINED) && defined(VUINT64x4_NOT_DEFINED)
-VEC_DOUBLE_NOT(u, 64, 8, 4)
+#if !defined(VUINT64x8_NOT_DEFINED) \
+	 && (defined(VUINT64x4_NOT_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_not(vuint64x8 vec)
+{
+	vec.dbl[0] = vuint64x4_not(vec.dbl[0]);
+	vec.dbl[1] = vuint64x4_not(vec.dbl[1]);
+	return vec;
+}
 # define VUINT64x8_NOT_DEFINED
 #endif
-
-#if !defined(VUINT64x8_CMPLT_DEFINED) && defined(VUINT64x4_CMPLT_DEFINED)
-VEC_DOUBLE_CMPLT(u, 64, 8, 4)
+#if !defined(VUINT64x8_CMPLT_DEFINED) \
+	 && (defined(VUINT64x4_CMPLT_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmplt(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_CMPLT_DEFINED
 #endif
-
-#if !defined(VUINT64x8_CMPEQ_DEFINED) && defined(VUINT64x4_CMPEQ_DEFINED)
-VEC_DOUBLE_CMPEQ(u, 64, 8, 4)
+#if !defined(VUINT64x8_CMPEQ_DEFINED) \
+	 && (defined(VUINT64x4_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpeq(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_CMPEQ_DEFINED
 #endif
-
-#if !defined(VUINT64x8_CMPGT_DEFINED) && defined(VUINT64x4_CMPGT_DEFINED)
-VEC_DOUBLE_CMPGT(u, 64, 8, 4)
+#if !defined(VUINT64x8_CMPGT_DEFINED) \
+	 && (defined(VUINT64x4_CMPGT_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpgt(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_CMPGT_DEFINED
 #endif
-
-#if !defined(VUINT64x8_CMPLE_DEFINED) && defined(VUINT64x4_CMPLE_DEFINED)
-VEC_DOUBLE_CMPLE(u, 64, 8, 4)
+#if !defined(VUINT64x8_CMPLE_DEFINED) \
+	 && (defined(VUINT64x4_CMPLE_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmple(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_CMPLE_DEFINED
 #endif
-
-#if !defined(VUINT64x8_CMPGE_DEFINED) && defined(VUINT64x4_CMPGE_DEFINED)
-VEC_DOUBLE_CMPGE(u, 64, 8, 4)
+#if !defined(VUINT64x8_CMPGE_DEFINED) \
+	 && (defined(VUINT64x4_CMPGE_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpge(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_CMPGE_DEFINED
 #endif
-
-#if !defined(VUINT64x8_MIN_DEFINED) && defined(VUINT64x4_MIN_DEFINED)
-VEC_DOUBLE_MIN(u, 64, 8, 4)
+#if !defined(VUINT64x8_MIN_DEFINED) \
+	 && (defined(VUINT64x4_MIN_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_min(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_MIN_DEFINED
 #endif
-
-#if !defined(VUINT64x8_MAX_DEFINED) && defined(VUINT64x4_MAX_DEFINED)
-VEC_DOUBLE_MAX(u, 64, 8, 4)
+#if !defined(VUINT64x8_MAX_DEFINED) \
+	 && (defined(VUINT64x4_MAX_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_max(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_MAX_DEFINED
 #endif
-
-#if !defined(VUINT64x8_RSHIFT_DEFINED) && defined(VUINT64x4_RSHIFT_DEFINED)
-VEC_DOUBLE_RSHIFT(u, 64, 8, 4)
+#if !defined(VUINT64x8_RSHIFT_DEFINED) \
+	 && (defined(VUINT64x4_RSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_rshift(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_rshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_rshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_RSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT64x8_LRSHIFT_DEFINED) && defined(VUINT64x4_LRSHIFT_DEFINED)
-VEC_DOUBLE_LRSHIFT(u, 64, 8, 4)
+#if !defined(VUINT64x8_LRSHIFT_DEFINED) \
+	 && (defined(VUINT64x4_LRSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_lrshift(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_lrshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_lrshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_LRSHIFT_DEFINED
 #endif
-
-#if !defined(VUINT64x8_LSHIFT_DEFINED) && defined(VUINT64x4_LSHIFT_DEFINED)
-VEC_DOUBLE_LSHIFT(u, 64, 8, 4)
+#if !defined(VUINT64x8_LSHIFT_DEFINED) \
+	 && (defined(VUINT64x4_LSHIFT_DEFINED))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_lshift(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.dbl[0] = vuint64x4_lshift(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vuint64x4_lshift(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
 # define VUINT64x8_LSHIFT_DEFINED
 #endif
-
-
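+/* Floating-point "doubled" types begin here. As with the integer types
+ * above, each vfNxM operation simply delegates to the two half-width
+ * vectors stored in .dbl[0] and .dbl[1].
+ *
+ * A minimal usage sketch, assuming the vf32x4 ops below end up defined
+ * for the target:
+ *
+ *	vec_f32 a[4] = {1.0f, 2.0f, 3.0f, 4.0f};
+ *	vec_f32 b[4] = {4.0f, 3.0f, 2.0f, 1.0f};
+ *	vec_f32 r[4];
+ *	vf32x4 v = vf32x4_add(vf32x4_load(a), vf32x4_load(b));
+ *	vf32x4_store(v, r);	(r becomes {5, 5, 5, 5})
+ */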
+#if !defined(VF32x2_SPLAT_DEFINED) \
+	 && (defined(VF32x1_SPLAT_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_splat(vec_f32 x)
+{
+	vf32x2 vec;
+	vec.dbl[0] = vf32x1_splat(x);
+	vec.dbl[1] = vf32x1_splat(x);
+	return vec;
+}
+# define VF32x2_SPLAT_DEFINED
+#endif
+#if !defined(VF32x2_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VF32x1_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_load_aligned(const vec_f32 x[2])
+{
+	vf32x2 vec;
+	vec.dbl[0] = vf32x1_load_aligned(x);
+	vec.dbl[1] = vf32x1_load_aligned(x + 1);
+	return vec;
+}
+# define VF32x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x2_LOAD_DEFINED) \
+	 && (defined(VF32x1_LOAD_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_load(const vec_f32 x[2])
+{
+	vf32x2 vec;
+	vec.dbl[0] = vf32x1_load(x);
+	vec.dbl[1] = vf32x1_load(x + 1);
+	return vec;
+}
+# define VF32x2_LOAD_DEFINED
+#endif
+#if !defined(VF32x2_STORE_ALIGNED_DEFINED) \
+	 && (defined(VF32x1_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vf32x2_store_aligned(vf32x2 vec, vec_f32 x[2])
+{
+	vf32x1_store_aligned(vec.dbl[0], x);
+	vf32x1_store_aligned(vec.dbl[1], x + 1);
+}
+# define VF32x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x2_STORE_DEFINED) \
+	 && (defined(VF32x1_STORE_DEFINED))
+VEC_FUNC_IMPL void vf32x2_store(vf32x2 vec, vec_f32 x[2])
+{
+	vf32x1_store(vec.dbl[0], x);
+	vf32x1_store(vec.dbl[1], x + 1);
+}
+# define VF32x2_STORE_DEFINED
+#endif
+#if !defined(VF32x2_ADD_DEFINED) \
+	 && (defined(VF32x1_ADD_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_add(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_ADD_DEFINED
+#endif
+#if !defined(VF32x2_SUB_DEFINED) \
+	 && (defined(VF32x1_SUB_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_sub(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_SUB_DEFINED
+#endif
+#if !defined(VF32x2_MUL_DEFINED) \
+	 && (defined(VF32x1_MUL_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_mul(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_MUL_DEFINED
+#endif
+#if !defined(VF32x2_DIV_DEFINED) \
+	 && (defined(VF32x1_DIV_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_div(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_DIV_DEFINED
+#endif
+#if !defined(VF32x2_MOD_DEFINED) \
+	 && (defined(VF32x1_MOD_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_mod(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_MOD_DEFINED
+#endif
+#if !defined(VF32x2_AVG_DEFINED) \
+	 && (defined(VF32x1_AVG_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_avg(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_AVG_DEFINED
+#endif
+#if !defined(VF32x2_CMPLT_DEFINED) \
+	 && (defined(VF32x1_CMPLT_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_cmplt(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_CMPLT_DEFINED
+#endif
+#if !defined(VF32x2_CMPEQ_DEFINED) \
+	 && (defined(VF32x1_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_cmpeq(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x2_CMPGT_DEFINED) \
+	 && (defined(VF32x1_CMPGT_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_cmpgt(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_CMPGT_DEFINED
+#endif
+#if !defined(VF32x2_CMPLE_DEFINED) \
+	 && (defined(VF32x1_CMPLE_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_cmple(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_CMPLE_DEFINED
+#endif
+#if !defined(VF32x2_CMPGE_DEFINED) \
+	 && (defined(VF32x1_CMPGE_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_cmpge(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_CMPGE_DEFINED
+#endif
+#if !defined(VF32x2_MIN_DEFINED) \
+	 && (defined(VF32x1_MIN_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_min(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_MIN_DEFINED
+#endif
+#if !defined(VF32x2_MAX_DEFINED) \
+	 && (defined(VF32x1_MAX_DEFINED))
+VEC_FUNC_IMPL vf32x2 vf32x2_max(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.dbl[0] = vf32x1_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x1_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x2_MAX_DEFINED
+#endif
+#if !defined(VF32x4_SPLAT_DEFINED) \
+	 && (defined(VF32x2_SPLAT_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_splat(vec_f32 x)
+{
+	vf32x4 vec;
+	vec.dbl[0] = vf32x2_splat(x);
+	vec.dbl[1] = vf32x2_splat(x);
+	return vec;
+}
+# define VF32x4_SPLAT_DEFINED
+#endif
+#if !defined(VF32x4_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VF32x2_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_load_aligned(const vec_f32 x[4])
+{
+	vf32x4 vec;
+	vec.dbl[0] = vf32x2_load_aligned(x);
+	vec.dbl[1] = vf32x2_load_aligned(x + 2);
+	return vec;
+}
+# define VF32x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x4_LOAD_DEFINED) \
+	 && (defined(VF32x2_LOAD_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_load(const vec_f32 x[4])
+{
+	vf32x4 vec;
+	vec.dbl[0] = vf32x2_load(x);
+	vec.dbl[1] = vf32x2_load(x + 2);
+	return vec;
+}
+# define VF32x4_LOAD_DEFINED
+#endif
+#if !defined(VF32x4_STORE_ALIGNED_DEFINED) \
+	 && (defined(VF32x2_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vf32x4_store_aligned(vf32x4 vec, vec_f32 x[4])
+{
+	vf32x2_store_aligned(vec.dbl[0], x);
+	vf32x2_store_aligned(vec.dbl[1], x + 2);
+}
+# define VF32x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x4_STORE_DEFINED) \
+	 && (defined(VF32x2_STORE_DEFINED))
+VEC_FUNC_IMPL void vf32x4_store(vf32x4 vec, vec_f32 x[4])
+{
+	vf32x2_store(vec.dbl[0], x);
+	vf32x2_store(vec.dbl[1], x + 2);
+}
+# define VF32x4_STORE_DEFINED
+#endif
+#if !defined(VF32x4_ADD_DEFINED) \
+	 && (defined(VF32x2_ADD_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_add(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_ADD_DEFINED
+#endif
+#if !defined(VF32x4_SUB_DEFINED) \
+	 && (defined(VF32x2_SUB_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_sub(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_SUB_DEFINED
+#endif
+#if !defined(VF32x4_MUL_DEFINED) \
+	 && (defined(VF32x2_MUL_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_mul(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_MUL_DEFINED
+#endif
+#if !defined(VF32x4_DIV_DEFINED) \
+	 && (defined(VF32x2_DIV_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_div(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_DIV_DEFINED
+#endif
+#if !defined(VF32x4_MOD_DEFINED) \
+	 && (defined(VF32x2_MOD_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_mod(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_MOD_DEFINED
+#endif
+#if !defined(VF32x4_AVG_DEFINED) \
+	 && (defined(VF32x2_AVG_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_avg(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_AVG_DEFINED
+#endif
+#if !defined(VF32x4_CMPLT_DEFINED) \
+	 && (defined(VF32x2_CMPLT_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_cmplt(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_CMPLT_DEFINED
+#endif
+#if !defined(VF32x4_CMPEQ_DEFINED) \
+	 && (defined(VF32x2_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpeq(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x4_CMPGT_DEFINED) \
+	 && (defined(VF32x2_CMPGT_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpgt(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_CMPGT_DEFINED
+#endif
+#if !defined(VF32x4_CMPLE_DEFINED) \
+	 && (defined(VF32x2_CMPLE_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_cmple(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_CMPLE_DEFINED
+#endif
+#if !defined(VF32x4_CMPGE_DEFINED) \
+	 && (defined(VF32x2_CMPGE_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpge(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_CMPGE_DEFINED
+#endif
+#if !defined(VF32x4_MIN_DEFINED) \
+	 && (defined(VF32x2_MIN_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_min(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_MIN_DEFINED
+#endif
+#if !defined(VF32x4_MAX_DEFINED) \
+	 && (defined(VF32x2_MAX_DEFINED))
+VEC_FUNC_IMPL vf32x4 vf32x4_max(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.dbl[0] = vf32x2_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x2_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x4_MAX_DEFINED
+#endif
+#if !defined(VF32x8_SPLAT_DEFINED) \
+	 && (defined(VF32x4_SPLAT_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_splat(vec_f32 x)
+{
+	vf32x8 vec;
+	vec.dbl[0] = vf32x4_splat(x);
+	vec.dbl[1] = vf32x4_splat(x);
+	return vec;
+}
+# define VF32x8_SPLAT_DEFINED
+#endif
+#if !defined(VF32x8_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VF32x4_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_load_aligned(const vec_f32 x[8])
+{
+	vf32x8 vec;
+	vec.dbl[0] = vf32x4_load_aligned(x);
+	vec.dbl[1] = vf32x4_load_aligned(x + 4);
+	return vec;
+}
+# define VF32x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x8_LOAD_DEFINED) \
+	 && (defined(VF32x4_LOAD_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_load(const vec_f32 x[8])
+{
+	vf32x8 vec;
+	vec.dbl[0] = vf32x4_load(x);
+	vec.dbl[1] = vf32x4_load(x + 4);
+	return vec;
+}
+# define VF32x8_LOAD_DEFINED
+#endif
+#if !defined(VF32x8_STORE_ALIGNED_DEFINED) \
+	 && (defined(VF32x4_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vf32x8_store_aligned(vf32x8 vec, vec_f32 x[8])
+{
+	vf32x4_store_aligned(vec.dbl[0], x);
+	vf32x4_store_aligned(vec.dbl[1], x + 4);
+}
+# define VF32x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x8_STORE_DEFINED) \
+	 && (defined(VF32x4_STORE_DEFINED))
+VEC_FUNC_IMPL void vf32x8_store(vf32x8 vec, vec_f32 x[8])
+{
+	vf32x4_store(vec.dbl[0], x);
+	vf32x4_store(vec.dbl[1], x + 4);
+}
+# define VF32x8_STORE_DEFINED
+#endif
+#if !defined(VF32x8_ADD_DEFINED) \
+	 && (defined(VF32x4_ADD_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_add(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_ADD_DEFINED
+#endif
+#if !defined(VF32x8_SUB_DEFINED) \
+	 && (defined(VF32x4_SUB_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_sub(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_SUB_DEFINED
+#endif
+#if !defined(VF32x8_MUL_DEFINED) \
+	 && (defined(VF32x4_MUL_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_mul(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_MUL_DEFINED
+#endif
+#if !defined(VF32x8_DIV_DEFINED) \
+	 && (defined(VF32x4_DIV_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_div(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_DIV_DEFINED
+#endif
+#if !defined(VF32x8_MOD_DEFINED) \
+	 && (defined(VF32x4_MOD_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_mod(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_MOD_DEFINED
+#endif
+#if !defined(VF32x8_AVG_DEFINED) \
+	 && (defined(VF32x4_AVG_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_avg(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_AVG_DEFINED
+#endif
+#if !defined(VF32x8_CMPLT_DEFINED) \
+	 && (defined(VF32x4_CMPLT_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_cmplt(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_CMPLT_DEFINED
+#endif
+#if !defined(VF32x8_CMPEQ_DEFINED) \
+	 && (defined(VF32x4_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_cmpeq(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x8_CMPGT_DEFINED) \
+	 && (defined(VF32x4_CMPGT_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_cmpgt(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_CMPGT_DEFINED
+#endif
+#if !defined(VF32x8_CMPLE_DEFINED) \
+	 && (defined(VF32x4_CMPLE_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_cmple(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_CMPLE_DEFINED
+#endif
+#if !defined(VF32x8_CMPGE_DEFINED) \
+	 && (defined(VF32x4_CMPGE_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_cmpge(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_CMPGE_DEFINED
+#endif
+#if !defined(VF32x8_MIN_DEFINED) \
+	 && (defined(VF32x4_MIN_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_min(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_MIN_DEFINED
+#endif
+#if !defined(VF32x8_MAX_DEFINED) \
+	 && (defined(VF32x4_MAX_DEFINED))
+VEC_FUNC_IMPL vf32x8 vf32x8_max(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.dbl[0] = vf32x4_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x4_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x8_MAX_DEFINED
+#endif
+#if !defined(VF32x16_SPLAT_DEFINED) \
+	 && (defined(VF32x8_SPLAT_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_splat(vec_f32 x)
+{
+	vf32x16 vec;
+	vec.dbl[0] = vf32x8_splat(x);
+	vec.dbl[1] = vf32x8_splat(x);
+	return vec;
+}
+# define VF32x16_SPLAT_DEFINED
+#endif
+#if !defined(VF32x16_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VF32x8_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_load_aligned(const vec_f32 x[16])
+{
+	vf32x16 vec;
+	vec.dbl[0] = vf32x8_load_aligned(x);
+	vec.dbl[1] = vf32x8_load_aligned(x + 8);
+	return vec;
+}
+# define VF32x16_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x16_LOAD_DEFINED) \
+	 && (defined(VF32x8_LOAD_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_load(const vec_f32 x[16])
+{
+	vf32x16 vec;
+	vec.dbl[0] = vf32x8_load(x);
+	vec.dbl[1] = vf32x8_load(x + 8);
+	return vec;
+}
+# define VF32x16_LOAD_DEFINED
+#endif
+#if !defined(VF32x16_STORE_ALIGNED_DEFINED) \
+	 && (defined(VF32x8_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vf32x16_store_aligned(vf32x16 vec, vec_f32 x[16])
+{
+	vf32x8_store_aligned(vec.dbl[0], x);
+	vf32x8_store_aligned(vec.dbl[1], x + 8);
+}
+# define VF32x16_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x16_STORE_DEFINED) \
+	 && (defined(VF32x8_STORE_DEFINED))
+VEC_FUNC_IMPL void vf32x16_store(vf32x16 vec, vec_f32 x[16])
+{
+	vf32x8_store(vec.dbl[0], x);
+	vf32x8_store(vec.dbl[1], x + 8);
+}
+# define VF32x16_STORE_DEFINED
+#endif
+#if !defined(VF32x16_ADD_DEFINED) \
+	 && (defined(VF32x8_ADD_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_add(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_ADD_DEFINED
+#endif
+#if !defined(VF32x16_SUB_DEFINED) \
+	 && (defined(VF32x8_SUB_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_sub(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_SUB_DEFINED
+#endif
+#if !defined(VF32x16_MUL_DEFINED) \
+	 && (defined(VF32x8_MUL_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_mul(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_MUL_DEFINED
+#endif
+#if !defined(VF32x16_DIV_DEFINED) \
+	 && (defined(VF32x8_DIV_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_div(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_DIV_DEFINED
+#endif
+#if !defined(VF32x16_MOD_DEFINED) \
+	 && (defined(VF32x8_MOD_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_mod(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_MOD_DEFINED
+#endif
+#if !defined(VF32x16_AVG_DEFINED) \
+	 && (defined(VF32x8_AVG_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_avg(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_AVG_DEFINED
+#endif
+#if !defined(VF32x16_CMPLT_DEFINED) \
+	 && (defined(VF32x8_CMPLT_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_cmplt(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_CMPLT_DEFINED
+#endif
+#if !defined(VF32x16_CMPEQ_DEFINED) \
+	 && (defined(VF32x8_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_cmpeq(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x16_CMPGT_DEFINED) \
+	 && (defined(VF32x8_CMPGT_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_cmpgt(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_CMPGT_DEFINED
+#endif
+#if !defined(VF32x16_CMPLE_DEFINED) \
+	 && (defined(VF32x8_CMPLE_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_cmple(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_CMPLE_DEFINED
+#endif
+#if !defined(VF32x16_CMPGE_DEFINED) \
+	 && (defined(VF32x8_CMPGE_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_cmpge(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_CMPGE_DEFINED
+#endif
+#if !defined(VF32x16_MIN_DEFINED) \
+	 && (defined(VF32x8_MIN_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_min(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_MIN_DEFINED
+#endif
+#if !defined(VF32x16_MAX_DEFINED) \
+	 && (defined(VF32x8_MAX_DEFINED))
+VEC_FUNC_IMPL vf32x16 vf32x16_max(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.dbl[0] = vf32x8_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf32x8_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF32x16_MAX_DEFINED
+#endif
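+/* vf64x2 .. vf64x8: the same doubling scheme, applied to 64-bit floats */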
+#if !defined(VF64x2_SPLAT_DEFINED) \
+	 && (defined(VF64x1_SPLAT_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_splat(vec_f64 x)
+{
+	vf64x2 vec;
+	vec.dbl[0] = vf64x1_splat(x);
+	vec.dbl[1] = vf64x1_splat(x);
+	return vec;
+}
+# define VF64x2_SPLAT_DEFINED
+#endif
+#if !defined(VF64x2_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VF64x1_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_load_aligned(const vec_f64 x[2])
+{
+	vf64x2 vec;
+	vec.dbl[0] = vf64x1_load_aligned(x);
+	vec.dbl[1] = vf64x1_load_aligned(x + 1);
+	return vec;
+}
+# define VF64x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x2_LOAD_DEFINED) \
+	 && (defined(VF64x1_LOAD_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_load(const vec_f64 x[2])
+{
+	vf64x2 vec;
+	vec.dbl[0] = vf64x1_load(x);
+	vec.dbl[1] = vf64x1_load(x + 1);
+	return vec;
+}
+# define VF64x2_LOAD_DEFINED
+#endif
+#if !defined(VF64x2_STORE_ALIGNED_DEFINED) \
+	 && (defined(VF64x1_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vf64x2_store_aligned(vf64x2 vec, vec_f64 x[2])
+{
+	vf64x1_store_aligned(vec.dbl[0], x);
+	vf64x1_store_aligned(vec.dbl[1], x + 1);
+}
+# define VF64x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x2_STORE_DEFINED) \
+	 && (defined(VF64x1_STORE_DEFINED))
+VEC_FUNC_IMPL void vf64x2_store(vf64x2 vec, vec_f64 x[2])
+{
+	vf64x1_store(vec.dbl[0], x);
+	vf64x1_store(vec.dbl[1], x + 1);
+}
+# define VF64x2_STORE_DEFINED
+#endif
+#if !defined(VF64x2_ADD_DEFINED) \
+	 && (defined(VF64x1_ADD_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_add(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_ADD_DEFINED
+#endif
+#if !defined(VF64x2_SUB_DEFINED) \
+	 && (defined(VF64x1_SUB_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_sub(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_SUB_DEFINED
+#endif
+#if !defined(VF64x2_MUL_DEFINED) \
+	 && (defined(VF64x1_MUL_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_mul(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_MUL_DEFINED
+#endif
+#if !defined(VF64x2_DIV_DEFINED) \
+	 && (defined(VF64x1_DIV_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_div(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_DIV_DEFINED
+#endif
+#if !defined(VF64x2_MOD_DEFINED) \
+	 && (defined(VF64x1_MOD_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_mod(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_MOD_DEFINED
+#endif
+#if !defined(VF64x2_AVG_DEFINED) \
+	 && (defined(VF64x1_AVG_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_avg(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_AVG_DEFINED
+#endif
+#if !defined(VF64x2_CMPLT_DEFINED) \
+	 && (defined(VF64x1_CMPLT_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_cmplt(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_CMPLT_DEFINED
+#endif
+#if !defined(VF64x2_CMPEQ_DEFINED) \
+	 && (defined(VF64x1_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_cmpeq(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_CMPEQ_DEFINED
+#endif
+#if !defined(VF64x2_CMPGT_DEFINED) \
+	 && (defined(VF64x1_CMPGT_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_cmpgt(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_CMPGT_DEFINED
+#endif
+#if !defined(VF64x2_CMPLE_DEFINED) \
+	 && (defined(VF64x1_CMPLE_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_cmple(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_CMPLE_DEFINED
+#endif
+#if !defined(VF64x2_CMPGE_DEFINED) \
+	 && (defined(VF64x1_CMPGE_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_cmpge(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_CMPGE_DEFINED
+#endif
+#if !defined(VF64x2_MIN_DEFINED) \
+	 && (defined(VF64x1_MIN_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_min(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_MIN_DEFINED
+#endif
+#if !defined(VF64x2_MAX_DEFINED) \
+	 && (defined(VF64x1_MAX_DEFINED))
+VEC_FUNC_IMPL vf64x2 vf64x2_max(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.dbl[0] = vf64x1_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x1_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x2_MAX_DEFINED
+#endif
+#if !defined(VF64x4_SPLAT_DEFINED) \
+	 && (defined(VF64x2_SPLAT_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_splat(vec_f64 x)
+{
+	vf64x4 vec;
+	vec.dbl[0] = vf64x2_splat(x);
+	vec.dbl[1] = vf64x2_splat(x);
+	return vec;
+}
+# define VF64x4_SPLAT_DEFINED
+#endif
+#if !defined(VF64x4_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VF64x2_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_load_aligned(const vec_f64 x[4])
+{
+	vf64x4 vec;
+	vec.dbl[0] = vf64x2_load_aligned(x);
+	vec.dbl[1] = vf64x2_load_aligned(x + 2);
+	return vec;
+}
+# define VF64x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x4_LOAD_DEFINED) \
+	 && (defined(VF64x2_LOAD_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_load(const vec_f64 x[4])
+{
+	vf64x4 vec;
+	vec.dbl[0] = vf64x2_load(x);
+	vec.dbl[1] = vf64x2_load(x + 2);
+	return vec;
+}
+# define VF64x4_LOAD_DEFINED
+#endif
+#if !defined(VF64x4_STORE_ALIGNED_DEFINED) \
+	 && (defined(VF64x2_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vf64x4_store_aligned(vf64x4 vec, vec_f64 x[4])
+{
+	vf64x2_store_aligned(vec.dbl[0], x);
+	vf64x2_store_aligned(vec.dbl[1], x + 2);
+}
+# define VF64x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x4_STORE_DEFINED) \
+	 && (defined(VF64x2_STORE_DEFINED))
+VEC_FUNC_IMPL void vf64x4_store(vf64x4 vec, vec_f64 x[4])
+{
+	vf64x2_store(vec.dbl[0], x);
+	vf64x2_store(vec.dbl[1], x + 2);
+}
+# define VF64x4_STORE_DEFINED
+#endif
+#if !defined(VF64x4_ADD_DEFINED) \
+	 && (defined(VF64x2_ADD_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_add(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_ADD_DEFINED
+#endif
+#if !defined(VF64x4_SUB_DEFINED) \
+	 && (defined(VF64x2_SUB_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_sub(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_SUB_DEFINED
+#endif
+#if !defined(VF64x4_MUL_DEFINED) \
+	 && (defined(VF64x2_MUL_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_mul(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_MUL_DEFINED
+#endif
+#if !defined(VF64x4_DIV_DEFINED) \
+	 && (defined(VF64x2_DIV_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_div(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_DIV_DEFINED
+#endif
+#if !defined(VF64x4_MOD_DEFINED) \
+	 && (defined(VF64x2_MOD_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_mod(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_MOD_DEFINED
+#endif
+#if !defined(VF64x4_AVG_DEFINED) \
+	 && (defined(VF64x2_AVG_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_avg(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_AVG_DEFINED
+#endif
+#if !defined(VF64x4_CMPLT_DEFINED) \
+	 && (defined(VF64x2_CMPLT_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_cmplt(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_CMPLT_DEFINED
+#endif
+#if !defined(VF64x4_CMPEQ_DEFINED) \
+	 && (defined(VF64x2_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_cmpeq(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_CMPEQ_DEFINED
+#endif
+#if !defined(VF64x4_CMPGT_DEFINED) \
+	 && (defined(VF64x2_CMPGT_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_cmpgt(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_CMPGT_DEFINED
+#endif
+#if !defined(VF64x4_CMPLE_DEFINED) \
+	 && (defined(VF64x2_CMPLE_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_cmple(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_CMPLE_DEFINED
+#endif
+#if !defined(VF64x4_CMPGE_DEFINED) \
+	 && (defined(VF64x2_CMPGE_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_cmpge(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_CMPGE_DEFINED
+#endif
+#if !defined(VF64x4_MIN_DEFINED) \
+	 && (defined(VF64x2_MIN_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_min(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_MIN_DEFINED
+#endif
+#if !defined(VF64x4_MAX_DEFINED) \
+	 && (defined(VF64x2_MAX_DEFINED))
+VEC_FUNC_IMPL vf64x4 vf64x4_max(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.dbl[0] = vf64x2_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x2_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x4_MAX_DEFINED
+#endif
+#if !defined(VF64x8_SPLAT_DEFINED) \
+	 && (defined(VF64x4_SPLAT_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_splat(vec_f64 x)
+{
+	vf64x8 vec;
+	vec.dbl[0] = vf64x4_splat(x);
+	vec.dbl[1] = vf64x4_splat(x);
+	return vec;
+}
+# define VF64x8_SPLAT_DEFINED
+#endif
+#if !defined(VF64x8_LOAD_ALIGNED_DEFINED) \
+	 && (defined(VF64x4_LOAD_ALIGNED_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_load_aligned(const vec_f64 x[8])
+{
+	vf64x8 vec;
+	vec.dbl[0] = vf64x4_load_aligned(x);
+	vec.dbl[1] = vf64x4_load_aligned(x + 4);
+	return vec;
+}
+# define VF64x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x8_LOAD_DEFINED) \
+	 && (defined(VF64x4_LOAD_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_load(const vec_f64 x[8])
+{
+	vf64x8 vec;
+	vec.dbl[0] = vf64x4_load(x);
+	vec.dbl[1] = vf64x4_load(x + 4);
+	return vec;
+}
+# define VF64x8_LOAD_DEFINED
+#endif
+#if !defined(VF64x8_STORE_ALIGNED_DEFINED) \
+	 && (defined(VF64x4_STORE_ALIGNED_DEFINED))
+VEC_FUNC_IMPL void vf64x8_store_aligned(vf64x8 vec, vec_f64 x[8])
+{
+	vf64x4_store_aligned(vec.dbl[0], x);
+	vf64x4_store_aligned(vec.dbl[1], x + 4);
+}
+# define VF64x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x8_STORE_DEFINED) \
+	 && (defined(VF64x4_STORE_DEFINED))
+VEC_FUNC_IMPL void vf64x8_store(vf64x8 vec, vec_f64 x[8])
+{
+	vf64x4_store(vec.dbl[0], x);
+	vf64x4_store(vec.dbl[1], x + 4);
+}
+# define VF64x8_STORE_DEFINED
+#endif
+#if !defined(VF64x8_ADD_DEFINED) \
+	 && (defined(VF64x4_ADD_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_add(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_add(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_add(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_ADD_DEFINED
+#endif
+#if !defined(VF64x8_SUB_DEFINED) \
+	 && (defined(VF64x4_SUB_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_sub(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_sub(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_sub(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_SUB_DEFINED
+#endif
+#if !defined(VF64x8_MUL_DEFINED) \
+	 && (defined(VF64x4_MUL_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_mul(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_mul(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_mul(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_MUL_DEFINED
+#endif
+#if !defined(VF64x8_DIV_DEFINED) \
+	 && (defined(VF64x4_DIV_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_div(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_div(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_div(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_DIV_DEFINED
+#endif
+#if !defined(VF64x8_MOD_DEFINED) \
+	 && (defined(VF64x4_MOD_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_mod(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_mod(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_mod(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_MOD_DEFINED
+#endif
+#if !defined(VF64x8_AVG_DEFINED) \
+	 && (defined(VF64x4_AVG_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_avg(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_avg(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_avg(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_AVG_DEFINED
+#endif
+#if !defined(VF64x8_CMPLT_DEFINED) \
+	 && (defined(VF64x4_CMPLT_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_cmplt(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_cmplt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_cmplt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_CMPLT_DEFINED
+#endif
+#if !defined(VF64x8_CMPEQ_DEFINED) \
+	 && (defined(VF64x4_CMPEQ_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_cmpeq(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_cmpeq(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_cmpeq(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_CMPEQ_DEFINED
+#endif
+#if !defined(VF64x8_CMPGT_DEFINED) \
+	 && (defined(VF64x4_CMPGT_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_cmpgt(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_cmpgt(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_cmpgt(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_CMPGT_DEFINED
+#endif
+#if !defined(VF64x8_CMPLE_DEFINED) \
+	 && (defined(VF64x4_CMPLE_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_cmple(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_cmple(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_cmple(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_CMPLE_DEFINED
+#endif
+#if !defined(VF64x8_CMPGE_DEFINED) \
+	 && (defined(VF64x4_CMPGE_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_cmpge(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_cmpge(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_cmpge(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_CMPGE_DEFINED
+#endif
+#if !defined(VF64x8_MIN_DEFINED) \
+	 && (defined(VF64x4_MIN_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_min(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_min(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_min(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_MIN_DEFINED
+#endif
+#if !defined(VF64x8_MAX_DEFINED) \
+	 && (defined(VF64x4_MAX_DEFINED))
+VEC_FUNC_IMPL vf64x8 vf64x8_max(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.dbl[0] = vf64x4_max(vec1.dbl[0], vec2.dbl[0]);
+	vec1.dbl[1] = vf64x4_max(vec1.dbl[1], vec2.dbl[1]);
+	return vec1;
+}
+# define VF64x8_MAX_DEFINED
+#endif
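
The vf64x8 block above is the usual "double" fallback: each 8-lane op simply runs the
4-lane op on the two vf64x4 halves stored in .dbl[0] and .dbl[1]. A minimal standalone
sketch of that pattern follows; the half2/pair4 names are illustrative stand-ins of mine,
not vec's real typedefs, and real vector types would sit where the plain structs are.

	#include <stdio.h>

	typedef struct { double e[2]; } half2;   /* plays the role of the half-width type */
	typedef struct { half2 dbl[2]; } pair4;  /* plays the role of the doubled type */

	/* the half-width op */
	static half2 half2_add(half2 a, half2 b)
	{
		a.e[0] += b.e[0];
		a.e[1] += b.e[1];
		return a;
	}

	/* the doubled op: run the half-width op on each half independently */
	static pair4 pair4_add(pair4 a, pair4 b)
	{
		a.dbl[0] = half2_add(a.dbl[0], b.dbl[0]);
		a.dbl[1] = half2_add(a.dbl[1], b.dbl[1]);
		return a;
	}

	int main(void)
	{
		pair4 x = {{{{1.0, 2.0}}, {{3.0, 4.0}}}};
		pair4 y = {{{{0.5, 0.5}}, {{0.5, 0.5}}}};
		pair4 z = pair4_add(x, y);
		printf("%g %g %g %g\n", z.dbl[0].e[0], z.dbl[0].e[1],
			z.dbl[1].e[0], z.dbl[1].e[1]);
		return 0;
	}

Every operation in the generated block follows this same shape, which is why each vf64x8
wrapper is guarded only on the matching VF64x4_*_DEFINED macro.
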
--- a/include/vec/impl/gcc.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/include/vec/impl/gcc.h	Wed Apr 30 18:36:38 2025 -0400
@@ -25,24 +25,20 @@
 /* This file is automatically generated! Do not edit it directly!
  * Edit the code that generates it in utils/gengcc.c  --paper */
 
-#ifndef VEC_IMPL_GCC_H_
-#define VEC_IMPL_GCC_H_
-
-
+/* ------------------------------------------------------------------------ */
+/* PREPROCESSOR HELL INCOMING */
 
-
-/* vuint8x2 */
-
-#ifndef VINT8x2_SPLAT_DEFINED
+#if !defined(VINT8x2_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint8x2 vint8x2_splat(vec_int8 x)
 {
 	vint8x2 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
 	return vec;
 }
 # define VINT8x2_SPLAT_DEFINED
 #endif
-#ifndef VINT8x2_LOAD_ALIGNED_DEFINED
+#if !defined(VINT8x2_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint8x2 vint8x2_load_aligned(const vec_int8 x[2])
 {
 	vint8x2 vec;
@@ -51,7 +47,7 @@
 }
 # define VINT8x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x2_LOAD_DEFINED
+#if !defined(VINT8x2_LOAD_DEFINED)
 VEC_FUNC_IMPL vint8x2 vint8x2_load(const vec_int8 x[2])
 {
 	vint8x2 vec;
@@ -60,21 +56,21 @@
 }
 # define VINT8x2_LOAD_DEFINED
 #endif
-#ifndef VINT8x2_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint8x2_store_aligned(vint8x2 vec, vec_int8 arr[2])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT8x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x2_store_aligned(vint8x2 vec, vec_int8 x[2])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT8x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x2_STORE_DEFINED
-VEC_FUNC_IMPL void vint8x2_store(vint8x2 vec, vec_int8 arr[2])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT8x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x2_store(vint8x2 vec, vec_int8 x[2])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT8x2_STORE_DEFINED
 #endif
-#ifndef VINT8x2_ADD_DEFINED
+#if !defined(VINT8x2_ADD_DEFINED)
 VEC_FUNC_IMPL vint8x2 vint8x2_add(vint8x2 vec1, vint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -82,7 +78,7 @@
 }
 # define VINT8x2_ADD_DEFINED
 #endif
-#ifndef VINT8x2_SUB_DEFINED
+#if !defined(VINT8x2_SUB_DEFINED)
 VEC_FUNC_IMPL vint8x2 vint8x2_sub(vint8x2 vec1, vint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -90,7 +86,7 @@
 }
 # define VINT8x2_SUB_DEFINED
 #endif
-#ifndef VINT8x2_MUL_DEFINED
+#if !defined(VINT8x2_MUL_DEFINED)
 VEC_FUNC_IMPL vint8x2 vint8x2_mul(vint8x2 vec1, vint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -98,106 +94,8 @@
 }
 # define VINT8x2_MUL_DEFINED
 #endif
-#ifndef VINT8x2_AND_DEFINED
-VEC_FUNC_IMPL vint8x2 vint8x2_and(vint8x2 vec1, vint8x2 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT8x2_AND_DEFINED
-#endif
-#ifndef VINT8x2_OR_DEFINED
-VEC_FUNC_IMPL vint8x2 vint8x2_or(vint8x2 vec1, vint8x2 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT8x2_OR_DEFINED
-#endif
-#ifndef VINT8x2_XOR_DEFINED
-VEC_FUNC_IMPL vint8x2 vint8x2_xor(vint8x2 vec1, vint8x2 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT8x2_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x2_CMPLT_DEFINED
-VEC_FUNC_IMPL vint8x2 vint8x2_cmplt(vint8x2 vec1, vint8x2 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT8x2_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x2_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint8x2 vint8x2_cmpeq(vint8x2 vec1, vint8x2 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT8x2_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x2_CMPGT_DEFINED
-VEC_FUNC_IMPL vint8x2 vint8x2_cmpgt(vint8x2 vec1, vint8x2 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT8x2_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x2_CMPLE_DEFINED
-VEC_FUNC_IMPL vint8x2 vint8x2_cmple(vint8x2 vec1, vint8x2 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT8x2_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x2_CMPGE_DEFINED
-VEC_FUNC_IMPL vint8x2 vint8x2_cmpge(vint8x2 vec1, vint8x2 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT8x2_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x2_MIN_DEFINED
-VEC_FUNC_IMPL vint8x2 vint8x2_min(vint8x2 vec1, vint8x2 vec2)
-{
-	vint8x2 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT8x2_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x2_MAX_DEFINED
-VEC_FUNC_IMPL vint8x2 vint8x2_max(vint8x2 vec1, vint8x2 vec2)
-{
-	vint8x2 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT8x2_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x2_AVG_DEFINED
+#if !defined(VINT8x2_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x2 vint8x2_avg(vint8x2 vec1, vint8x2 vec2)
 {
 	vint8x2 ones = vint8x2_splat(1);
@@ -211,19 +109,107 @@
 }
 # define VINT8x2_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x2_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint8x2 vint8x2_lshift(vint8x2 vec1, vuint8x2 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT8x2_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x2_RSHIFT_DEFINED
+#if !defined(VINT8x2_AND_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_and(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_AND_DEFINED
+#endif
+#if !defined(VINT8x2_OR_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_or(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_OR_DEFINED
+#endif
+#if !defined(VINT8x2_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_xor(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_XOR_DEFINED
+#endif
+#if !defined(VINT8x2_NOT_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_not(vint8x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT8x2_NOT_DEFINED
+#endif
+#if !defined(VINT8x2_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmplt(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x2_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpeq(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x2_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpgt(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x2_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmple(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x2_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpge(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x2_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x2 vint8x2_min(vint8x2 vec1, vint8x2 vec2)
+{
+	vint8x2 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x2_MIN_DEFINED
+#endif
+#if !defined(VINT8x2_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x2 vint8x2_max(vint8x2 vec1, vint8x2 vec2)
+{
+	vint8x2 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x2_MAX_DEFINED
+#endif
+#if !defined(VINT8x2_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x2 vint8x2_rshift(vint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -231,9 +217,8 @@
 }
 # define VINT8x2_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x2_LRSHIFT_DEFINED
+#if !defined(VINT8x2_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x2 vint8x2_lrshift(vint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(2))))vec1.gcc >> vec2.gcc);
@@ -241,29 +226,26 @@
 }
 # define VINT8x2_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT8x2_NOT_DEFINED
-VEC_FUNC_IMPL vint8x2 vint8x2_not(vint8x2 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT8x2_NOT_DEFINED
-#endif
-
-
-/* vint8x2 */
-
-#ifndef VUINT8x2_SPLAT_DEFINED
+#if !defined(VINT8x2_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x2 vint8x2_lshift(vint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x2_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint8x2 vuint8x2_splat(vec_uint8 x)
 {
 	vuint8x2 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
 	return vec;
 }
 # define VUINT8x2_SPLAT_DEFINED
 #endif
-#ifndef VUINT8x2_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT8x2_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint8x2 vuint8x2_load_aligned(const vec_uint8 x[2])
 {
 	vuint8x2 vec;
@@ -272,7 +254,7 @@
 }
 # define VUINT8x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x2_LOAD_DEFINED
+#if !defined(VUINT8x2_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint8x2 vuint8x2_load(const vec_uint8 x[2])
 {
 	vuint8x2 vec;
@@ -281,21 +263,21 @@
 }
 # define VUINT8x2_LOAD_DEFINED
 #endif
-#ifndef VUINT8x2_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint8x2_store_aligned(vuint8x2 vec, vec_uint8 arr[2])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT8x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x2_store_aligned(vuint8x2 vec, vec_uint8 x[2])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT8x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x2_STORE_DEFINED
-VEC_FUNC_IMPL void vuint8x2_store(vuint8x2 vec, vec_uint8 arr[2])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT8x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint8x2_store(vuint8x2 vec, vec_uint8 x[2])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT8x2_STORE_DEFINED
 #endif
-#ifndef VUINT8x2_ADD_DEFINED
+#if !defined(VUINT8x2_ADD_DEFINED)
 VEC_FUNC_IMPL vuint8x2 vuint8x2_add(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -303,7 +285,7 @@
 }
 # define VUINT8x2_ADD_DEFINED
 #endif
-#ifndef VUINT8x2_SUB_DEFINED
+#if !defined(VUINT8x2_SUB_DEFINED)
 VEC_FUNC_IMPL vuint8x2 vuint8x2_sub(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -311,7 +293,7 @@
 }
 # define VUINT8x2_SUB_DEFINED
 #endif
-#ifndef VUINT8x2_MUL_DEFINED
+#if !defined(VUINT8x2_MUL_DEFINED)
 VEC_FUNC_IMPL vuint8x2 vuint8x2_mul(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -319,7 +301,16 @@
 }
 # define VUINT8x2_MUL_DEFINED
 #endif
-#ifndef VUINT8x2_AND_DEFINED
+#if !defined(VUINT8x2_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_avg(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT8x2_AVG_DEFINED
+#endif
+#if !defined(VUINT8x2_AND_DEFINED)
 VEC_FUNC_IMPL vuint8x2 vuint8x2_and(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -327,7 +318,7 @@
 }
 # define VUINT8x2_AND_DEFINED
 #endif
-#ifndef VUINT8x2_OR_DEFINED
+#if !defined(VUINT8x2_OR_DEFINED)
 VEC_FUNC_IMPL vuint8x2 vuint8x2_or(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -335,7 +326,7 @@
 }
 # define VUINT8x2_OR_DEFINED
 #endif
-#ifndef VUINT8x2_XOR_DEFINED
+#if !defined(VUINT8x2_XOR_DEFINED)
 VEC_FUNC_IMPL vuint8x2 vuint8x2_xor(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -343,8 +334,16 @@
 }
 # define VUINT8x2_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x2_CMPLT_DEFINED
+#if !defined(VUINT8x2_NOT_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_not(vuint8x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT8x2_NOT_DEFINED
+#endif
+#if !defined(VUINT8x2_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x2 vuint8x2_cmplt(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -352,9 +351,8 @@
 }
 # define VUINT8x2_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x2_CMPEQ_DEFINED
+#if !defined(VUINT8x2_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpeq(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -362,9 +360,8 @@
 }
 # define VUINT8x2_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x2_CMPGT_DEFINED
+#if !defined(VUINT8x2_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpgt(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -372,9 +369,8 @@
 }
 # define VUINT8x2_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x2_CMPLE_DEFINED
+#if !defined(VUINT8x2_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x2 vuint8x2_cmple(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -382,9 +378,8 @@
 }
 # define VUINT8x2_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x2_CMPGE_DEFINED
+#if !defined(VUINT8x2_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpge(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -392,9 +387,8 @@
 }
 # define VUINT8x2_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x2_MIN_DEFINED
+#if !defined(VUINT8x2_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x2 vuint8x2_min(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vuint8x2 mask;
@@ -404,9 +398,8 @@
 }
 # define VUINT8x2_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x2_MAX_DEFINED
+#if !defined(VUINT8x2_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x2 vuint8x2_max(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vuint8x2 mask;
@@ -416,30 +409,8 @@
 }
 # define VUINT8x2_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x2_AVG_DEFINED
-VEC_FUNC_IMPL vuint8x2 vuint8x2_avg(vuint8x2 vec1, vuint8x2 vec2)
-{
-	vint8x2 ones = vint8x2_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT8x2_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x2_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint8x2 vuint8x2_lshift(vuint8x2 vec1, vuint8x2 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT8x2_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x2_RSHIFT_DEFINED
+#if !defined(VUINT8x2_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x2 vuint8x2_rshift(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -447,9 +418,8 @@
 }
 # define VUINT8x2_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x2_LRSHIFT_DEFINED
+#if !defined(VUINT8x2_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x2 vuint8x2_lrshift(vuint8x2 vec1, vuint8x2 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(2))))vec1.gcc >> vec2.gcc);
@@ -457,29 +427,28 @@
 }
 # define VUINT8x2_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT8x2_NOT_DEFINED
-VEC_FUNC_IMPL vuint8x2 vuint8x2_not(vuint8x2 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT8x2_NOT_DEFINED
-#endif
-
-
-/* vuint8x4 */
-
-#ifndef VINT8x4_SPLAT_DEFINED
+#if !defined(VUINT8x2_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint8x2 vuint8x2_lshift(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_LSHIFT_DEFINED
+#endif
+#if !defined(VINT8x4_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint8x4 vint8x4_splat(vec_int8 x)
 {
 	vint8x4 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
 	return vec;
 }
 # define VINT8x4_SPLAT_DEFINED
 #endif
-#ifndef VINT8x4_LOAD_ALIGNED_DEFINED
+#if !defined(VINT8x4_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint8x4 vint8x4_load_aligned(const vec_int8 x[4])
 {
 	vint8x4 vec;
@@ -488,7 +457,7 @@
 }
 # define VINT8x4_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x4_LOAD_DEFINED
+#if !defined(VINT8x4_LOAD_DEFINED)
 VEC_FUNC_IMPL vint8x4 vint8x4_load(const vec_int8 x[4])
 {
 	vint8x4 vec;
@@ -497,21 +466,21 @@
 }
 # define VINT8x4_LOAD_DEFINED
 #endif
-#ifndef VINT8x4_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint8x4_store_aligned(vint8x4 vec, vec_int8 arr[4])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT8x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x4_store_aligned(vint8x4 vec, vec_int8 x[4])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT8x4_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x4_STORE_DEFINED
-VEC_FUNC_IMPL void vint8x4_store(vint8x4 vec, vec_int8 arr[4])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT8x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x4_store(vint8x4 vec, vec_int8 x[4])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT8x4_STORE_DEFINED
 #endif
-#ifndef VINT8x4_ADD_DEFINED
+#if !defined(VINT8x4_ADD_DEFINED)
 VEC_FUNC_IMPL vint8x4 vint8x4_add(vint8x4 vec1, vint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -519,7 +488,7 @@
 }
 # define VINT8x4_ADD_DEFINED
 #endif
-#ifndef VINT8x4_SUB_DEFINED
+#if !defined(VINT8x4_SUB_DEFINED)
 VEC_FUNC_IMPL vint8x4 vint8x4_sub(vint8x4 vec1, vint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -527,7 +496,7 @@
 }
 # define VINT8x4_SUB_DEFINED
 #endif
-#ifndef VINT8x4_MUL_DEFINED
+#if !defined(VINT8x4_MUL_DEFINED)
 VEC_FUNC_IMPL vint8x4 vint8x4_mul(vint8x4 vec1, vint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -535,106 +504,8 @@
 }
 # define VINT8x4_MUL_DEFINED
 #endif
-#ifndef VINT8x4_AND_DEFINED
-VEC_FUNC_IMPL vint8x4 vint8x4_and(vint8x4 vec1, vint8x4 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT8x4_AND_DEFINED
-#endif
-#ifndef VINT8x4_OR_DEFINED
-VEC_FUNC_IMPL vint8x4 vint8x4_or(vint8x4 vec1, vint8x4 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT8x4_OR_DEFINED
-#endif
-#ifndef VINT8x4_XOR_DEFINED
-VEC_FUNC_IMPL vint8x4 vint8x4_xor(vint8x4 vec1, vint8x4 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT8x4_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x4_CMPLT_DEFINED
-VEC_FUNC_IMPL vint8x4 vint8x4_cmplt(vint8x4 vec1, vint8x4 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT8x4_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x4_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint8x4 vint8x4_cmpeq(vint8x4 vec1, vint8x4 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT8x4_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x4_CMPGT_DEFINED
-VEC_FUNC_IMPL vint8x4 vint8x4_cmpgt(vint8x4 vec1, vint8x4 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT8x4_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x4_CMPLE_DEFINED
-VEC_FUNC_IMPL vint8x4 vint8x4_cmple(vint8x4 vec1, vint8x4 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT8x4_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x4_CMPGE_DEFINED
-VEC_FUNC_IMPL vint8x4 vint8x4_cmpge(vint8x4 vec1, vint8x4 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT8x4_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x4_MIN_DEFINED
-VEC_FUNC_IMPL vint8x4 vint8x4_min(vint8x4 vec1, vint8x4 vec2)
-{
-	vint8x4 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT8x4_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x4_MAX_DEFINED
-VEC_FUNC_IMPL vint8x4 vint8x4_max(vint8x4 vec1, vint8x4 vec2)
-{
-	vint8x4 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT8x4_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x4_AVG_DEFINED
+#if !defined(VINT8x4_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x4 vint8x4_avg(vint8x4 vec1, vint8x4 vec2)
 {
 	vint8x4 ones = vint8x4_splat(1);
@@ -648,19 +519,107 @@
 }
 # define VINT8x4_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x4_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint8x4 vint8x4_lshift(vint8x4 vec1, vuint8x4 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT8x4_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x4_RSHIFT_DEFINED
+#if !defined(VINT8x4_AND_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_and(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_AND_DEFINED
+#endif
+#if !defined(VINT8x4_OR_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_or(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_OR_DEFINED
+#endif
+#if !defined(VINT8x4_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_xor(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_XOR_DEFINED
+#endif
+#if !defined(VINT8x4_NOT_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_not(vint8x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT8x4_NOT_DEFINED
+#endif
+#if !defined(VINT8x4_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x4 vint8x4_cmplt(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x4_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x4 vint8x4_cmpeq(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x4_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x4 vint8x4_cmpgt(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x4_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x4 vint8x4_cmple(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x4_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x4 vint8x4_cmpge(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x4_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x4 vint8x4_min(vint8x4 vec1, vint8x4 vec2)
+{
+	vint8x4 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x4_MIN_DEFINED
+#endif
+#if !defined(VINT8x4_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x4 vint8x4_max(vint8x4 vec1, vint8x4 vec2)
+{
+	vint8x4 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x4_MAX_DEFINED
+#endif
+#if !defined(VINT8x4_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x4 vint8x4_rshift(vint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -668,9 +627,8 @@
 }
 # define VINT8x4_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x4_LRSHIFT_DEFINED
+#if !defined(VINT8x4_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x4 vint8x4_lrshift(vint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc);
@@ -678,29 +636,28 @@
 }
 # define VINT8x4_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT8x4_NOT_DEFINED
-VEC_FUNC_IMPL vint8x4 vint8x4_not(vint8x4 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT8x4_NOT_DEFINED
-#endif
-
-
-/* vint8x4 */
-
-#ifndef VUINT8x4_SPLAT_DEFINED
+#if !defined(VINT8x4_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x4 vint8x4_lshift(vint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x4_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint8x4 vuint8x4_splat(vec_uint8 x)
 {
 	vuint8x4 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
 	return vec;
 }
 # define VUINT8x4_SPLAT_DEFINED
 #endif
-#ifndef VUINT8x4_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT8x4_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint8x4 vuint8x4_load_aligned(const vec_uint8 x[4])
 {
 	vuint8x4 vec;
@@ -709,7 +666,7 @@
 }
 # define VUINT8x4_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x4_LOAD_DEFINED
+#if !defined(VUINT8x4_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint8x4 vuint8x4_load(const vec_uint8 x[4])
 {
 	vuint8x4 vec;
@@ -718,21 +675,21 @@
 }
 # define VUINT8x4_LOAD_DEFINED
 #endif
-#ifndef VUINT8x4_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint8x4_store_aligned(vuint8x4 vec, vec_uint8 arr[4])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT8x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x4_store_aligned(vuint8x4 vec, vec_uint8 x[4])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT8x4_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x4_STORE_DEFINED
-VEC_FUNC_IMPL void vuint8x4_store(vuint8x4 vec, vec_uint8 arr[4])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT8x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint8x4_store(vuint8x4 vec, vec_uint8 x[4])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT8x4_STORE_DEFINED
 #endif
-#ifndef VUINT8x4_ADD_DEFINED
+#if !defined(VUINT8x4_ADD_DEFINED)
 VEC_FUNC_IMPL vuint8x4 vuint8x4_add(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -740,7 +697,7 @@
 }
 # define VUINT8x4_ADD_DEFINED
 #endif
-#ifndef VUINT8x4_SUB_DEFINED
+#if !defined(VUINT8x4_SUB_DEFINED)
 VEC_FUNC_IMPL vuint8x4 vuint8x4_sub(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -748,7 +705,7 @@
 }
 # define VUINT8x4_SUB_DEFINED
 #endif
-#ifndef VUINT8x4_MUL_DEFINED
+#if !defined(VUINT8x4_MUL_DEFINED)
 VEC_FUNC_IMPL vuint8x4 vuint8x4_mul(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -756,7 +713,16 @@
 }
 # define VUINT8x4_MUL_DEFINED
 #endif
-#ifndef VUINT8x4_AND_DEFINED
+#if !defined(VUINT8x4_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_avg(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT8x4_AVG_DEFINED
+#endif
+#if !defined(VUINT8x4_AND_DEFINED)
 VEC_FUNC_IMPL vuint8x4 vuint8x4_and(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -764,7 +730,7 @@
 }
 # define VUINT8x4_AND_DEFINED
 #endif
-#ifndef VUINT8x4_OR_DEFINED
+#if !defined(VUINT8x4_OR_DEFINED)
 VEC_FUNC_IMPL vuint8x4 vuint8x4_or(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -772,7 +738,7 @@
 }
 # define VUINT8x4_OR_DEFINED
 #endif
-#ifndef VUINT8x4_XOR_DEFINED
+#if !defined(VUINT8x4_XOR_DEFINED)
 VEC_FUNC_IMPL vuint8x4 vuint8x4_xor(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -780,8 +746,16 @@
 }
 # define VUINT8x4_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x4_CMPLT_DEFINED
+#if !defined(VUINT8x4_NOT_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_not(vuint8x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT8x4_NOT_DEFINED
+#endif
+#if !defined(VUINT8x4_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x4 vuint8x4_cmplt(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -789,9 +763,8 @@
 }
 # define VUINT8x4_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x4_CMPEQ_DEFINED
+#if !defined(VUINT8x4_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpeq(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -799,9 +772,8 @@
 }
 # define VUINT8x4_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x4_CMPGT_DEFINED
+#if !defined(VUINT8x4_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpgt(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -809,9 +781,8 @@
 }
 # define VUINT8x4_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x4_CMPLE_DEFINED
+#if !defined(VUINT8x4_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x4 vuint8x4_cmple(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -819,9 +790,8 @@
 }
 # define VUINT8x4_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x4_CMPGE_DEFINED
+#if !defined(VUINT8x4_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpge(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -829,9 +799,8 @@
 }
 # define VUINT8x4_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x4_MIN_DEFINED
+#if !defined(VUINT8x4_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x4 vuint8x4_min(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vuint8x4 mask;
@@ -841,9 +810,8 @@
 }
 # define VUINT8x4_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x4_MAX_DEFINED
+#if !defined(VUINT8x4_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x4 vuint8x4_max(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vuint8x4 mask;
@@ -853,30 +821,8 @@
 }
 # define VUINT8x4_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x4_AVG_DEFINED
-VEC_FUNC_IMPL vuint8x4 vuint8x4_avg(vuint8x4 vec1, vuint8x4 vec2)
-{
-	vint8x4 ones = vint8x4_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT8x4_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x4_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint8x4 vuint8x4_lshift(vuint8x4 vec1, vuint8x4 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT8x4_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x4_RSHIFT_DEFINED
+#if !defined(VUINT8x4_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x4 vuint8x4_rshift(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -884,9 +830,8 @@
 }
 # define VUINT8x4_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x4_LRSHIFT_DEFINED
+#if !defined(VUINT8x4_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x4 vuint8x4_lrshift(vuint8x4 vec1, vuint8x4 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc);
@@ -894,29 +839,32 @@
 }
 # define VUINT8x4_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT8x4_NOT_DEFINED
-VEC_FUNC_IMPL vuint8x4 vuint8x4_not(vuint8x4 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT8x4_NOT_DEFINED
-#endif
-
-
-/* vuint8x8 */
-
-#ifndef VINT8x8_SPLAT_DEFINED
+#if !defined(VUINT8x4_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint8x4 vuint8x4_lshift(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_LSHIFT_DEFINED
+#endif
+#if !defined(VINT8x8_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint8x8 vint8x8_splat(vec_int8 x)
 {
 	vint8x8 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
 	return vec;
 }
 # define VINT8x8_SPLAT_DEFINED
 #endif
-#ifndef VINT8x8_LOAD_ALIGNED_DEFINED
+#if !defined(VINT8x8_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint8x8 vint8x8_load_aligned(const vec_int8 x[8])
 {
 	vint8x8 vec;
@@ -925,7 +873,7 @@
 }
 # define VINT8x8_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x8_LOAD_DEFINED
+#if !defined(VINT8x8_LOAD_DEFINED)
 VEC_FUNC_IMPL vint8x8 vint8x8_load(const vec_int8 x[8])
 {
 	vint8x8 vec;
@@ -934,21 +882,21 @@
 }
 # define VINT8x8_LOAD_DEFINED
 #endif
-#ifndef VINT8x8_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint8x8_store_aligned(vint8x8 vec, vec_int8 arr[8])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT8x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x8_store_aligned(vint8x8 vec, vec_int8 x[8])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT8x8_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x8_STORE_DEFINED
-VEC_FUNC_IMPL void vint8x8_store(vint8x8 vec, vec_int8 arr[8])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT8x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x8_store(vint8x8 vec, vec_int8 x[8])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT8x8_STORE_DEFINED
 #endif
-#ifndef VINT8x8_ADD_DEFINED
+#if !defined(VINT8x8_ADD_DEFINED)
 VEC_FUNC_IMPL vint8x8 vint8x8_add(vint8x8 vec1, vint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -956,7 +904,7 @@
 }
 # define VINT8x8_ADD_DEFINED
 #endif
-#ifndef VINT8x8_SUB_DEFINED
+#if !defined(VINT8x8_SUB_DEFINED)
 VEC_FUNC_IMPL vint8x8 vint8x8_sub(vint8x8 vec1, vint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -964,7 +912,7 @@
 }
 # define VINT8x8_SUB_DEFINED
 #endif
-#ifndef VINT8x8_MUL_DEFINED
+#if !defined(VINT8x8_MUL_DEFINED)
 VEC_FUNC_IMPL vint8x8 vint8x8_mul(vint8x8 vec1, vint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -972,106 +920,8 @@
 }
 # define VINT8x8_MUL_DEFINED
 #endif
-#ifndef VINT8x8_AND_DEFINED
-VEC_FUNC_IMPL vint8x8 vint8x8_and(vint8x8 vec1, vint8x8 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT8x8_AND_DEFINED
-#endif
-#ifndef VINT8x8_OR_DEFINED
-VEC_FUNC_IMPL vint8x8 vint8x8_or(vint8x8 vec1, vint8x8 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT8x8_OR_DEFINED
-#endif
-#ifndef VINT8x8_XOR_DEFINED
-VEC_FUNC_IMPL vint8x8 vint8x8_xor(vint8x8 vec1, vint8x8 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT8x8_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x8_CMPLT_DEFINED
-VEC_FUNC_IMPL vint8x8 vint8x8_cmplt(vint8x8 vec1, vint8x8 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT8x8_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x8_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint8x8 vint8x8_cmpeq(vint8x8 vec1, vint8x8 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT8x8_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x8_CMPGT_DEFINED
-VEC_FUNC_IMPL vint8x8 vint8x8_cmpgt(vint8x8 vec1, vint8x8 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT8x8_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x8_CMPLE_DEFINED
-VEC_FUNC_IMPL vint8x8 vint8x8_cmple(vint8x8 vec1, vint8x8 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT8x8_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x8_CMPGE_DEFINED
-VEC_FUNC_IMPL vint8x8 vint8x8_cmpge(vint8x8 vec1, vint8x8 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT8x8_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x8_MIN_DEFINED
-VEC_FUNC_IMPL vint8x8 vint8x8_min(vint8x8 vec1, vint8x8 vec2)
-{
-	vint8x8 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT8x8_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x8_MAX_DEFINED
-VEC_FUNC_IMPL vint8x8 vint8x8_max(vint8x8 vec1, vint8x8 vec2)
-{
-	vint8x8 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT8x8_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x8_AVG_DEFINED
+#if !defined(VINT8x8_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x8 vint8x8_avg(vint8x8 vec1, vint8x8 vec2)
 {
 	vint8x8 ones = vint8x8_splat(1);
@@ -1085,19 +935,107 @@
 }
 # define VINT8x8_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x8_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint8x8 vint8x8_lshift(vint8x8 vec1, vuint8x8 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT8x8_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x8_RSHIFT_DEFINED
+#if !defined(VINT8x8_AND_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_and(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_AND_DEFINED
+#endif
+#if !defined(VINT8x8_OR_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_or(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_OR_DEFINED
+#endif
+#if !defined(VINT8x8_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_xor(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_XOR_DEFINED
+#endif
+#if !defined(VINT8x8_NOT_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_not(vint8x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT8x8_NOT_DEFINED
+#endif
+#if !defined(VINT8x8_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x8 vint8x8_cmplt(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x8_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x8 vint8x8_cmpeq(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x8_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x8 vint8x8_cmpgt(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x8_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x8 vint8x8_cmple(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x8_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x8 vint8x8_cmpge(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x8_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x8 vint8x8_min(vint8x8 vec1, vint8x8 vec2)
+{
+	vint8x8 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x8_MIN_DEFINED
+#endif
+#if !defined(VINT8x8_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x8 vint8x8_max(vint8x8 vec1, vint8x8 vec2)
+{
+	vint8x8 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x8_MAX_DEFINED
+#endif
+#if !defined(VINT8x8_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x8 vint8x8_rshift(vint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -1105,9 +1043,8 @@
 }
 # define VINT8x8_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x8_LRSHIFT_DEFINED
+#if !defined(VINT8x8_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x8 vint8x8_lrshift(vint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc);
@@ -1115,29 +1052,32 @@
 }
 # define VINT8x8_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT8x8_NOT_DEFINED
-VEC_FUNC_IMPL vint8x8 vint8x8_not(vint8x8 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT8x8_NOT_DEFINED
-#endif
-
-
-/* vint8x8 */
-
-#ifndef VUINT8x8_SPLAT_DEFINED
+#if !defined(VINT8x8_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x8 vint8x8_lshift(vint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x8_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint8x8 vuint8x8_splat(vec_uint8 x)
 {
 	vuint8x8 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
 	return vec;
 }
 # define VUINT8x8_SPLAT_DEFINED
 #endif
-#ifndef VUINT8x8_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT8x8_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint8x8 vuint8x8_load_aligned(const vec_uint8 x[8])
 {
 	vuint8x8 vec;
@@ -1146,7 +1086,7 @@
 }
 # define VUINT8x8_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x8_LOAD_DEFINED
+#if !defined(VUINT8x8_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint8x8 vuint8x8_load(const vec_uint8 x[8])
 {
 	vuint8x8 vec;
@@ -1155,21 +1095,21 @@
 }
 # define VUINT8x8_LOAD_DEFINED
 #endif
-#ifndef VUINT8x8_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint8x8_store_aligned(vuint8x8 vec, vec_uint8 arr[8])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT8x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x8_store_aligned(vuint8x8 vec, vec_uint8 x[8])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT8x8_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x8_STORE_DEFINED
-VEC_FUNC_IMPL void vuint8x8_store(vuint8x8 vec, vec_uint8 arr[8])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT8x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint8x8_store(vuint8x8 vec, vec_uint8 x[8])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT8x8_STORE_DEFINED
 #endif
-#ifndef VUINT8x8_ADD_DEFINED
+#if !defined(VUINT8x8_ADD_DEFINED)
 VEC_FUNC_IMPL vuint8x8 vuint8x8_add(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -1177,7 +1117,7 @@
 }
 # define VUINT8x8_ADD_DEFINED
 #endif
-#ifndef VUINT8x8_SUB_DEFINED
+#if !defined(VUINT8x8_SUB_DEFINED)
 VEC_FUNC_IMPL vuint8x8 vuint8x8_sub(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -1185,7 +1125,7 @@
 }
 # define VUINT8x8_SUB_DEFINED
 #endif
-#ifndef VUINT8x8_MUL_DEFINED
+#if !defined(VUINT8x8_MUL_DEFINED)
 VEC_FUNC_IMPL vuint8x8 vuint8x8_mul(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -1193,7 +1133,16 @@
 }
 # define VUINT8x8_MUL_DEFINED
 #endif
-#ifndef VUINT8x8_AND_DEFINED
+#if !defined(VUINT8x8_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_avg(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT8x8_AVG_DEFINED
+#endif
+#if !defined(VUINT8x8_AND_DEFINED)
 VEC_FUNC_IMPL vuint8x8 vuint8x8_and(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -1201,7 +1150,7 @@
 }
 # define VUINT8x8_AND_DEFINED
 #endif
-#ifndef VUINT8x8_OR_DEFINED
+#if !defined(VUINT8x8_OR_DEFINED)
 VEC_FUNC_IMPL vuint8x8 vuint8x8_or(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -1209,7 +1158,7 @@
 }
 # define VUINT8x8_OR_DEFINED
 #endif
-#ifndef VUINT8x8_XOR_DEFINED
+#if !defined(VUINT8x8_XOR_DEFINED)
 VEC_FUNC_IMPL vuint8x8 vuint8x8_xor(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -1217,8 +1166,16 @@
 }
 # define VUINT8x8_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x8_CMPLT_DEFINED
+#if !defined(VUINT8x8_NOT_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_not(vuint8x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT8x8_NOT_DEFINED
+#endif
+#if !defined(VUINT8x8_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x8 vuint8x8_cmplt(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -1226,9 +1183,8 @@
 }
 # define VUINT8x8_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x8_CMPEQ_DEFINED
+#if !defined(VUINT8x8_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpeq(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -1236,9 +1192,8 @@
 }
 # define VUINT8x8_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x8_CMPGT_DEFINED
+#if !defined(VUINT8x8_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpgt(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -1246,9 +1201,8 @@
 }
 # define VUINT8x8_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x8_CMPLE_DEFINED
+#if !defined(VUINT8x8_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x8 vuint8x8_cmple(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -1256,9 +1210,8 @@
 }
 # define VUINT8x8_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x8_CMPGE_DEFINED
+#if !defined(VUINT8x8_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpge(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -1266,9 +1219,8 @@
 }
 # define VUINT8x8_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x8_MIN_DEFINED
+#if !defined(VUINT8x8_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x8 vuint8x8_min(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vuint8x8 mask;
@@ -1278,9 +1230,8 @@
 }
 # define VUINT8x8_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x8_MAX_DEFINED
+#if !defined(VUINT8x8_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x8 vuint8x8_max(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vuint8x8 mask;
@@ -1290,30 +1241,8 @@
 }
 # define VUINT8x8_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x8_AVG_DEFINED
-VEC_FUNC_IMPL vuint8x8 vuint8x8_avg(vuint8x8 vec1, vuint8x8 vec2)
-{
-	vint8x8 ones = vint8x8_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT8x8_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x8_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint8x8 vuint8x8_lshift(vuint8x8 vec1, vuint8x8 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT8x8_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x8_RSHIFT_DEFINED
+#if !defined(VUINT8x8_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x8 vuint8x8_rshift(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -1321,9 +1250,8 @@
 }
 # define VUINT8x8_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x8_LRSHIFT_DEFINED
+#if !defined(VUINT8x8_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x8 vuint8x8_lrshift(vuint8x8 vec1, vuint8x8 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc);
@@ -1331,29 +1259,40 @@
 }
 # define VUINT8x8_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT8x8_NOT_DEFINED
-VEC_FUNC_IMPL vuint8x8 vuint8x8_not(vuint8x8 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT8x8_NOT_DEFINED
-#endif
-
-
-/* vuint8x16 */
-
-#ifndef VINT8x16_SPLAT_DEFINED
+#if !defined(VUINT8x8_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint8x8 vuint8x8_lshift(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_LSHIFT_DEFINED
+#endif
+#if !defined(VINT8x16_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_splat(vec_int8 x)
 {
 	vint8x16 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
 	return vec;
 }
 # define VINT8x16_SPLAT_DEFINED
 #endif
-#ifndef VINT8x16_LOAD_ALIGNED_DEFINED
+#if !defined(VINT8x16_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_load_aligned(const vec_int8 x[16])
 {
 	vint8x16 vec;
@@ -1362,7 +1301,7 @@
 }
 # define VINT8x16_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x16_LOAD_DEFINED
+#if !defined(VINT8x16_LOAD_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_load(const vec_int8 x[16])
 {
 	vint8x16 vec;
@@ -1371,21 +1310,21 @@
 }
 # define VINT8x16_LOAD_DEFINED
 #endif
-#ifndef VINT8x16_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 arr[16])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT8x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 x[16])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT8x16_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x16_STORE_DEFINED
-VEC_FUNC_IMPL void vint8x16_store(vint8x16 vec, vec_int8 arr[16])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT8x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x16_store(vint8x16 vec, vec_int8 x[16])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT8x16_STORE_DEFINED
 #endif
-#ifndef VINT8x16_ADD_DEFINED
+#if !defined(VINT8x16_ADD_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_add(vint8x16 vec1, vint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -1393,7 +1332,7 @@
 }
 # define VINT8x16_ADD_DEFINED
 #endif
-#ifndef VINT8x16_SUB_DEFINED
+#if !defined(VINT8x16_SUB_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_sub(vint8x16 vec1, vint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -1401,7 +1340,7 @@
 }
 # define VINT8x16_SUB_DEFINED
 #endif
-#ifndef VINT8x16_MUL_DEFINED
+#if !defined(VINT8x16_MUL_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_mul(vint8x16 vec1, vint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -1409,106 +1348,8 @@
 }
 # define VINT8x16_MUL_DEFINED
 #endif
-#ifndef VINT8x16_AND_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT8x16_AND_DEFINED
-#endif
-#ifndef VINT8x16_OR_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT8x16_OR_DEFINED
-#endif
-#ifndef VINT8x16_XOR_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT8x16_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x16_CMPLT_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT8x16_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x16_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT8x16_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x16_CMPGT_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT8x16_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x16_CMPLE_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_cmple(vint8x16 vec1, vint8x16 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT8x16_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x16_CMPGE_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_cmpge(vint8x16 vec1, vint8x16 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT8x16_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x16_MIN_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2)
-{
-	vint8x16 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT8x16_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x16_MAX_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2)
-{
-	vint8x16 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT8x16_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x16_AVG_DEFINED
+#if !defined(VINT8x16_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2)
 {
 	vint8x16 ones = vint8x16_splat(1);
@@ -1522,19 +1363,107 @@
 }
 # define VINT8x16_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x16_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT8x16_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x16_RSHIFT_DEFINED
+#if !defined(VINT8x16_AND_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_AND_DEFINED
+#endif
+#if !defined(VINT8x16_OR_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_OR_DEFINED
+#endif
+#if !defined(VINT8x16_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_XOR_DEFINED
+#endif
+#if !defined(VINT8x16_NOT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_not(vint8x16 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT8x16_NOT_DEFINED
+#endif
+#if !defined(VINT8x16_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x16_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x16_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x16_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x16 vint8x16_cmple(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x16_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpge(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x16_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x16_MIN_DEFINED
+#endif
+#if !defined(VINT8x16_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x16_MAX_DEFINED
+#endif
+#if !defined(VINT8x16_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x16 vint8x16_rshift(vint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -1542,9 +1471,8 @@
 }
 # define VINT8x16_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x16_LRSHIFT_DEFINED
+#if !defined(VINT8x16_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
@@ -1552,29 +1480,40 @@
 }
 # define VINT8x16_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT8x16_NOT_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_not(vint8x16 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT8x16_NOT_DEFINED
-#endif
-
-
-/* vint8x16 */
-
-#ifndef VUINT8x16_SPLAT_DEFINED
+#if !defined(VINT8x16_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x16_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_splat(vec_uint8 x)
 {
 	vuint8x16 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
 	return vec;
 }
 # define VUINT8x16_SPLAT_DEFINED
 #endif
-#ifndef VUINT8x16_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT8x16_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_load_aligned(const vec_uint8 x[16])
 {
 	vuint8x16 vec;
@@ -1583,7 +1522,7 @@
 }
 # define VUINT8x16_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x16_LOAD_DEFINED
+#if !defined(VUINT8x16_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 x[16])
 {
 	vuint8x16 vec;
@@ -1592,21 +1531,21 @@
 }
 # define VUINT8x16_LOAD_DEFINED
 #endif
-#ifndef VUINT8x16_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 arr[16])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT8x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 x[16])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT8x16_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x16_STORE_DEFINED
-VEC_FUNC_IMPL void vuint8x16_store(vuint8x16 vec, vec_uint8 arr[16])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT8x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint8x16_store(vuint8x16 vec, vec_uint8 x[16])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT8x16_STORE_DEFINED
 #endif
-#ifndef VUINT8x16_ADD_DEFINED
+#if !defined(VUINT8x16_ADD_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_add(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -1614,7 +1553,7 @@
 }
 # define VUINT8x16_ADD_DEFINED
 #endif
-#ifndef VUINT8x16_SUB_DEFINED
+#if !defined(VUINT8x16_SUB_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_sub(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -1622,7 +1561,7 @@
 }
 # define VUINT8x16_SUB_DEFINED
 #endif
-#ifndef VUINT8x16_MUL_DEFINED
+#if !defined(VUINT8x16_MUL_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_mul(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -1630,7 +1569,16 @@
 }
 # define VUINT8x16_MUL_DEFINED
 #endif
-#ifndef VUINT8x16_AND_DEFINED
+#if !defined(VUINT8x16_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT8x16_AVG_DEFINED
+#endif
+#if !defined(VUINT8x16_AND_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_and(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -1638,7 +1586,7 @@
 }
 # define VUINT8x16_AND_DEFINED
 #endif
-#ifndef VUINT8x16_OR_DEFINED
+#if !defined(VUINT8x16_OR_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_or(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -1646,7 +1594,7 @@
 }
 # define VUINT8x16_OR_DEFINED
 #endif
-#ifndef VUINT8x16_XOR_DEFINED
+#if !defined(VUINT8x16_XOR_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_xor(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -1654,8 +1602,16 @@
 }
 # define VUINT8x16_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x16_CMPLT_DEFINED
+#if !defined(VUINT8x16_NOT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_not(vuint8x16 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT8x16_NOT_DEFINED
+#endif
+#if !defined(VUINT8x16_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x16 vuint8x16_cmplt(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -1663,9 +1619,8 @@
 }
 # define VUINT8x16_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x16_CMPEQ_DEFINED
+#if !defined(VUINT8x16_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpeq(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -1673,9 +1628,8 @@
 }
 # define VUINT8x16_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x16_CMPGT_DEFINED
+#if !defined(VUINT8x16_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpgt(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -1683,9 +1637,8 @@
 }
 # define VUINT8x16_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x16_CMPLE_DEFINED
+#if !defined(VUINT8x16_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x16 vuint8x16_cmple(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -1693,9 +1646,8 @@
 }
 # define VUINT8x16_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x16_CMPGE_DEFINED
+#if !defined(VUINT8x16_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpge(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -1703,9 +1655,8 @@
 }
 # define VUINT8x16_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x16_MIN_DEFINED
+#if !defined(VUINT8x16_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x16 vuint8x16_min(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 mask;
@@ -1715,9 +1666,8 @@
 }
 # define VUINT8x16_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x16_MAX_DEFINED
+#if !defined(VUINT8x16_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x16 vuint8x16_max(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 mask;
@@ -1727,30 +1677,8 @@
 }
 # define VUINT8x16_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x16_AVG_DEFINED
-VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2)
-{
-	vint8x16 ones = vint8x16_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT8x16_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x16_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT8x16_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x16_RSHIFT_DEFINED
+#if !defined(VUINT8x16_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -1758,9 +1686,8 @@
 }
 # define VUINT8x16_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x16_LRSHIFT_DEFINED
+#if !defined(VUINT8x16_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
@@ -1768,29 +1695,56 @@
 }
 # define VUINT8x16_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT8x16_NOT_DEFINED
-VEC_FUNC_IMPL vuint8x16 vuint8x16_not(vuint8x16 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT8x16_NOT_DEFINED
-#endif
-
-
-/* vuint8x32 */
-
-#ifndef VINT8x32_SPLAT_DEFINED
+#if !defined(VUINT8x16_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_LSHIFT_DEFINED
+#endif
+#if !defined(VINT8x32_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint8x32 vint8x32_splat(vec_int8 x)
 {
 	vint8x32 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
+	vec.gcc[16] = x;
+	vec.gcc[17] = x;
+	vec.gcc[18] = x;
+	vec.gcc[19] = x;
+	vec.gcc[20] = x;
+	vec.gcc[21] = x;
+	vec.gcc[22] = x;
+	vec.gcc[23] = x;
+	vec.gcc[24] = x;
+	vec.gcc[25] = x;
+	vec.gcc[26] = x;
+	vec.gcc[27] = x;
+	vec.gcc[28] = x;
+	vec.gcc[29] = x;
+	vec.gcc[30] = x;
+	vec.gcc[31] = x;
 	return vec;
 }
 # define VINT8x32_SPLAT_DEFINED
 #endif
-#ifndef VINT8x32_LOAD_ALIGNED_DEFINED
+#if !defined(VINT8x32_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint8x32 vint8x32_load_aligned(const vec_int8 x[32])
 {
 	vint8x32 vec;
@@ -1799,7 +1753,7 @@
 }
 # define VINT8x32_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x32_LOAD_DEFINED
+#if !defined(VINT8x32_LOAD_DEFINED)
 VEC_FUNC_IMPL vint8x32 vint8x32_load(const vec_int8 x[32])
 {
 	vint8x32 vec;
@@ -1808,21 +1762,21 @@
 }
 # define VINT8x32_LOAD_DEFINED
 #endif
-#ifndef VINT8x32_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint8x32_store_aligned(vint8x32 vec, vec_int8 arr[32])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT8x32_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x32_store_aligned(vint8x32 vec, vec_int8 x[32])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT8x32_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x32_STORE_DEFINED
-VEC_FUNC_IMPL void vint8x32_store(vint8x32 vec, vec_int8 arr[32])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT8x32_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x32_store(vint8x32 vec, vec_int8 x[32])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT8x32_STORE_DEFINED
 #endif
-#ifndef VINT8x32_ADD_DEFINED
+#if !defined(VINT8x32_ADD_DEFINED)
 VEC_FUNC_IMPL vint8x32 vint8x32_add(vint8x32 vec1, vint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -1830,7 +1784,7 @@
 }
 # define VINT8x32_ADD_DEFINED
 #endif
-#ifndef VINT8x32_SUB_DEFINED
+#if !defined(VINT8x32_SUB_DEFINED)
 VEC_FUNC_IMPL vint8x32 vint8x32_sub(vint8x32 vec1, vint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -1838,7 +1792,7 @@
 }
 # define VINT8x32_SUB_DEFINED
 #endif
-#ifndef VINT8x32_MUL_DEFINED
+#if !defined(VINT8x32_MUL_DEFINED)
 VEC_FUNC_IMPL vint8x32 vint8x32_mul(vint8x32 vec1, vint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -1846,106 +1800,8 @@
 }
 # define VINT8x32_MUL_DEFINED
 #endif
-#ifndef VINT8x32_AND_DEFINED
-VEC_FUNC_IMPL vint8x32 vint8x32_and(vint8x32 vec1, vint8x32 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT8x32_AND_DEFINED
-#endif
-#ifndef VINT8x32_OR_DEFINED
-VEC_FUNC_IMPL vint8x32 vint8x32_or(vint8x32 vec1, vint8x32 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT8x32_OR_DEFINED
-#endif
-#ifndef VINT8x32_XOR_DEFINED
-VEC_FUNC_IMPL vint8x32 vint8x32_xor(vint8x32 vec1, vint8x32 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT8x32_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x32_CMPLT_DEFINED
-VEC_FUNC_IMPL vint8x32 vint8x32_cmplt(vint8x32 vec1, vint8x32 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT8x32_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x32_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint8x32 vint8x32_cmpeq(vint8x32 vec1, vint8x32 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT8x32_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x32_CMPGT_DEFINED
-VEC_FUNC_IMPL vint8x32 vint8x32_cmpgt(vint8x32 vec1, vint8x32 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT8x32_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x32_CMPLE_DEFINED
-VEC_FUNC_IMPL vint8x32 vint8x32_cmple(vint8x32 vec1, vint8x32 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT8x32_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x32_CMPGE_DEFINED
-VEC_FUNC_IMPL vint8x32 vint8x32_cmpge(vint8x32 vec1, vint8x32 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT8x32_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x32_MIN_DEFINED
-VEC_FUNC_IMPL vint8x32 vint8x32_min(vint8x32 vec1, vint8x32 vec2)
-{
-	vint8x32 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT8x32_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x32_MAX_DEFINED
-VEC_FUNC_IMPL vint8x32 vint8x32_max(vint8x32 vec1, vint8x32 vec2)
-{
-	vint8x32 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT8x32_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x32_AVG_DEFINED
+#if !defined(VINT8x32_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x32 vint8x32_avg(vint8x32 vec1, vint8x32 vec2)
 {
 	vint8x32 ones = vint8x32_splat(1);
@@ -1959,19 +1815,107 @@
 }
 # define VINT8x32_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x32_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint8x32 vint8x32_lshift(vint8x32 vec1, vuint8x32 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT8x32_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x32_RSHIFT_DEFINED
+#if !defined(VINT8x32_AND_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_and(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_AND_DEFINED
+#endif
+#if !defined(VINT8x32_OR_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_or(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_OR_DEFINED
+#endif
+#if !defined(VINT8x32_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_xor(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_XOR_DEFINED
+#endif
+#if !defined(VINT8x32_NOT_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_not(vint8x32 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT8x32_NOT_DEFINED
+#endif
+#if !defined(VINT8x32_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x32 vint8x32_cmplt(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x32_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x32 vint8x32_cmpeq(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x32_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x32 vint8x32_cmpgt(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x32_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x32 vint8x32_cmple(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x32_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x32 vint8x32_cmpge(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x32_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x32 vint8x32_min(vint8x32 vec1, vint8x32 vec2)
+{
+	vint8x32 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x32_MIN_DEFINED
+#endif
+#if !defined(VINT8x32_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x32 vint8x32_max(vint8x32 vec1, vint8x32 vec2)
+{
+	vint8x32 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x32_MAX_DEFINED
+#endif
+#if !defined(VINT8x32_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x32 vint8x32_rshift(vint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -1979,9 +1923,8 @@
 }
 # define VINT8x32_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x32_LRSHIFT_DEFINED
+#if !defined(VINT8x32_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x32 vint8x32_lrshift(vint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
@@ -1989,29 +1932,56 @@
 }
 # define VINT8x32_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT8x32_NOT_DEFINED
-VEC_FUNC_IMPL vint8x32 vint8x32_not(vint8x32 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT8x32_NOT_DEFINED
-#endif
-
-
-/* vint8x32 */
-
-#ifndef VUINT8x32_SPLAT_DEFINED
+#if !defined(VINT8x32_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x32 vint8x32_lshift(vint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x32_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint8x32 vuint8x32_splat(vec_uint8 x)
 {
 	vuint8x32 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
+	vec.gcc[16] = x;
+	vec.gcc[17] = x;
+	vec.gcc[18] = x;
+	vec.gcc[19] = x;
+	vec.gcc[20] = x;
+	vec.gcc[21] = x;
+	vec.gcc[22] = x;
+	vec.gcc[23] = x;
+	vec.gcc[24] = x;
+	vec.gcc[25] = x;
+	vec.gcc[26] = x;
+	vec.gcc[27] = x;
+	vec.gcc[28] = x;
+	vec.gcc[29] = x;
+	vec.gcc[30] = x;
+	vec.gcc[31] = x;
 	return vec;
 }
 # define VUINT8x32_SPLAT_DEFINED
 #endif
-#ifndef VUINT8x32_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT8x32_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint8x32 vuint8x32_load_aligned(const vec_uint8 x[32])
 {
 	vuint8x32 vec;
@@ -2020,7 +1990,7 @@
 }
 # define VUINT8x32_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x32_LOAD_DEFINED
+#if !defined(VUINT8x32_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint8x32 vuint8x32_load(const vec_uint8 x[32])
 {
 	vuint8x32 vec;
@@ -2029,21 +1999,21 @@
 }
 # define VUINT8x32_LOAD_DEFINED
 #endif
-#ifndef VUINT8x32_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint8x32_store_aligned(vuint8x32 vec, vec_uint8 arr[32])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT8x32_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x32_store_aligned(vuint8x32 vec, vec_uint8 x[32])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT8x32_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x32_STORE_DEFINED
-VEC_FUNC_IMPL void vuint8x32_store(vuint8x32 vec, vec_uint8 arr[32])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT8x32_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint8x32_store(vuint8x32 vec, vec_uint8 x[32])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT8x32_STORE_DEFINED
 #endif
-#ifndef VUINT8x32_ADD_DEFINED
+#if !defined(VUINT8x32_ADD_DEFINED)
 VEC_FUNC_IMPL vuint8x32 vuint8x32_add(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -2051,7 +2021,7 @@
 }
 # define VUINT8x32_ADD_DEFINED
 #endif
-#ifndef VUINT8x32_SUB_DEFINED
+#if !defined(VUINT8x32_SUB_DEFINED)
 VEC_FUNC_IMPL vuint8x32 vuint8x32_sub(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -2059,7 +2029,7 @@
 }
 # define VUINT8x32_SUB_DEFINED
 #endif
-#ifndef VUINT8x32_MUL_DEFINED
+#if !defined(VUINT8x32_MUL_DEFINED)
 VEC_FUNC_IMPL vuint8x32 vuint8x32_mul(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -2067,7 +2037,16 @@
 }
 # define VUINT8x32_MUL_DEFINED
 #endif
-#ifndef VUINT8x32_AND_DEFINED
+#if !defined(VUINT8x32_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_avg(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT8x32_AVG_DEFINED
+#endif
+#if !defined(VUINT8x32_AND_DEFINED)
 VEC_FUNC_IMPL vuint8x32 vuint8x32_and(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -2075,7 +2054,7 @@
 }
 # define VUINT8x32_AND_DEFINED
 #endif
-#ifndef VUINT8x32_OR_DEFINED
+#if !defined(VUINT8x32_OR_DEFINED)
 VEC_FUNC_IMPL vuint8x32 vuint8x32_or(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -2083,7 +2062,7 @@
 }
 # define VUINT8x32_OR_DEFINED
 #endif
-#ifndef VUINT8x32_XOR_DEFINED
+#if !defined(VUINT8x32_XOR_DEFINED)
 VEC_FUNC_IMPL vuint8x32 vuint8x32_xor(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -2091,8 +2070,16 @@
 }
 # define VUINT8x32_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x32_CMPLT_DEFINED
+#if !defined(VUINT8x32_NOT_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_not(vuint8x32 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT8x32_NOT_DEFINED
+#endif
+#if !defined(VUINT8x32_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x32 vuint8x32_cmplt(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -2100,9 +2087,8 @@
 }
 # define VUINT8x32_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x32_CMPEQ_DEFINED
+#if !defined(VUINT8x32_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpeq(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -2110,9 +2096,8 @@
 }
 # define VUINT8x32_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x32_CMPGT_DEFINED
+#if !defined(VUINT8x32_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpgt(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -2120,9 +2105,8 @@
 }
 # define VUINT8x32_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x32_CMPLE_DEFINED
+#if !defined(VUINT8x32_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x32 vuint8x32_cmple(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -2130,9 +2114,8 @@
 }
 # define VUINT8x32_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x32_CMPGE_DEFINED
+#if !defined(VUINT8x32_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpge(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -2140,9 +2123,8 @@
 }
 # define VUINT8x32_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x32_MIN_DEFINED
+#if !defined(VUINT8x32_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x32 vuint8x32_min(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vuint8x32 mask;
@@ -2152,9 +2134,8 @@
 }
 # define VUINT8x32_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x32_MAX_DEFINED
+#if !defined(VUINT8x32_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x32 vuint8x32_max(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vuint8x32 mask;
@@ -2164,30 +2145,8 @@
 }
 # define VUINT8x32_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x32_AVG_DEFINED
-VEC_FUNC_IMPL vuint8x32 vuint8x32_avg(vuint8x32 vec1, vuint8x32 vec2)
-{
-	vint8x32 ones = vint8x32_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT8x32_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x32_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint8x32 vuint8x32_lshift(vuint8x32 vec1, vuint8x32 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT8x32_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x32_RSHIFT_DEFINED
+#if !defined(VUINT8x32_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x32 vuint8x32_rshift(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -2195,9 +2154,8 @@
 }
 # define VUINT8x32_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x32_LRSHIFT_DEFINED
+#if !defined(VUINT8x32_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x32 vuint8x32_lrshift(vuint8x32 vec1, vuint8x32 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
@@ -2205,29 +2163,88 @@
 }
 # define VUINT8x32_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT8x32_NOT_DEFINED
-VEC_FUNC_IMPL vuint8x32 vuint8x32_not(vuint8x32 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT8x32_NOT_DEFINED
-#endif
-
-
-/* vuint8x64 */
-
-#ifndef VINT8x64_SPLAT_DEFINED
+#if !defined(VUINT8x32_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint8x32 vuint8x32_lshift(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_LSHIFT_DEFINED
+#endif
+#if !defined(VINT8x64_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint8x64 vint8x64_splat(vec_int8 x)
 {
 	vint8x64 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
+	vec.gcc[16] = x;
+	vec.gcc[17] = x;
+	vec.gcc[18] = x;
+	vec.gcc[19] = x;
+	vec.gcc[20] = x;
+	vec.gcc[21] = x;
+	vec.gcc[22] = x;
+	vec.gcc[23] = x;
+	vec.gcc[24] = x;
+	vec.gcc[25] = x;
+	vec.gcc[26] = x;
+	vec.gcc[27] = x;
+	vec.gcc[28] = x;
+	vec.gcc[29] = x;
+	vec.gcc[30] = x;
+	vec.gcc[31] = x;
+	vec.gcc[32] = x;
+	vec.gcc[33] = x;
+	vec.gcc[34] = x;
+	vec.gcc[35] = x;
+	vec.gcc[36] = x;
+	vec.gcc[37] = x;
+	vec.gcc[38] = x;
+	vec.gcc[39] = x;
+	vec.gcc[40] = x;
+	vec.gcc[41] = x;
+	vec.gcc[42] = x;
+	vec.gcc[43] = x;
+	vec.gcc[44] = x;
+	vec.gcc[45] = x;
+	vec.gcc[46] = x;
+	vec.gcc[47] = x;
+	vec.gcc[48] = x;
+	vec.gcc[49] = x;
+	vec.gcc[50] = x;
+	vec.gcc[51] = x;
+	vec.gcc[52] = x;
+	vec.gcc[53] = x;
+	vec.gcc[54] = x;
+	vec.gcc[55] = x;
+	vec.gcc[56] = x;
+	vec.gcc[57] = x;
+	vec.gcc[58] = x;
+	vec.gcc[59] = x;
+	vec.gcc[60] = x;
+	vec.gcc[61] = x;
+	vec.gcc[62] = x;
+	vec.gcc[63] = x;
 	return vec;
 }
 # define VINT8x64_SPLAT_DEFINED
 #endif
-#ifndef VINT8x64_LOAD_ALIGNED_DEFINED
+#if !defined(VINT8x64_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint8x64 vint8x64_load_aligned(const vec_int8 x[64])
 {
 	vint8x64 vec;
@@ -2236,7 +2253,7 @@
 }
 # define VINT8x64_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x64_LOAD_DEFINED
+#if !defined(VINT8x64_LOAD_DEFINED)
 VEC_FUNC_IMPL vint8x64 vint8x64_load(const vec_int8 x[64])
 {
 	vint8x64 vec;
@@ -2245,21 +2262,21 @@
 }
 # define VINT8x64_LOAD_DEFINED
 #endif
-#ifndef VINT8x64_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint8x64_store_aligned(vint8x64 vec, vec_int8 arr[64])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT8x64_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x64_store_aligned(vint8x64 vec, vec_int8 x[64])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT8x64_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x64_STORE_DEFINED
-VEC_FUNC_IMPL void vint8x64_store(vint8x64 vec, vec_int8 arr[64])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT8x64_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x64_store(vint8x64 vec, vec_int8 x[64])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT8x64_STORE_DEFINED
 #endif
-#ifndef VINT8x64_ADD_DEFINED
+#if !defined(VINT8x64_ADD_DEFINED)
 VEC_FUNC_IMPL vint8x64 vint8x64_add(vint8x64 vec1, vint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -2267,7 +2284,7 @@
 }
 # define VINT8x64_ADD_DEFINED
 #endif
-#ifndef VINT8x64_SUB_DEFINED
+#if !defined(VINT8x64_SUB_DEFINED)
 VEC_FUNC_IMPL vint8x64 vint8x64_sub(vint8x64 vec1, vint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -2275,7 +2292,7 @@
 }
 # define VINT8x64_SUB_DEFINED
 #endif
-#ifndef VINT8x64_MUL_DEFINED
+#if !defined(VINT8x64_MUL_DEFINED)
 VEC_FUNC_IMPL vint8x64 vint8x64_mul(vint8x64 vec1, vint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -2283,106 +2300,8 @@
 }
 # define VINT8x64_MUL_DEFINED
 #endif
-#ifndef VINT8x64_AND_DEFINED
-VEC_FUNC_IMPL vint8x64 vint8x64_and(vint8x64 vec1, vint8x64 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT8x64_AND_DEFINED
-#endif
-#ifndef VINT8x64_OR_DEFINED
-VEC_FUNC_IMPL vint8x64 vint8x64_or(vint8x64 vec1, vint8x64 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT8x64_OR_DEFINED
-#endif
-#ifndef VINT8x64_XOR_DEFINED
-VEC_FUNC_IMPL vint8x64 vint8x64_xor(vint8x64 vec1, vint8x64 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT8x64_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x64_CMPLT_DEFINED
-VEC_FUNC_IMPL vint8x64 vint8x64_cmplt(vint8x64 vec1, vint8x64 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT8x64_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x64_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint8x64 vint8x64_cmpeq(vint8x64 vec1, vint8x64 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT8x64_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x64_CMPGT_DEFINED
-VEC_FUNC_IMPL vint8x64 vint8x64_cmpgt(vint8x64 vec1, vint8x64 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT8x64_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x64_CMPLE_DEFINED
-VEC_FUNC_IMPL vint8x64 vint8x64_cmple(vint8x64 vec1, vint8x64 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT8x64_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x64_CMPGE_DEFINED
-VEC_FUNC_IMPL vint8x64 vint8x64_cmpge(vint8x64 vec1, vint8x64 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT8x64_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x64_MIN_DEFINED
-VEC_FUNC_IMPL vint8x64 vint8x64_min(vint8x64 vec1, vint8x64 vec2)
-{
-	vint8x64 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT8x64_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x64_MAX_DEFINED
-VEC_FUNC_IMPL vint8x64 vint8x64_max(vint8x64 vec1, vint8x64 vec2)
-{
-	vint8x64 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT8x64_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x64_AVG_DEFINED
+#if !defined(VINT8x64_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x64 vint8x64_avg(vint8x64 vec1, vint8x64 vec2)
 {
 	vint8x64 ones = vint8x64_splat(1);
@@ -2396,19 +2315,107 @@
 }
 # define VINT8x64_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x64_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint8x64 vint8x64_lshift(vint8x64 vec1, vuint8x64 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT8x64_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x64_RSHIFT_DEFINED
+#if !defined(VINT8x64_AND_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_and(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_AND_DEFINED
+#endif
+#if !defined(VINT8x64_OR_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_or(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_OR_DEFINED
+#endif
+#if !defined(VINT8x64_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_xor(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_XOR_DEFINED
+#endif
+#if !defined(VINT8x64_NOT_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_not(vint8x64 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT8x64_NOT_DEFINED
+#endif
+#if !defined(VINT8x64_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x64 vint8x64_cmplt(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x64_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x64 vint8x64_cmpeq(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x64_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x64 vint8x64_cmpgt(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x64_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x64 vint8x64_cmple(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x64_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x64 vint8x64_cmpge(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x64_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x64 vint8x64_min(vint8x64 vec1, vint8x64 vec2)
+{
+	vint8x64 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x64_MIN_DEFINED
+#endif
+#if !defined(VINT8x64_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x64 vint8x64_max(vint8x64 vec1, vint8x64 vec2)
+{
+	vint8x64 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x64_MAX_DEFINED
+#endif
+#if !defined(VINT8x64_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x64 vint8x64_rshift(vint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -2416,9 +2423,8 @@
 }
 # define VINT8x64_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT8x64_LRSHIFT_DEFINED
+#if !defined(VINT8x64_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint8x64 vint8x64_lrshift(vint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
@@ -2426,29 +2432,88 @@
 }
 # define VINT8x64_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT8x64_NOT_DEFINED
-VEC_FUNC_IMPL vint8x64 vint8x64_not(vint8x64 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT8x64_NOT_DEFINED
-#endif
-
-
-/* vint8x64 */
-
-#ifndef VUINT8x64_SPLAT_DEFINED
+#if !defined(VINT8x64_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint8x64 vint8x64_lshift(vint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x64_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint8x64 vuint8x64_splat(vec_uint8 x)
 {
 	vuint8x64 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
+	vec.gcc[16] = x;
+	vec.gcc[17] = x;
+	vec.gcc[18] = x;
+	vec.gcc[19] = x;
+	vec.gcc[20] = x;
+	vec.gcc[21] = x;
+	vec.gcc[22] = x;
+	vec.gcc[23] = x;
+	vec.gcc[24] = x;
+	vec.gcc[25] = x;
+	vec.gcc[26] = x;
+	vec.gcc[27] = x;
+	vec.gcc[28] = x;
+	vec.gcc[29] = x;
+	vec.gcc[30] = x;
+	vec.gcc[31] = x;
+	vec.gcc[32] = x;
+	vec.gcc[33] = x;
+	vec.gcc[34] = x;
+	vec.gcc[35] = x;
+	vec.gcc[36] = x;
+	vec.gcc[37] = x;
+	vec.gcc[38] = x;
+	vec.gcc[39] = x;
+	vec.gcc[40] = x;
+	vec.gcc[41] = x;
+	vec.gcc[42] = x;
+	vec.gcc[43] = x;
+	vec.gcc[44] = x;
+	vec.gcc[45] = x;
+	vec.gcc[46] = x;
+	vec.gcc[47] = x;
+	vec.gcc[48] = x;
+	vec.gcc[49] = x;
+	vec.gcc[50] = x;
+	vec.gcc[51] = x;
+	vec.gcc[52] = x;
+	vec.gcc[53] = x;
+	vec.gcc[54] = x;
+	vec.gcc[55] = x;
+	vec.gcc[56] = x;
+	vec.gcc[57] = x;
+	vec.gcc[58] = x;
+	vec.gcc[59] = x;
+	vec.gcc[60] = x;
+	vec.gcc[61] = x;
+	vec.gcc[62] = x;
+	vec.gcc[63] = x;
 	return vec;
 }
 # define VUINT8x64_SPLAT_DEFINED
 #endif
-#ifndef VUINT8x64_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT8x64_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint8x64 vuint8x64_load_aligned(const vec_uint8 x[64])
 {
 	vuint8x64 vec;
@@ -2457,7 +2522,7 @@
 }
 # define VUINT8x64_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x64_LOAD_DEFINED
+#if !defined(VUINT8x64_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint8x64 vuint8x64_load(const vec_uint8 x[64])
 {
 	vuint8x64 vec;
@@ -2466,21 +2531,21 @@
 }
 # define VUINT8x64_LOAD_DEFINED
 #endif
-#ifndef VUINT8x64_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint8x64_store_aligned(vuint8x64 vec, vec_uint8 arr[64])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT8x64_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x64_store_aligned(vuint8x64 vec, vec_uint8 x[64])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT8x64_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x64_STORE_DEFINED
-VEC_FUNC_IMPL void vuint8x64_store(vuint8x64 vec, vec_uint8 arr[64])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT8x64_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint8x64_store(vuint8x64 vec, vec_uint8 x[64])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT8x64_STORE_DEFINED
 #endif
-#ifndef VUINT8x64_ADD_DEFINED
+#if !defined(VUINT8x64_ADD_DEFINED)
 VEC_FUNC_IMPL vuint8x64 vuint8x64_add(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -2488,7 +2553,7 @@
 }
 # define VUINT8x64_ADD_DEFINED
 #endif
-#ifndef VUINT8x64_SUB_DEFINED
+#if !defined(VUINT8x64_SUB_DEFINED)
 VEC_FUNC_IMPL vuint8x64 vuint8x64_sub(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -2496,7 +2561,7 @@
 }
 # define VUINT8x64_SUB_DEFINED
 #endif
-#ifndef VUINT8x64_MUL_DEFINED
+#if !defined(VUINT8x64_MUL_DEFINED)
 VEC_FUNC_IMPL vuint8x64 vuint8x64_mul(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -2504,7 +2569,16 @@
 }
 # define VUINT8x64_MUL_DEFINED
 #endif
-#ifndef VUINT8x64_AND_DEFINED
+#if !defined(VUINT8x64_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_avg(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT8x64_AVG_DEFINED
+#endif
+#if !defined(VUINT8x64_AND_DEFINED)
 VEC_FUNC_IMPL vuint8x64 vuint8x64_and(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -2512,7 +2586,7 @@
 }
 # define VUINT8x64_AND_DEFINED
 #endif
-#ifndef VUINT8x64_OR_DEFINED
+#if !defined(VUINT8x64_OR_DEFINED)
 VEC_FUNC_IMPL vuint8x64 vuint8x64_or(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -2520,7 +2594,7 @@
 }
 # define VUINT8x64_OR_DEFINED
 #endif
-#ifndef VUINT8x64_XOR_DEFINED
+#if !defined(VUINT8x64_XOR_DEFINED)
 VEC_FUNC_IMPL vuint8x64 vuint8x64_xor(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -2528,8 +2602,16 @@
 }
 # define VUINT8x64_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x64_CMPLT_DEFINED
+#if !defined(VUINT8x64_NOT_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_not(vuint8x64 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT8x64_NOT_DEFINED
+#endif
+#if !defined(VUINT8x64_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x64 vuint8x64_cmplt(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -2537,9 +2619,8 @@
 }
 # define VUINT8x64_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x64_CMPEQ_DEFINED
+#if !defined(VUINT8x64_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpeq(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -2547,9 +2628,8 @@
 }
 # define VUINT8x64_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x64_CMPGT_DEFINED
+#if !defined(VUINT8x64_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpgt(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -2557,9 +2637,8 @@
 }
 # define VUINT8x64_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x64_CMPLE_DEFINED
+#if !defined(VUINT8x64_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x64 vuint8x64_cmple(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -2567,9 +2646,8 @@
 }
 # define VUINT8x64_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x64_CMPGE_DEFINED
+#if !defined(VUINT8x64_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpge(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -2577,9 +2655,8 @@
 }
 # define VUINT8x64_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x64_MIN_DEFINED
+#if !defined(VUINT8x64_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x64 vuint8x64_min(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vuint8x64 mask;
@@ -2589,9 +2666,8 @@
 }
 # define VUINT8x64_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x64_MAX_DEFINED
+#if !defined(VUINT8x64_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x64 vuint8x64_max(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vuint8x64 mask;
@@ -2601,30 +2677,8 @@
 }
 # define VUINT8x64_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x64_AVG_DEFINED
-VEC_FUNC_IMPL vuint8x64 vuint8x64_avg(vuint8x64 vec1, vuint8x64 vec2)
-{
-	vint8x64 ones = vint8x64_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT8x64_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x64_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint8x64 vuint8x64_lshift(vuint8x64 vec1, vuint8x64 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT8x64_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x64_RSHIFT_DEFINED
+#if !defined(VUINT8x64_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x64 vuint8x64_rshift(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -2632,9 +2686,8 @@
 }
 # define VUINT8x64_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT8x64_LRSHIFT_DEFINED
+#if !defined(VUINT8x64_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint8x64 vuint8x64_lrshift(vuint8x64 vec1, vuint8x64 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
@@ -2642,29 +2695,26 @@
 }
 # define VUINT8x64_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT8x64_NOT_DEFINED
-VEC_FUNC_IMPL vuint8x64 vuint8x64_not(vuint8x64 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT8x64_NOT_DEFINED
-#endif
-
-
-/* vuint16x2 */
-
-#ifndef VINT16x2_SPLAT_DEFINED
+#if !defined(VUINT8x64_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint8x64 vuint8x64_lshift(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_LSHIFT_DEFINED
+#endif
+#if !defined(VINT16x2_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint16x2 vint16x2_splat(vec_int16 x)
 {
 	vint16x2 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
 	return vec;
 }
 # define VINT16x2_SPLAT_DEFINED
 #endif
-#ifndef VINT16x2_LOAD_ALIGNED_DEFINED
+#if !defined(VINT16x2_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint16x2 vint16x2_load_aligned(const vec_int16 x[2])
 {
 	vint16x2 vec;
@@ -2673,7 +2723,7 @@
 }
 # define VINT16x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x2_LOAD_DEFINED
+#if !defined(VINT16x2_LOAD_DEFINED)
 VEC_FUNC_IMPL vint16x2 vint16x2_load(const vec_int16 x[2])
 {
 	vint16x2 vec;
@@ -2682,21 +2732,21 @@
 }
 # define VINT16x2_LOAD_DEFINED
 #endif
-#ifndef VINT16x2_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint16x2_store_aligned(vint16x2 vec, vec_int16 arr[2])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT16x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint16x2_store_aligned(vint16x2 vec, vec_int16 x[2])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT16x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x2_STORE_DEFINED
-VEC_FUNC_IMPL void vint16x2_store(vint16x2 vec, vec_int16 arr[2])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT16x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vint16x2_store(vint16x2 vec, vec_int16 x[2])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT16x2_STORE_DEFINED
 #endif
-#ifndef VINT16x2_ADD_DEFINED
+#if !defined(VINT16x2_ADD_DEFINED)
 VEC_FUNC_IMPL vint16x2 vint16x2_add(vint16x2 vec1, vint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -2704,7 +2754,7 @@
 }
 # define VINT16x2_ADD_DEFINED
 #endif
-#ifndef VINT16x2_SUB_DEFINED
+#if !defined(VINT16x2_SUB_DEFINED)
 VEC_FUNC_IMPL vint16x2 vint16x2_sub(vint16x2 vec1, vint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -2712,7 +2762,7 @@
 }
 # define VINT16x2_SUB_DEFINED
 #endif
-#ifndef VINT16x2_MUL_DEFINED
+#if !defined(VINT16x2_MUL_DEFINED)
 VEC_FUNC_IMPL vint16x2 vint16x2_mul(vint16x2 vec1, vint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -2720,106 +2770,8 @@
 }
 # define VINT16x2_MUL_DEFINED
 #endif
-#ifndef VINT16x2_AND_DEFINED
-VEC_FUNC_IMPL vint16x2 vint16x2_and(vint16x2 vec1, vint16x2 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT16x2_AND_DEFINED
-#endif
-#ifndef VINT16x2_OR_DEFINED
-VEC_FUNC_IMPL vint16x2 vint16x2_or(vint16x2 vec1, vint16x2 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT16x2_OR_DEFINED
-#endif
-#ifndef VINT16x2_XOR_DEFINED
-VEC_FUNC_IMPL vint16x2 vint16x2_xor(vint16x2 vec1, vint16x2 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT16x2_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x2_CMPLT_DEFINED
-VEC_FUNC_IMPL vint16x2 vint16x2_cmplt(vint16x2 vec1, vint16x2 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT16x2_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x2_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint16x2 vint16x2_cmpeq(vint16x2 vec1, vint16x2 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT16x2_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x2_CMPGT_DEFINED
-VEC_FUNC_IMPL vint16x2 vint16x2_cmpgt(vint16x2 vec1, vint16x2 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT16x2_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x2_CMPLE_DEFINED
-VEC_FUNC_IMPL vint16x2 vint16x2_cmple(vint16x2 vec1, vint16x2 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT16x2_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x2_CMPGE_DEFINED
-VEC_FUNC_IMPL vint16x2 vint16x2_cmpge(vint16x2 vec1, vint16x2 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT16x2_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x2_MIN_DEFINED
-VEC_FUNC_IMPL vint16x2 vint16x2_min(vint16x2 vec1, vint16x2 vec2)
-{
-	vint16x2 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT16x2_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x2_MAX_DEFINED
-VEC_FUNC_IMPL vint16x2 vint16x2_max(vint16x2 vec1, vint16x2 vec2)
-{
-	vint16x2 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT16x2_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x2_AVG_DEFINED
+#if !defined(VINT16x2_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x2 vint16x2_avg(vint16x2 vec1, vint16x2 vec2)
 {
 	vint16x2 ones = vint16x2_splat(1);
@@ -2833,19 +2785,107 @@
 }
 # define VINT16x2_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x2_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint16x2 vint16x2_lshift(vint16x2 vec1, vuint16x2 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT16x2_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x2_RSHIFT_DEFINED
+#if !defined(VINT16x2_AND_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_and(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_AND_DEFINED
+#endif
+#if !defined(VINT16x2_OR_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_or(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_OR_DEFINED
+#endif
+#if !defined(VINT16x2_XOR_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_xor(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_XOR_DEFINED
+#endif
+#if !defined(VINT16x2_NOT_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_not(vint16x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT16x2_NOT_DEFINED
+#endif
+#if !defined(VINT16x2_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x2 vint16x2_cmplt(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_CMPLT_DEFINED
+#endif
+#if !defined(VINT16x2_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x2 vint16x2_cmpeq(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_CMPEQ_DEFINED
+#endif
+#if !defined(VINT16x2_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x2 vint16x2_cmpgt(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_CMPGT_DEFINED
+#endif
+#if !defined(VINT16x2_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x2 vint16x2_cmple(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_CMPLE_DEFINED
+#endif
+#if !defined(VINT16x2_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x2 vint16x2_cmpge(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_CMPGE_DEFINED
+#endif
+#if !defined(VINT16x2_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x2 vint16x2_min(vint16x2 vec1, vint16x2 vec2)
+{
+	vint16x2 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x2_MIN_DEFINED
+#endif
+#if !defined(VINT16x2_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x2 vint16x2_max(vint16x2 vec1, vint16x2 vec2)
+{
+	vint16x2 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x2_MAX_DEFINED
+#endif
+#if !defined(VINT16x2_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x2 vint16x2_rshift(vint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -2853,9 +2893,8 @@
 }
 # define VINT16x2_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x2_LRSHIFT_DEFINED
+#if !defined(VINT16x2_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x2 vint16x2_lrshift(vint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc);
@@ -2863,29 +2902,26 @@
 }
 # define VINT16x2_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT16x2_NOT_DEFINED
-VEC_FUNC_IMPL vint16x2 vint16x2_not(vint16x2 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT16x2_NOT_DEFINED
-#endif
-
-
-/* vint16x2 */
-
-#ifndef VUINT16x2_SPLAT_DEFINED
+#if !defined(VINT16x2_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x2 vint16x2_lshift(vint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x2_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint16x2 vuint16x2_splat(vec_uint16 x)
 {
 	vuint16x2 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
 	return vec;
 }
 # define VUINT16x2_SPLAT_DEFINED
 #endif
-#ifndef VUINT16x2_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT16x2_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint16x2 vuint16x2_load_aligned(const vec_uint16 x[2])
 {
 	vuint16x2 vec;
@@ -2894,7 +2930,7 @@
 }
 # define VUINT16x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x2_LOAD_DEFINED
+#if !defined(VUINT16x2_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint16x2 vuint16x2_load(const vec_uint16 x[2])
 {
 	vuint16x2 vec;
@@ -2903,21 +2939,21 @@
 }
 # define VUINT16x2_LOAD_DEFINED
 #endif
-#ifndef VUINT16x2_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint16x2_store_aligned(vuint16x2 vec, vec_uint16 arr[2])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT16x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint16x2_store_aligned(vuint16x2 vec, vec_uint16 x[2])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT16x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x2_STORE_DEFINED
-VEC_FUNC_IMPL void vuint16x2_store(vuint16x2 vec, vec_uint16 arr[2])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT16x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint16x2_store(vuint16x2 vec, vec_uint16 x[2])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT16x2_STORE_DEFINED
 #endif
-#ifndef VUINT16x2_ADD_DEFINED
+#if !defined(VUINT16x2_ADD_DEFINED)
 VEC_FUNC_IMPL vuint16x2 vuint16x2_add(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -2925,7 +2961,7 @@
 }
 # define VUINT16x2_ADD_DEFINED
 #endif
-#ifndef VUINT16x2_SUB_DEFINED
+#if !defined(VUINT16x2_SUB_DEFINED)
 VEC_FUNC_IMPL vuint16x2 vuint16x2_sub(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -2933,7 +2969,7 @@
 }
 # define VUINT16x2_SUB_DEFINED
 #endif
-#ifndef VUINT16x2_MUL_DEFINED
+#if !defined(VUINT16x2_MUL_DEFINED)
 VEC_FUNC_IMPL vuint16x2 vuint16x2_mul(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -2941,7 +2977,16 @@
 }
 # define VUINT16x2_MUL_DEFINED
 #endif
-#ifndef VUINT16x2_AND_DEFINED
+#if !defined(VUINT16x2_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_avg(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT16x2_AVG_DEFINED
+#endif
+#if !defined(VUINT16x2_AND_DEFINED)
 VEC_FUNC_IMPL vuint16x2 vuint16x2_and(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -2949,7 +2994,7 @@
 }
 # define VUINT16x2_AND_DEFINED
 #endif
-#ifndef VUINT16x2_OR_DEFINED
+#if !defined(VUINT16x2_OR_DEFINED)
 VEC_FUNC_IMPL vuint16x2 vuint16x2_or(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -2957,7 +3002,7 @@
 }
 # define VUINT16x2_OR_DEFINED
 #endif
-#ifndef VUINT16x2_XOR_DEFINED
+#if !defined(VUINT16x2_XOR_DEFINED)
 VEC_FUNC_IMPL vuint16x2 vuint16x2_xor(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -2965,8 +3010,16 @@
 }
 # define VUINT16x2_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x2_CMPLT_DEFINED
+#if !defined(VUINT16x2_NOT_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_not(vuint16x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT16x2_NOT_DEFINED
+#endif
+#if !defined(VUINT16x2_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x2 vuint16x2_cmplt(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -2974,9 +3027,8 @@
 }
 # define VUINT16x2_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x2_CMPEQ_DEFINED
+#if !defined(VUINT16x2_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpeq(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -2984,9 +3036,8 @@
 }
 # define VUINT16x2_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x2_CMPGT_DEFINED
+#if !defined(VUINT16x2_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpgt(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -2994,9 +3045,8 @@
 }
 # define VUINT16x2_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x2_CMPLE_DEFINED
+#if !defined(VUINT16x2_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x2 vuint16x2_cmple(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -3004,9 +3054,8 @@
 }
 # define VUINT16x2_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x2_CMPGE_DEFINED
+#if !defined(VUINT16x2_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpge(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -3014,9 +3063,8 @@
 }
 # define VUINT16x2_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x2_MIN_DEFINED
+#if !defined(VUINT16x2_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x2 vuint16x2_min(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vuint16x2 mask;
@@ -3026,9 +3074,8 @@
 }
 # define VUINT16x2_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x2_MAX_DEFINED
+#if !defined(VUINT16x2_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x2 vuint16x2_max(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vuint16x2 mask;
@@ -3038,30 +3085,8 @@
 }
 # define VUINT16x2_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x2_AVG_DEFINED
-VEC_FUNC_IMPL vuint16x2 vuint16x2_avg(vuint16x2 vec1, vuint16x2 vec2)
-{
-	vint16x2 ones = vint16x2_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT16x2_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x2_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint16x2 vuint16x2_lshift(vuint16x2 vec1, vuint16x2 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT16x2_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x2_RSHIFT_DEFINED
+#if !defined(VUINT16x2_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x2 vuint16x2_rshift(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -3069,9 +3094,8 @@
 }
 # define VUINT16x2_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x2_LRSHIFT_DEFINED
+#if !defined(VUINT16x2_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x2 vuint16x2_lrshift(vuint16x2 vec1, vuint16x2 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc);
@@ -3079,29 +3103,28 @@
 }
 # define VUINT16x2_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT16x2_NOT_DEFINED
-VEC_FUNC_IMPL vuint16x2 vuint16x2_not(vuint16x2 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT16x2_NOT_DEFINED
-#endif
-
-
-/* vuint16x4 */
-
-#ifndef VINT16x4_SPLAT_DEFINED
+#if !defined(VUINT16x2_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint16x2 vuint16x2_lshift(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_LSHIFT_DEFINED
+#endif
+#if !defined(VINT16x4_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint16x4 vint16x4_splat(vec_int16 x)
 {
 	vint16x4 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
 	return vec;
 }
 # define VINT16x4_SPLAT_DEFINED
 #endif
-#ifndef VINT16x4_LOAD_ALIGNED_DEFINED
+#if !defined(VINT16x4_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint16x4 vint16x4_load_aligned(const vec_int16 x[4])
 {
 	vint16x4 vec;
@@ -3110,7 +3133,7 @@
 }
 # define VINT16x4_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x4_LOAD_DEFINED
+#if !defined(VINT16x4_LOAD_DEFINED)
 VEC_FUNC_IMPL vint16x4 vint16x4_load(const vec_int16 x[4])
 {
 	vint16x4 vec;
@@ -3119,21 +3142,21 @@
 }
 # define VINT16x4_LOAD_DEFINED
 #endif
-#ifndef VINT16x4_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint16x4_store_aligned(vint16x4 vec, vec_int16 arr[4])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT16x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint16x4_store_aligned(vint16x4 vec, vec_int16 x[4])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT16x4_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x4_STORE_DEFINED
-VEC_FUNC_IMPL void vint16x4_store(vint16x4 vec, vec_int16 arr[4])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT16x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vint16x4_store(vint16x4 vec, vec_int16 x[4])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT16x4_STORE_DEFINED
 #endif
-#ifndef VINT16x4_ADD_DEFINED
+#if !defined(VINT16x4_ADD_DEFINED)
 VEC_FUNC_IMPL vint16x4 vint16x4_add(vint16x4 vec1, vint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -3141,7 +3164,7 @@
 }
 # define VINT16x4_ADD_DEFINED
 #endif
-#ifndef VINT16x4_SUB_DEFINED
+#if !defined(VINT16x4_SUB_DEFINED)
 VEC_FUNC_IMPL vint16x4 vint16x4_sub(vint16x4 vec1, vint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -3149,7 +3172,7 @@
 }
 # define VINT16x4_SUB_DEFINED
 #endif
-#ifndef VINT16x4_MUL_DEFINED
+#if !defined(VINT16x4_MUL_DEFINED)
 VEC_FUNC_IMPL vint16x4 vint16x4_mul(vint16x4 vec1, vint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -3157,106 +3180,8 @@
 }
 # define VINT16x4_MUL_DEFINED
 #endif
-#ifndef VINT16x4_AND_DEFINED
-VEC_FUNC_IMPL vint16x4 vint16x4_and(vint16x4 vec1, vint16x4 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT16x4_AND_DEFINED
-#endif
-#ifndef VINT16x4_OR_DEFINED
-VEC_FUNC_IMPL vint16x4 vint16x4_or(vint16x4 vec1, vint16x4 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT16x4_OR_DEFINED
-#endif
-#ifndef VINT16x4_XOR_DEFINED
-VEC_FUNC_IMPL vint16x4 vint16x4_xor(vint16x4 vec1, vint16x4 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT16x4_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x4_CMPLT_DEFINED
-VEC_FUNC_IMPL vint16x4 vint16x4_cmplt(vint16x4 vec1, vint16x4 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT16x4_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x4_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint16x4 vint16x4_cmpeq(vint16x4 vec1, vint16x4 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT16x4_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x4_CMPGT_DEFINED
-VEC_FUNC_IMPL vint16x4 vint16x4_cmpgt(vint16x4 vec1, vint16x4 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT16x4_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x4_CMPLE_DEFINED
-VEC_FUNC_IMPL vint16x4 vint16x4_cmple(vint16x4 vec1, vint16x4 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT16x4_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x4_CMPGE_DEFINED
-VEC_FUNC_IMPL vint16x4 vint16x4_cmpge(vint16x4 vec1, vint16x4 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT16x4_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x4_MIN_DEFINED
-VEC_FUNC_IMPL vint16x4 vint16x4_min(vint16x4 vec1, vint16x4 vec2)
-{
-	vint16x4 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT16x4_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x4_MAX_DEFINED
-VEC_FUNC_IMPL vint16x4 vint16x4_max(vint16x4 vec1, vint16x4 vec2)
-{
-	vint16x4 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT16x4_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x4_AVG_DEFINED
+#if !defined(VINT16x4_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x4 vint16x4_avg(vint16x4 vec1, vint16x4 vec2)
 {
 	vint16x4 ones = vint16x4_splat(1);
@@ -3270,19 +3195,107 @@
 }
 # define VINT16x4_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x4_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint16x4 vint16x4_lshift(vint16x4 vec1, vuint16x4 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT16x4_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x4_RSHIFT_DEFINED
+#if !defined(VINT16x4_AND_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_and(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_AND_DEFINED
+#endif
+#if !defined(VINT16x4_OR_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_or(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_OR_DEFINED
+#endif
+#if !defined(VINT16x4_XOR_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_xor(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_XOR_DEFINED
+#endif
+#if !defined(VINT16x4_NOT_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_not(vint16x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT16x4_NOT_DEFINED
+#endif
+#if !defined(VINT16x4_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x4 vint16x4_cmplt(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_CMPLT_DEFINED
+#endif
+#if !defined(VINT16x4_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x4 vint16x4_cmpeq(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_CMPEQ_DEFINED
+#endif
+#if !defined(VINT16x4_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x4 vint16x4_cmpgt(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_CMPGT_DEFINED
+#endif
+#if !defined(VINT16x4_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x4 vint16x4_cmple(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_CMPLE_DEFINED
+#endif
+#if !defined(VINT16x4_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x4 vint16x4_cmpge(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_CMPGE_DEFINED
+#endif
+#if !defined(VINT16x4_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x4 vint16x4_min(vint16x4 vec1, vint16x4 vec2)
+{
+	vint16x4 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x4_MIN_DEFINED
+#endif
+#if !defined(VINT16x4_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x4 vint16x4_max(vint16x4 vec1, vint16x4 vec2)
+{
+	vint16x4 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x4_MAX_DEFINED
+#endif
+#if !defined(VINT16x4_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x4 vint16x4_rshift(vint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -3290,9 +3303,8 @@
 }
 # define VINT16x4_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x4_LRSHIFT_DEFINED
+#if !defined(VINT16x4_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x4 vint16x4_lrshift(vint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc);
@@ -3300,29 +3312,28 @@
 }
 # define VINT16x4_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT16x4_NOT_DEFINED
-VEC_FUNC_IMPL vint16x4 vint16x4_not(vint16x4 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT16x4_NOT_DEFINED
-#endif
-
-
-/* vint16x4 */
-
-#ifndef VUINT16x4_SPLAT_DEFINED
+#if !defined(VINT16x4_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x4 vint16x4_lshift(vint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x4_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint16x4 vuint16x4_splat(vec_uint16 x)
 {
 	vuint16x4 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
 	return vec;
 }
 # define VUINT16x4_SPLAT_DEFINED
 #endif
-#ifndef VUINT16x4_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT16x4_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint16x4 vuint16x4_load_aligned(const vec_uint16 x[4])
 {
 	vuint16x4 vec;
@@ -3331,7 +3342,7 @@
 }
 # define VUINT16x4_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x4_LOAD_DEFINED
+#if !defined(VUINT16x4_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint16x4 vuint16x4_load(const vec_uint16 x[4])
 {
 	vuint16x4 vec;
@@ -3340,21 +3351,21 @@
 }
 # define VUINT16x4_LOAD_DEFINED
 #endif
-#ifndef VUINT16x4_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint16x4_store_aligned(vuint16x4 vec, vec_uint16 arr[4])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT16x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint16x4_store_aligned(vuint16x4 vec, vec_uint16 x[4])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT16x4_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x4_STORE_DEFINED
-VEC_FUNC_IMPL void vuint16x4_store(vuint16x4 vec, vec_uint16 arr[4])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT16x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint16x4_store(vuint16x4 vec, vec_uint16 x[4])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT16x4_STORE_DEFINED
 #endif
-#ifndef VUINT16x4_ADD_DEFINED
+#if !defined(VUINT16x4_ADD_DEFINED)
 VEC_FUNC_IMPL vuint16x4 vuint16x4_add(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -3362,7 +3373,7 @@
 }
 # define VUINT16x4_ADD_DEFINED
 #endif
-#ifndef VUINT16x4_SUB_DEFINED
+#if !defined(VUINT16x4_SUB_DEFINED)
 VEC_FUNC_IMPL vuint16x4 vuint16x4_sub(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -3370,7 +3381,7 @@
 }
 # define VUINT16x4_SUB_DEFINED
 #endif
-#ifndef VUINT16x4_MUL_DEFINED
+#if !defined(VUINT16x4_MUL_DEFINED)
 VEC_FUNC_IMPL vuint16x4 vuint16x4_mul(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -3378,7 +3389,16 @@
 }
 # define VUINT16x4_MUL_DEFINED
 #endif
-#ifndef VUINT16x4_AND_DEFINED
+#if !defined(VUINT16x4_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_avg(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT16x4_AVG_DEFINED
+#endif
+#if !defined(VUINT16x4_AND_DEFINED)
 VEC_FUNC_IMPL vuint16x4 vuint16x4_and(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -3386,7 +3406,7 @@
 }
 # define VUINT16x4_AND_DEFINED
 #endif
-#ifndef VUINT16x4_OR_DEFINED
+#if !defined(VUINT16x4_OR_DEFINED)
 VEC_FUNC_IMPL vuint16x4 vuint16x4_or(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -3394,7 +3414,7 @@
 }
 # define VUINT16x4_OR_DEFINED
 #endif
-#ifndef VUINT16x4_XOR_DEFINED
+#if !defined(VUINT16x4_XOR_DEFINED)
 VEC_FUNC_IMPL vuint16x4 vuint16x4_xor(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -3402,8 +3422,16 @@
 }
 # define VUINT16x4_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x4_CMPLT_DEFINED
+#if !defined(VUINT16x4_NOT_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_not(vuint16x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT16x4_NOT_DEFINED
+#endif
+#if !defined(VUINT16x4_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x4 vuint16x4_cmplt(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -3411,9 +3439,8 @@
 }
 # define VUINT16x4_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x4_CMPEQ_DEFINED
+#if !defined(VUINT16x4_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpeq(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -3421,9 +3448,8 @@
 }
 # define VUINT16x4_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x4_CMPGT_DEFINED
+#if !defined(VUINT16x4_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpgt(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -3431,9 +3457,8 @@
 }
 # define VUINT16x4_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x4_CMPLE_DEFINED
+#if !defined(VUINT16x4_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x4 vuint16x4_cmple(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -3441,9 +3466,8 @@
 }
 # define VUINT16x4_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x4_CMPGE_DEFINED
+#if !defined(VUINT16x4_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpge(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -3451,9 +3475,8 @@
 }
 # define VUINT16x4_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x4_MIN_DEFINED
+#if !defined(VUINT16x4_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x4 vuint16x4_min(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vuint16x4 mask;
@@ -3463,9 +3486,8 @@
 }
 # define VUINT16x4_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x4_MAX_DEFINED
+#if !defined(VUINT16x4_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x4 vuint16x4_max(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vuint16x4 mask;
@@ -3475,30 +3497,8 @@
 }
 # define VUINT16x4_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x4_AVG_DEFINED
-VEC_FUNC_IMPL vuint16x4 vuint16x4_avg(vuint16x4 vec1, vuint16x4 vec2)
-{
-	vint16x4 ones = vint16x4_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT16x4_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x4_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint16x4 vuint16x4_lshift(vuint16x4 vec1, vuint16x4 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT16x4_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x4_RSHIFT_DEFINED
+#if !defined(VUINT16x4_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x4 vuint16x4_rshift(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -3506,9 +3506,8 @@
 }
 # define VUINT16x4_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x4_LRSHIFT_DEFINED
+#if !defined(VUINT16x4_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x4 vuint16x4_lrshift(vuint16x4 vec1, vuint16x4 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc);
@@ -3516,29 +3515,32 @@
 }
 # define VUINT16x4_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT16x4_NOT_DEFINED
-VEC_FUNC_IMPL vuint16x4 vuint16x4_not(vuint16x4 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT16x4_NOT_DEFINED
-#endif
-
-
-/* vuint16x8 */
-
-#ifndef VINT16x8_SPLAT_DEFINED
+#if !defined(VUINT16x4_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint16x4 vuint16x4_lshift(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_LSHIFT_DEFINED
+#endif
+#if !defined(VINT16x8_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_splat(vec_int16 x)
 {
 	vint16x8 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
 	return vec;
 }
 # define VINT16x8_SPLAT_DEFINED
 #endif
-#ifndef VINT16x8_LOAD_ALIGNED_DEFINED
+#if !defined(VINT16x8_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_load_aligned(const vec_int16 x[8])
 {
 	vint16x8 vec;
@@ -3547,7 +3549,7 @@
 }
 # define VINT16x8_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x8_LOAD_DEFINED
+#if !defined(VINT16x8_LOAD_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_load(const vec_int16 x[8])
 {
 	vint16x8 vec;
@@ -3556,21 +3558,21 @@
 }
 # define VINT16x8_LOAD_DEFINED
 #endif
-#ifndef VINT16x8_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 arr[8])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT16x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 x[8])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT16x8_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x8_STORE_DEFINED
-VEC_FUNC_IMPL void vint16x8_store(vint16x8 vec, vec_int16 arr[8])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT16x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vint16x8_store(vint16x8 vec, vec_int16 x[8])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT16x8_STORE_DEFINED
 #endif
-#ifndef VINT16x8_ADD_DEFINED
+#if !defined(VINT16x8_ADD_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_add(vint16x8 vec1, vint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -3578,7 +3580,7 @@
 }
 # define VINT16x8_ADD_DEFINED
 #endif
-#ifndef VINT16x8_SUB_DEFINED
+#if !defined(VINT16x8_SUB_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_sub(vint16x8 vec1, vint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -3586,7 +3588,7 @@
 }
 # define VINT16x8_SUB_DEFINED
 #endif
-#ifndef VINT16x8_MUL_DEFINED
+#if !defined(VINT16x8_MUL_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_mul(vint16x8 vec1, vint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -3594,106 +3596,8 @@
 }
 # define VINT16x8_MUL_DEFINED
 #endif
-#ifndef VINT16x8_AND_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT16x8_AND_DEFINED
-#endif
-#ifndef VINT16x8_OR_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT16x8_OR_DEFINED
-#endif
-#ifndef VINT16x8_XOR_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT16x8_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x8_CMPLT_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT16x8_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x8_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT16x8_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x8_CMPGT_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT16x8_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x8_CMPLE_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_cmple(vint16x8 vec1, vint16x8 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT16x8_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x8_CMPGE_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_cmpge(vint16x8 vec1, vint16x8 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT16x8_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x8_MIN_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2)
-{
-	vint16x8 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT16x8_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x8_MAX_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2)
-{
-	vint16x8 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT16x8_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x8_AVG_DEFINED
+#if !defined(VINT16x8_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2)
 {
 	vint16x8 ones = vint16x8_splat(1);
@@ -3707,19 +3611,107 @@
 }
 # define VINT16x8_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x8_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT16x8_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x8_RSHIFT_DEFINED
+#if !defined(VINT16x8_AND_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_AND_DEFINED
+#endif
+#if !defined(VINT16x8_OR_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_OR_DEFINED
+#endif
+#if !defined(VINT16x8_XOR_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_XOR_DEFINED
+#endif
+#if !defined(VINT16x8_NOT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_not(vint16x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT16x8_NOT_DEFINED
+#endif
+#if !defined(VINT16x8_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_CMPLT_DEFINED
+#endif
+#if !defined(VINT16x8_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_CMPEQ_DEFINED
+#endif
+#if !defined(VINT16x8_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_CMPGT_DEFINED
+#endif
+#if !defined(VINT16x8_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x8 vint16x8_cmple(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_CMPLE_DEFINED
+#endif
+#if !defined(VINT16x8_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpge(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_CMPGE_DEFINED
+#endif
+#if !defined(VINT16x8_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x8_MIN_DEFINED
+#endif
+#if !defined(VINT16x8_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x8_MAX_DEFINED
+#endif
+#if !defined(VINT16x8_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -3727,9 +3719,8 @@
 }
 # define VINT16x8_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x8_LRSHIFT_DEFINED
+#if !defined(VINT16x8_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x8 vint16x8_lrshift(vint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
@@ -3737,29 +3728,32 @@
 }
 # define VINT16x8_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT16x8_NOT_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_not(vint16x8 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT16x8_NOT_DEFINED
-#endif
-
-
-/* vint16x8 */
-
-#ifndef VUINT16x8_SPLAT_DEFINED
+#if !defined(VINT16x8_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x8_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_splat(vec_uint16 x)
 {
 	vuint16x8 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
 	return vec;
 }
 # define VUINT16x8_SPLAT_DEFINED
 #endif
-#ifndef VUINT16x8_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT16x8_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_load_aligned(const vec_uint16 x[8])
 {
 	vuint16x8 vec;
@@ -3768,7 +3762,7 @@
 }
 # define VUINT16x8_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x8_LOAD_DEFINED
+#if !defined(VUINT16x8_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_load(const vec_uint16 x[8])
 {
 	vuint16x8 vec;
@@ -3777,21 +3771,21 @@
 }
 # define VUINT16x8_LOAD_DEFINED
 #endif
-#ifndef VUINT16x8_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 arr[8])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT16x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 x[8])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT16x8_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x8_STORE_DEFINED
-VEC_FUNC_IMPL void vuint16x8_store(vuint16x8 vec, vec_uint16 arr[8])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT16x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint16x8_store(vuint16x8 vec, vec_uint16 x[8])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT16x8_STORE_DEFINED
 #endif
-#ifndef VUINT16x8_ADD_DEFINED
+#if !defined(VUINT16x8_ADD_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_add(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -3799,7 +3793,7 @@
 }
 # define VUINT16x8_ADD_DEFINED
 #endif
-#ifndef VUINT16x8_SUB_DEFINED
+#if !defined(VUINT16x8_SUB_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_sub(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -3807,7 +3801,7 @@
 }
 # define VUINT16x8_SUB_DEFINED
 #endif
-#ifndef VUINT16x8_MUL_DEFINED
+#if !defined(VUINT16x8_MUL_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_mul(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -3815,7 +3809,16 @@
 }
 # define VUINT16x8_MUL_DEFINED
 #endif
-#ifndef VUINT16x8_AND_DEFINED
+#if !defined(VUINT16x8_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT16x8_AVG_DEFINED
+#endif
+#if !defined(VUINT16x8_AND_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_and(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -3823,7 +3826,7 @@
 }
 # define VUINT16x8_AND_DEFINED
 #endif
-#ifndef VUINT16x8_OR_DEFINED
+#if !defined(VUINT16x8_OR_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_or(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -3831,7 +3834,7 @@
 }
 # define VUINT16x8_OR_DEFINED
 #endif
-#ifndef VUINT16x8_XOR_DEFINED
+#if !defined(VUINT16x8_XOR_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_xor(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -3839,8 +3842,16 @@
 }
 # define VUINT16x8_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x8_CMPLT_DEFINED
+#if !defined(VUINT16x8_NOT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_not(vuint16x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT16x8_NOT_DEFINED
+#endif
+#if !defined(VUINT16x8_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x8 vuint16x8_cmplt(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -3848,9 +3859,8 @@
 }
 # define VUINT16x8_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x8_CMPEQ_DEFINED
+#if !defined(VUINT16x8_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpeq(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -3858,9 +3868,8 @@
 }
 # define VUINT16x8_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x8_CMPGT_DEFINED
+#if !defined(VUINT16x8_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpgt(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -3868,9 +3877,8 @@
 }
 # define VUINT16x8_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x8_CMPLE_DEFINED
+#if !defined(VUINT16x8_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x8 vuint16x8_cmple(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -3878,9 +3886,8 @@
 }
 # define VUINT16x8_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x8_CMPGE_DEFINED
+#if !defined(VUINT16x8_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpge(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -3888,9 +3895,8 @@
 }
 # define VUINT16x8_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x8_MIN_DEFINED
+#if !defined(VUINT16x8_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x8 vuint16x8_min(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 mask;
@@ -3900,9 +3906,8 @@
 }
 # define VUINT16x8_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x8_MAX_DEFINED
+#if !defined(VUINT16x8_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x8 vuint16x8_max(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 mask;
@@ -3912,30 +3917,8 @@
 }
 # define VUINT16x8_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x8_AVG_DEFINED
-VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2)
-{
-	vint16x8 ones = vint16x8_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT16x8_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x8_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT16x8_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x8_RSHIFT_DEFINED
+#if !defined(VUINT16x8_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x8 vuint16x8_rshift(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -3943,9 +3926,8 @@
 }
 # define VUINT16x8_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x8_LRSHIFT_DEFINED
+#if !defined(VUINT16x8_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
@@ -3953,29 +3935,40 @@
 }
 # define VUINT16x8_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT16x8_NOT_DEFINED
-VEC_FUNC_IMPL vuint16x8 vuint16x8_not(vuint16x8 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT16x8_NOT_DEFINED
-#endif
-
-
-/* vuint16x16 */
-
-#ifndef VINT16x16_SPLAT_DEFINED
+#if !defined(VUINT16x8_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_LSHIFT_DEFINED
+#endif
+#if !defined(VINT16x16_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint16x16 vint16x16_splat(vec_int16 x)
 {
 	vint16x16 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
 	return vec;
 }
 # define VINT16x16_SPLAT_DEFINED
 #endif
-#ifndef VINT16x16_LOAD_ALIGNED_DEFINED
+#if !defined(VINT16x16_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint16x16 vint16x16_load_aligned(const vec_int16 x[16])
 {
 	vint16x16 vec;
@@ -3984,7 +3977,7 @@
 }
 # define VINT16x16_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x16_LOAD_DEFINED
+#if !defined(VINT16x16_LOAD_DEFINED)
 VEC_FUNC_IMPL vint16x16 vint16x16_load(const vec_int16 x[16])
 {
 	vint16x16 vec;
@@ -3993,21 +3986,21 @@
 }
 # define VINT16x16_LOAD_DEFINED
 #endif
-#ifndef VINT16x16_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint16x16_store_aligned(vint16x16 vec, vec_int16 arr[16])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT16x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint16x16_store_aligned(vint16x16 vec, vec_int16 x[16])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT16x16_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x16_STORE_DEFINED
-VEC_FUNC_IMPL void vint16x16_store(vint16x16 vec, vec_int16 arr[16])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT16x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vint16x16_store(vint16x16 vec, vec_int16 x[16])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT16x16_STORE_DEFINED
 #endif
-#ifndef VINT16x16_ADD_DEFINED
+#if !defined(VINT16x16_ADD_DEFINED)
 VEC_FUNC_IMPL vint16x16 vint16x16_add(vint16x16 vec1, vint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -4015,7 +4008,7 @@
 }
 # define VINT16x16_ADD_DEFINED
 #endif
-#ifndef VINT16x16_SUB_DEFINED
+#if !defined(VINT16x16_SUB_DEFINED)
 VEC_FUNC_IMPL vint16x16 vint16x16_sub(vint16x16 vec1, vint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -4023,7 +4016,7 @@
 }
 # define VINT16x16_SUB_DEFINED
 #endif
-#ifndef VINT16x16_MUL_DEFINED
+#if !defined(VINT16x16_MUL_DEFINED)
 VEC_FUNC_IMPL vint16x16 vint16x16_mul(vint16x16 vec1, vint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -4031,106 +4024,8 @@
 }
 # define VINT16x16_MUL_DEFINED
 #endif
-#ifndef VINT16x16_AND_DEFINED
-VEC_FUNC_IMPL vint16x16 vint16x16_and(vint16x16 vec1, vint16x16 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT16x16_AND_DEFINED
-#endif
-#ifndef VINT16x16_OR_DEFINED
-VEC_FUNC_IMPL vint16x16 vint16x16_or(vint16x16 vec1, vint16x16 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT16x16_OR_DEFINED
-#endif
-#ifndef VINT16x16_XOR_DEFINED
-VEC_FUNC_IMPL vint16x16 vint16x16_xor(vint16x16 vec1, vint16x16 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT16x16_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x16_CMPLT_DEFINED
-VEC_FUNC_IMPL vint16x16 vint16x16_cmplt(vint16x16 vec1, vint16x16 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT16x16_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x16_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint16x16 vint16x16_cmpeq(vint16x16 vec1, vint16x16 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT16x16_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x16_CMPGT_DEFINED
-VEC_FUNC_IMPL vint16x16 vint16x16_cmpgt(vint16x16 vec1, vint16x16 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT16x16_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x16_CMPLE_DEFINED
-VEC_FUNC_IMPL vint16x16 vint16x16_cmple(vint16x16 vec1, vint16x16 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT16x16_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x16_CMPGE_DEFINED
-VEC_FUNC_IMPL vint16x16 vint16x16_cmpge(vint16x16 vec1, vint16x16 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT16x16_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x16_MIN_DEFINED
-VEC_FUNC_IMPL vint16x16 vint16x16_min(vint16x16 vec1, vint16x16 vec2)
-{
-	vint16x16 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT16x16_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x16_MAX_DEFINED
-VEC_FUNC_IMPL vint16x16 vint16x16_max(vint16x16 vec1, vint16x16 vec2)
-{
-	vint16x16 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT16x16_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x16_AVG_DEFINED
+#if !defined(VINT16x16_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x16 vint16x16_avg(vint16x16 vec1, vint16x16 vec2)
 {
 	vint16x16 ones = vint16x16_splat(1);
@@ -4144,19 +4039,107 @@
 }
 # define VINT16x16_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x16_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint16x16 vint16x16_lshift(vint16x16 vec1, vuint16x16 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT16x16_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x16_RSHIFT_DEFINED
+#if !defined(VINT16x16_AND_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_and(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_AND_DEFINED
+#endif
+#if !defined(VINT16x16_OR_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_or(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_OR_DEFINED
+#endif
+#if !defined(VINT16x16_XOR_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_xor(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_XOR_DEFINED
+#endif
+#if !defined(VINT16x16_NOT_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_not(vint16x16 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT16x16_NOT_DEFINED
+#endif
+#if !defined(VINT16x16_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x16 vint16x16_cmplt(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_CMPLT_DEFINED
+#endif
+#if !defined(VINT16x16_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x16 vint16x16_cmpeq(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_CMPEQ_DEFINED
+#endif
+#if !defined(VINT16x16_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x16 vint16x16_cmpgt(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_CMPGT_DEFINED
+#endif
+#if !defined(VINT16x16_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x16 vint16x16_cmple(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_CMPLE_DEFINED
+#endif
+#if !defined(VINT16x16_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x16 vint16x16_cmpge(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_CMPGE_DEFINED
+#endif
+#if !defined(VINT16x16_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x16 vint16x16_min(vint16x16 vec1, vint16x16 vec2)
+{
+	vint16x16 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x16_MIN_DEFINED
+#endif
+#if !defined(VINT16x16_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x16 vint16x16_max(vint16x16 vec1, vint16x16 vec2)
+{
+	vint16x16 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x16_MAX_DEFINED
+#endif
+#if !defined(VINT16x16_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x16 vint16x16_rshift(vint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -4164,9 +4147,8 @@
 }
 # define VINT16x16_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x16_LRSHIFT_DEFINED
+#if !defined(VINT16x16_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x16 vint16x16_lrshift(vint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
@@ -4174,29 +4156,40 @@
 }
 # define VINT16x16_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT16x16_NOT_DEFINED
-VEC_FUNC_IMPL vint16x16 vint16x16_not(vint16x16 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT16x16_NOT_DEFINED
-#endif
-
-
-/* vint16x16 */
-
-#ifndef VUINT16x16_SPLAT_DEFINED
+#if !defined(VINT16x16_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x16 vint16x16_lshift(vint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x16_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint16x16 vuint16x16_splat(vec_uint16 x)
 {
 	vuint16x16 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
 	return vec;
 }
 # define VUINT16x16_SPLAT_DEFINED
 #endif
-#ifndef VUINT16x16_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT16x16_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint16x16 vuint16x16_load_aligned(const vec_uint16 x[16])
 {
 	vuint16x16 vec;
@@ -4205,7 +4198,7 @@
 }
 # define VUINT16x16_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x16_LOAD_DEFINED
+#if !defined(VUINT16x16_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint16x16 vuint16x16_load(const vec_uint16 x[16])
 {
 	vuint16x16 vec;
@@ -4214,21 +4207,21 @@
 }
 # define VUINT16x16_LOAD_DEFINED
 #endif
-#ifndef VUINT16x16_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint16x16_store_aligned(vuint16x16 vec, vec_uint16 arr[16])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT16x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint16x16_store_aligned(vuint16x16 vec, vec_uint16 x[16])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT16x16_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x16_STORE_DEFINED
-VEC_FUNC_IMPL void vuint16x16_store(vuint16x16 vec, vec_uint16 arr[16])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT16x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint16x16_store(vuint16x16 vec, vec_uint16 x[16])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT16x16_STORE_DEFINED
 #endif
-#ifndef VUINT16x16_ADD_DEFINED
+#if !defined(VUINT16x16_ADD_DEFINED)
 VEC_FUNC_IMPL vuint16x16 vuint16x16_add(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -4236,7 +4229,7 @@
 }
 # define VUINT16x16_ADD_DEFINED
 #endif
-#ifndef VUINT16x16_SUB_DEFINED
+#if !defined(VUINT16x16_SUB_DEFINED)
 VEC_FUNC_IMPL vuint16x16 vuint16x16_sub(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -4244,7 +4237,7 @@
 }
 # define VUINT16x16_SUB_DEFINED
 #endif
-#ifndef VUINT16x16_MUL_DEFINED
+#if !defined(VUINT16x16_MUL_DEFINED)
 VEC_FUNC_IMPL vuint16x16 vuint16x16_mul(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -4252,7 +4245,16 @@
 }
 # define VUINT16x16_MUL_DEFINED
 #endif
-#ifndef VUINT16x16_AND_DEFINED
+#if !defined(VUINT16x16_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_avg(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT16x16_AVG_DEFINED
+#endif
+#if !defined(VUINT16x16_AND_DEFINED)
 VEC_FUNC_IMPL vuint16x16 vuint16x16_and(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -4260,7 +4262,7 @@
 }
 # define VUINT16x16_AND_DEFINED
 #endif
-#ifndef VUINT16x16_OR_DEFINED
+#if !defined(VUINT16x16_OR_DEFINED)
 VEC_FUNC_IMPL vuint16x16 vuint16x16_or(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -4268,7 +4270,7 @@
 }
 # define VUINT16x16_OR_DEFINED
 #endif
-#ifndef VUINT16x16_XOR_DEFINED
+#if !defined(VUINT16x16_XOR_DEFINED)
 VEC_FUNC_IMPL vuint16x16 vuint16x16_xor(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -4276,8 +4278,16 @@
 }
 # define VUINT16x16_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x16_CMPLT_DEFINED
+#if !defined(VUINT16x16_NOT_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_not(vuint16x16 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT16x16_NOT_DEFINED
+#endif
+#if !defined(VUINT16x16_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x16 vuint16x16_cmplt(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -4285,9 +4295,8 @@
 }
 # define VUINT16x16_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x16_CMPEQ_DEFINED
+#if !defined(VUINT16x16_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpeq(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -4295,9 +4304,8 @@
 }
 # define VUINT16x16_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x16_CMPGT_DEFINED
+#if !defined(VUINT16x16_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpgt(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -4305,9 +4313,8 @@
 }
 # define VUINT16x16_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x16_CMPLE_DEFINED
+#if !defined(VUINT16x16_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x16 vuint16x16_cmple(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -4315,9 +4322,8 @@
 }
 # define VUINT16x16_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x16_CMPGE_DEFINED
+#if !defined(VUINT16x16_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpge(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -4325,9 +4331,8 @@
 }
 # define VUINT16x16_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x16_MIN_DEFINED
+#if !defined(VUINT16x16_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x16 vuint16x16_min(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vuint16x16 mask;
@@ -4337,9 +4342,8 @@
 }
 # define VUINT16x16_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x16_MAX_DEFINED
+#if !defined(VUINT16x16_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x16 vuint16x16_max(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vuint16x16 mask;
@@ -4349,30 +4353,8 @@
 }
 # define VUINT16x16_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x16_AVG_DEFINED
-VEC_FUNC_IMPL vuint16x16 vuint16x16_avg(vuint16x16 vec1, vuint16x16 vec2)
-{
-	vint16x16 ones = vint16x16_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT16x16_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x16_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint16x16 vuint16x16_lshift(vuint16x16 vec1, vuint16x16 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT16x16_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x16_RSHIFT_DEFINED
+#if !defined(VUINT16x16_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x16 vuint16x16_rshift(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -4380,9 +4362,8 @@
 }
 # define VUINT16x16_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x16_LRSHIFT_DEFINED
+#if !defined(VUINT16x16_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x16 vuint16x16_lrshift(vuint16x16 vec1, vuint16x16 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
@@ -4390,29 +4371,56 @@
 }
 # define VUINT16x16_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT16x16_NOT_DEFINED
-VEC_FUNC_IMPL vuint16x16 vuint16x16_not(vuint16x16 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT16x16_NOT_DEFINED
-#endif
-
-
-/* vuint16x32 */
-
-#ifndef VINT16x32_SPLAT_DEFINED
+#if !defined(VUINT16x16_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint16x16 vuint16x16_lshift(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_LSHIFT_DEFINED
+#endif
+#if !defined(VINT16x32_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint16x32 vint16x32_splat(vec_int16 x)
 {
 	vint16x32 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
+	vec.gcc[16] = x;
+	vec.gcc[17] = x;
+	vec.gcc[18] = x;
+	vec.gcc[19] = x;
+	vec.gcc[20] = x;
+	vec.gcc[21] = x;
+	vec.gcc[22] = x;
+	vec.gcc[23] = x;
+	vec.gcc[24] = x;
+	vec.gcc[25] = x;
+	vec.gcc[26] = x;
+	vec.gcc[27] = x;
+	vec.gcc[28] = x;
+	vec.gcc[29] = x;
+	vec.gcc[30] = x;
+	vec.gcc[31] = x;
 	return vec;
 }
 # define VINT16x32_SPLAT_DEFINED
 #endif
-#ifndef VINT16x32_LOAD_ALIGNED_DEFINED
+#if !defined(VINT16x32_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint16x32 vint16x32_load_aligned(const vec_int16 x[32])
 {
 	vint16x32 vec;
@@ -4421,7 +4429,7 @@
 }
 # define VINT16x32_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x32_LOAD_DEFINED
+#if !defined(VINT16x32_LOAD_DEFINED)
 VEC_FUNC_IMPL vint16x32 vint16x32_load(const vec_int16 x[32])
 {
 	vint16x32 vec;
@@ -4430,21 +4438,21 @@
 }
 # define VINT16x32_LOAD_DEFINED
 #endif
-#ifndef VINT16x32_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint16x32_store_aligned(vint16x32 vec, vec_int16 arr[32])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT16x32_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint16x32_store_aligned(vint16x32 vec, vec_int16 x[32])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT16x32_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x32_STORE_DEFINED
-VEC_FUNC_IMPL void vint16x32_store(vint16x32 vec, vec_int16 arr[32])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT16x32_STORE_DEFINED)
+VEC_FUNC_IMPL void vint16x32_store(vint16x32 vec, vec_int16 x[32])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT16x32_STORE_DEFINED
 #endif
-#ifndef VINT16x32_ADD_DEFINED
+#if !defined(VINT16x32_ADD_DEFINED)
 VEC_FUNC_IMPL vint16x32 vint16x32_add(vint16x32 vec1, vint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -4452,7 +4460,7 @@
 }
 # define VINT16x32_ADD_DEFINED
 #endif
-#ifndef VINT16x32_SUB_DEFINED
+#if !defined(VINT16x32_SUB_DEFINED)
 VEC_FUNC_IMPL vint16x32 vint16x32_sub(vint16x32 vec1, vint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -4460,7 +4468,7 @@
 }
 # define VINT16x32_SUB_DEFINED
 #endif
-#ifndef VINT16x32_MUL_DEFINED
+#if !defined(VINT16x32_MUL_DEFINED)
 VEC_FUNC_IMPL vint16x32 vint16x32_mul(vint16x32 vec1, vint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -4468,106 +4476,8 @@
 }
 # define VINT16x32_MUL_DEFINED
 #endif
-#ifndef VINT16x32_AND_DEFINED
-VEC_FUNC_IMPL vint16x32 vint16x32_and(vint16x32 vec1, vint16x32 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT16x32_AND_DEFINED
-#endif
-#ifndef VINT16x32_OR_DEFINED
-VEC_FUNC_IMPL vint16x32 vint16x32_or(vint16x32 vec1, vint16x32 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT16x32_OR_DEFINED
-#endif
-#ifndef VINT16x32_XOR_DEFINED
-VEC_FUNC_IMPL vint16x32 vint16x32_xor(vint16x32 vec1, vint16x32 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT16x32_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x32_CMPLT_DEFINED
-VEC_FUNC_IMPL vint16x32 vint16x32_cmplt(vint16x32 vec1, vint16x32 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT16x32_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x32_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint16x32 vint16x32_cmpeq(vint16x32 vec1, vint16x32 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT16x32_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x32_CMPGT_DEFINED
-VEC_FUNC_IMPL vint16x32 vint16x32_cmpgt(vint16x32 vec1, vint16x32 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT16x32_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x32_CMPLE_DEFINED
-VEC_FUNC_IMPL vint16x32 vint16x32_cmple(vint16x32 vec1, vint16x32 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT16x32_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x32_CMPGE_DEFINED
-VEC_FUNC_IMPL vint16x32 vint16x32_cmpge(vint16x32 vec1, vint16x32 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT16x32_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x32_MIN_DEFINED
-VEC_FUNC_IMPL vint16x32 vint16x32_min(vint16x32 vec1, vint16x32 vec2)
-{
-	vint16x32 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT16x32_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x32_MAX_DEFINED
-VEC_FUNC_IMPL vint16x32 vint16x32_max(vint16x32 vec1, vint16x32 vec2)
-{
-	vint16x32 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT16x32_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x32_AVG_DEFINED
+#if !defined(VINT16x32_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x32 vint16x32_avg(vint16x32 vec1, vint16x32 vec2)
 {
 	vint16x32 ones = vint16x32_splat(1);
@@ -4581,19 +4491,107 @@
 }
 # define VINT16x32_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x32_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint16x32 vint16x32_lshift(vint16x32 vec1, vuint16x32 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT16x32_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x32_RSHIFT_DEFINED
+#if !defined(VINT16x32_AND_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_and(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_AND_DEFINED
+#endif
+#if !defined(VINT16x32_OR_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_or(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_OR_DEFINED
+#endif
+#if !defined(VINT16x32_XOR_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_xor(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_XOR_DEFINED
+#endif
+#if !defined(VINT16x32_NOT_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_not(vint16x32 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT16x32_NOT_DEFINED
+#endif
+#if !defined(VINT16x32_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x32 vint16x32_cmplt(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_CMPLT_DEFINED
+#endif
+#if !defined(VINT16x32_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x32 vint16x32_cmpeq(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_CMPEQ_DEFINED
+#endif
+#if !defined(VINT16x32_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x32 vint16x32_cmpgt(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_CMPGT_DEFINED
+#endif
+#if !defined(VINT16x32_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x32 vint16x32_cmple(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_CMPLE_DEFINED
+#endif
+#if !defined(VINT16x32_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x32 vint16x32_cmpge(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_CMPGE_DEFINED
+#endif
+#if !defined(VINT16x32_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x32 vint16x32_min(vint16x32 vec1, vint16x32 vec2)
+{
+	vint16x32 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x32_MIN_DEFINED
+#endif
+#if !defined(VINT16x32_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x32 vint16x32_max(vint16x32 vec1, vint16x32 vec2)
+{
+	vint16x32 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x32_MAX_DEFINED
+#endif
+#if !defined(VINT16x32_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x32 vint16x32_rshift(vint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -4601,9 +4599,8 @@
 }
 # define VINT16x32_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT16x32_LRSHIFT_DEFINED
+#if !defined(VINT16x32_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint16x32 vint16x32_lrshift(vint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
@@ -4611,29 +4608,56 @@
 }
 # define VINT16x32_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT16x32_NOT_DEFINED
-VEC_FUNC_IMPL vint16x32 vint16x32_not(vint16x32 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT16x32_NOT_DEFINED
-#endif
-
-
-/* vint16x32 */
-
-#ifndef VUINT16x32_SPLAT_DEFINED
+#if !defined(VINT16x32_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint16x32 vint16x32_lshift(vint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x32_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint16x32 vuint16x32_splat(vec_uint16 x)
 {
 	vuint16x32 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
+	vec.gcc[16] = x;
+	vec.gcc[17] = x;
+	vec.gcc[18] = x;
+	vec.gcc[19] = x;
+	vec.gcc[20] = x;
+	vec.gcc[21] = x;
+	vec.gcc[22] = x;
+	vec.gcc[23] = x;
+	vec.gcc[24] = x;
+	vec.gcc[25] = x;
+	vec.gcc[26] = x;
+	vec.gcc[27] = x;
+	vec.gcc[28] = x;
+	vec.gcc[29] = x;
+	vec.gcc[30] = x;
+	vec.gcc[31] = x;
 	return vec;
 }
 # define VUINT16x32_SPLAT_DEFINED
 #endif
-#ifndef VUINT16x32_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT16x32_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint16x32 vuint16x32_load_aligned(const vec_uint16 x[32])
 {
 	vuint16x32 vec;
@@ -4642,7 +4666,7 @@
 }
 # define VUINT16x32_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x32_LOAD_DEFINED
+#if !defined(VUINT16x32_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint16x32 vuint16x32_load(const vec_uint16 x[32])
 {
 	vuint16x32 vec;
@@ -4651,21 +4675,21 @@
 }
 # define VUINT16x32_LOAD_DEFINED
 #endif
-#ifndef VUINT16x32_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint16x32_store_aligned(vuint16x32 vec, vec_uint16 arr[32])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT16x32_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint16x32_store_aligned(vuint16x32 vec, vec_uint16 x[32])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT16x32_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x32_STORE_DEFINED
-VEC_FUNC_IMPL void vuint16x32_store(vuint16x32 vec, vec_uint16 arr[32])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT16x32_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint16x32_store(vuint16x32 vec, vec_uint16 x[32])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT16x32_STORE_DEFINED
 #endif
-#ifndef VUINT16x32_ADD_DEFINED
+#if !defined(VUINT16x32_ADD_DEFINED)
 VEC_FUNC_IMPL vuint16x32 vuint16x32_add(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -4673,7 +4697,7 @@
 }
 # define VUINT16x32_ADD_DEFINED
 #endif
-#ifndef VUINT16x32_SUB_DEFINED
+#if !defined(VUINT16x32_SUB_DEFINED)
 VEC_FUNC_IMPL vuint16x32 vuint16x32_sub(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -4681,7 +4705,7 @@
 }
 # define VUINT16x32_SUB_DEFINED
 #endif
-#ifndef VUINT16x32_MUL_DEFINED
+#if !defined(VUINT16x32_MUL_DEFINED)
 VEC_FUNC_IMPL vuint16x32 vuint16x32_mul(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -4689,7 +4713,16 @@
 }
 # define VUINT16x32_MUL_DEFINED
 #endif
-#ifndef VUINT16x32_AND_DEFINED
+#if !defined(VUINT16x32_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_avg(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT16x32_AVG_DEFINED
+#endif
+#if !defined(VUINT16x32_AND_DEFINED)
 VEC_FUNC_IMPL vuint16x32 vuint16x32_and(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -4697,7 +4730,7 @@
 }
 # define VUINT16x32_AND_DEFINED
 #endif
-#ifndef VUINT16x32_OR_DEFINED
+#if !defined(VUINT16x32_OR_DEFINED)
 VEC_FUNC_IMPL vuint16x32 vuint16x32_or(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -4705,7 +4738,7 @@
 }
 # define VUINT16x32_OR_DEFINED
 #endif
-#ifndef VUINT16x32_XOR_DEFINED
+#if !defined(VUINT16x32_XOR_DEFINED)
 VEC_FUNC_IMPL vuint16x32 vuint16x32_xor(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -4713,8 +4746,16 @@
 }
 # define VUINT16x32_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x32_CMPLT_DEFINED
+#if !defined(VUINT16x32_NOT_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_not(vuint16x32 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT16x32_NOT_DEFINED
+#endif
+#if !defined(VUINT16x32_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x32 vuint16x32_cmplt(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -4722,9 +4763,8 @@
 }
 # define VUINT16x32_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x32_CMPEQ_DEFINED
+#if !defined(VUINT16x32_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpeq(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -4732,9 +4772,8 @@
 }
 # define VUINT16x32_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x32_CMPGT_DEFINED
+#if !defined(VUINT16x32_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpgt(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -4742,9 +4781,8 @@
 }
 # define VUINT16x32_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x32_CMPLE_DEFINED
+#if !defined(VUINT16x32_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x32 vuint16x32_cmple(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -4752,9 +4790,8 @@
 }
 # define VUINT16x32_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x32_CMPGE_DEFINED
+#if !defined(VUINT16x32_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpge(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -4762,9 +4799,8 @@
 }
 # define VUINT16x32_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x32_MIN_DEFINED
+#if !defined(VUINT16x32_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x32 vuint16x32_min(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vuint16x32 mask;
@@ -4774,9 +4810,8 @@
 }
 # define VUINT16x32_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x32_MAX_DEFINED
+#if !defined(VUINT16x32_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x32 vuint16x32_max(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vuint16x32 mask;
@@ -4786,30 +4821,8 @@
 }
 # define VUINT16x32_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x32_AVG_DEFINED
-VEC_FUNC_IMPL vuint16x32 vuint16x32_avg(vuint16x32 vec1, vuint16x32 vec2)
-{
-	vint16x32 ones = vint16x32_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT16x32_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x32_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint16x32 vuint16x32_lshift(vuint16x32 vec1, vuint16x32 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT16x32_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x32_RSHIFT_DEFINED
+#if !defined(VUINT16x32_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x32 vuint16x32_rshift(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -4817,9 +4830,8 @@
 }
 # define VUINT16x32_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT16x32_LRSHIFT_DEFINED
+#if !defined(VUINT16x32_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint16x32 vuint16x32_lrshift(vuint16x32 vec1, vuint16x32 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
@@ -4827,29 +4839,26 @@
 }
 # define VUINT16x32_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT16x32_NOT_DEFINED
-VEC_FUNC_IMPL vuint16x32 vuint16x32_not(vuint16x32 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT16x32_NOT_DEFINED
-#endif
-
-
-/* vuint32x2 */
-
-#ifndef VINT32x2_SPLAT_DEFINED
+#if !defined(VUINT16x32_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint16x32 vuint16x32_lshift(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_LSHIFT_DEFINED
+#endif
+#if !defined(VINT32x2_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint32x2 vint32x2_splat(vec_int32 x)
 {
 	vint32x2 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
 	return vec;
 }
 # define VINT32x2_SPLAT_DEFINED
 #endif
-#ifndef VINT32x2_LOAD_ALIGNED_DEFINED
+#if !defined(VINT32x2_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint32x2 vint32x2_load_aligned(const vec_int32 x[2])
 {
 	vint32x2 vec;
@@ -4858,7 +4867,7 @@
 }
 # define VINT32x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT32x2_LOAD_DEFINED
+#if !defined(VINT32x2_LOAD_DEFINED)
 VEC_FUNC_IMPL vint32x2 vint32x2_load(const vec_int32 x[2])
 {
 	vint32x2 vec;
@@ -4867,21 +4876,21 @@
 }
 # define VINT32x2_LOAD_DEFINED
 #endif
-#ifndef VINT32x2_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint32x2_store_aligned(vint32x2 vec, vec_int32 arr[2])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT32x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint32x2_store_aligned(vint32x2 vec, vec_int32 x[2])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT32x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT32x2_STORE_DEFINED
-VEC_FUNC_IMPL void vint32x2_store(vint32x2 vec, vec_int32 arr[2])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT32x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vint32x2_store(vint32x2 vec, vec_int32 x[2])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT32x2_STORE_DEFINED
 #endif
-#ifndef VINT32x2_ADD_DEFINED
+#if !defined(VINT32x2_ADD_DEFINED)
 VEC_FUNC_IMPL vint32x2 vint32x2_add(vint32x2 vec1, vint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -4889,7 +4898,7 @@
 }
 # define VINT32x2_ADD_DEFINED
 #endif
-#ifndef VINT32x2_SUB_DEFINED
+#if !defined(VINT32x2_SUB_DEFINED)
 VEC_FUNC_IMPL vint32x2 vint32x2_sub(vint32x2 vec1, vint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -4897,7 +4906,7 @@
 }
 # define VINT32x2_SUB_DEFINED
 #endif
-#ifndef VINT32x2_MUL_DEFINED
+#if !defined(VINT32x2_MUL_DEFINED)
 VEC_FUNC_IMPL vint32x2 vint32x2_mul(vint32x2 vec1, vint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -4905,106 +4914,8 @@
 }
 # define VINT32x2_MUL_DEFINED
 #endif
-#ifndef VINT32x2_AND_DEFINED
-VEC_FUNC_IMPL vint32x2 vint32x2_and(vint32x2 vec1, vint32x2 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT32x2_AND_DEFINED
-#endif
-#ifndef VINT32x2_OR_DEFINED
-VEC_FUNC_IMPL vint32x2 vint32x2_or(vint32x2 vec1, vint32x2 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT32x2_OR_DEFINED
-#endif
-#ifndef VINT32x2_XOR_DEFINED
-VEC_FUNC_IMPL vint32x2 vint32x2_xor(vint32x2 vec1, vint32x2 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT32x2_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x2_CMPLT_DEFINED
-VEC_FUNC_IMPL vint32x2 vint32x2_cmplt(vint32x2 vec1, vint32x2 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT32x2_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x2_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint32x2 vint32x2_cmpeq(vint32x2 vec1, vint32x2 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT32x2_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x2_CMPGT_DEFINED
-VEC_FUNC_IMPL vint32x2 vint32x2_cmpgt(vint32x2 vec1, vint32x2 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT32x2_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x2_CMPLE_DEFINED
-VEC_FUNC_IMPL vint32x2 vint32x2_cmple(vint32x2 vec1, vint32x2 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT32x2_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x2_CMPGE_DEFINED
-VEC_FUNC_IMPL vint32x2 vint32x2_cmpge(vint32x2 vec1, vint32x2 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT32x2_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x2_MIN_DEFINED
-VEC_FUNC_IMPL vint32x2 vint32x2_min(vint32x2 vec1, vint32x2 vec2)
-{
-	vint32x2 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT32x2_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x2_MAX_DEFINED
-VEC_FUNC_IMPL vint32x2 vint32x2_max(vint32x2 vec1, vint32x2 vec2)
-{
-	vint32x2 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT32x2_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x2_AVG_DEFINED
+#if !defined(VINT32x2_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint32x2 vint32x2_avg(vint32x2 vec1, vint32x2 vec2)
 {
 	vint32x2 ones = vint32x2_splat(1);
@@ -5018,19 +4929,107 @@
 }
 # define VINT32x2_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x2_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint32x2 vint32x2_lshift(vint32x2 vec1, vuint32x2 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT32x2_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x2_RSHIFT_DEFINED
+#if !defined(VINT32x2_AND_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_and(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_AND_DEFINED
+#endif
+#if !defined(VINT32x2_OR_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_or(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_OR_DEFINED
+#endif
+#if !defined(VINT32x2_XOR_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_xor(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_XOR_DEFINED
+#endif
+#if !defined(VINT32x2_NOT_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_not(vint32x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT32x2_NOT_DEFINED
+#endif
+#if !defined(VINT32x2_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x2 vint32x2_cmplt(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_CMPLT_DEFINED
+#endif
+#if !defined(VINT32x2_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpeq(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_CMPEQ_DEFINED
+#endif
+#if !defined(VINT32x2_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpgt(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_CMPGT_DEFINED
+#endif
+#if !defined(VINT32x2_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x2 vint32x2_cmple(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_CMPLE_DEFINED
+#endif
+#if !defined(VINT32x2_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpge(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_CMPGE_DEFINED
+#endif
+#if !defined(VINT32x2_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x2 vint32x2_min(vint32x2 vec1, vint32x2 vec2)
+{
+	vint32x2 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x2_MIN_DEFINED
+#endif
+#if !defined(VINT32x2_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x2 vint32x2_max(vint32x2 vec1, vint32x2 vec2)
+{
+	vint32x2 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x2_MAX_DEFINED
+#endif
+#if !defined(VINT32x2_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint32x2 vint32x2_rshift(vint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -5038,9 +5037,8 @@
 }
 # define VINT32x2_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x2_LRSHIFT_DEFINED
+#if !defined(VINT32x2_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint32x2 vint32x2_lrshift(vint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc);
@@ -5048,29 +5046,26 @@
 }
 # define VINT32x2_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT32x2_NOT_DEFINED
-VEC_FUNC_IMPL vint32x2 vint32x2_not(vint32x2 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT32x2_NOT_DEFINED
-#endif
-
-
-/* vint32x2 */
-
-#ifndef VUINT32x2_SPLAT_DEFINED
+#if !defined(VINT32x2_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x2 vint32x2_lshift(vint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x2_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint32x2 vuint32x2_splat(vec_uint32 x)
 {
 	vuint32x2 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
 	return vec;
 }
 # define VUINT32x2_SPLAT_DEFINED
 #endif
-#ifndef VUINT32x2_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT32x2_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint32x2 vuint32x2_load_aligned(const vec_uint32 x[2])
 {
 	vuint32x2 vec;
@@ -5079,7 +5074,7 @@
 }
 # define VUINT32x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT32x2_LOAD_DEFINED
+#if !defined(VUINT32x2_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint32x2 vuint32x2_load(const vec_uint32 x[2])
 {
 	vuint32x2 vec;
@@ -5088,21 +5083,21 @@
 }
 # define VUINT32x2_LOAD_DEFINED
 #endif
-#ifndef VUINT32x2_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint32x2_store_aligned(vuint32x2 vec, vec_uint32 arr[2])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT32x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint32x2_store_aligned(vuint32x2 vec, vec_uint32 x[2])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT32x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT32x2_STORE_DEFINED
-VEC_FUNC_IMPL void vuint32x2_store(vuint32x2 vec, vec_uint32 arr[2])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT32x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint32x2_store(vuint32x2 vec, vec_uint32 x[2])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT32x2_STORE_DEFINED
 #endif
-#ifndef VUINT32x2_ADD_DEFINED
+#if !defined(VUINT32x2_ADD_DEFINED)
 VEC_FUNC_IMPL vuint32x2 vuint32x2_add(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -5110,7 +5105,7 @@
 }
 # define VUINT32x2_ADD_DEFINED
 #endif
-#ifndef VUINT32x2_SUB_DEFINED
+#if !defined(VUINT32x2_SUB_DEFINED)
 VEC_FUNC_IMPL vuint32x2 vuint32x2_sub(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -5118,7 +5113,7 @@
 }
 # define VUINT32x2_SUB_DEFINED
 #endif
-#ifndef VUINT32x2_MUL_DEFINED
+#if !defined(VUINT32x2_MUL_DEFINED)
 VEC_FUNC_IMPL vuint32x2 vuint32x2_mul(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -5126,7 +5121,16 @@
 }
 # define VUINT32x2_MUL_DEFINED
 #endif
-#ifndef VUINT32x2_AND_DEFINED
+#if !defined(VUINT32x2_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_avg(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT32x2_AVG_DEFINED
+#endif
+#if !defined(VUINT32x2_AND_DEFINED)
 VEC_FUNC_IMPL vuint32x2 vuint32x2_and(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -5134,7 +5138,7 @@
 }
 # define VUINT32x2_AND_DEFINED
 #endif
-#ifndef VUINT32x2_OR_DEFINED
+#if !defined(VUINT32x2_OR_DEFINED)
 VEC_FUNC_IMPL vuint32x2 vuint32x2_or(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -5142,7 +5146,7 @@
 }
 # define VUINT32x2_OR_DEFINED
 #endif
-#ifndef VUINT32x2_XOR_DEFINED
+#if !defined(VUINT32x2_XOR_DEFINED)
 VEC_FUNC_IMPL vuint32x2 vuint32x2_xor(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -5150,8 +5154,16 @@
 }
 # define VUINT32x2_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x2_CMPLT_DEFINED
+#if !defined(VUINT32x2_NOT_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_not(vuint32x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT32x2_NOT_DEFINED
+#endif
+#if !defined(VUINT32x2_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x2 vuint32x2_cmplt(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -5159,9 +5171,8 @@
 }
 # define VUINT32x2_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x2_CMPEQ_DEFINED
+#if !defined(VUINT32x2_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpeq(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -5169,9 +5180,8 @@
 }
 # define VUINT32x2_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x2_CMPGT_DEFINED
+#if !defined(VUINT32x2_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpgt(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -5179,9 +5189,8 @@
 }
 # define VUINT32x2_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x2_CMPLE_DEFINED
+#if !defined(VUINT32x2_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x2 vuint32x2_cmple(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -5189,9 +5198,8 @@
 }
 # define VUINT32x2_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x2_CMPGE_DEFINED
+#if !defined(VUINT32x2_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpge(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -5199,9 +5207,8 @@
 }
 # define VUINT32x2_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x2_MIN_DEFINED
+#if !defined(VUINT32x2_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x2 vuint32x2_min(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vuint32x2 mask;
@@ -5211,9 +5218,8 @@
 }
 # define VUINT32x2_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x2_MAX_DEFINED
+#if !defined(VUINT32x2_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x2 vuint32x2_max(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vuint32x2 mask;
@@ -5223,30 +5229,8 @@
 }
 # define VUINT32x2_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x2_AVG_DEFINED
-VEC_FUNC_IMPL vuint32x2 vuint32x2_avg(vuint32x2 vec1, vuint32x2 vec2)
-{
-	vint32x2 ones = vint32x2_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT32x2_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x2_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint32x2 vuint32x2_lshift(vuint32x2 vec1, vuint32x2 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT32x2_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x2_RSHIFT_DEFINED
+#if !defined(VUINT32x2_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x2 vuint32x2_rshift(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -5254,9 +5238,8 @@
 }
 # define VUINT32x2_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x2_LRSHIFT_DEFINED
+#if !defined(VUINT32x2_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x2 vuint32x2_lrshift(vuint32x2 vec1, vuint32x2 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc);
@@ -5264,29 +5247,28 @@
 }
 # define VUINT32x2_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT32x2_NOT_DEFINED
-VEC_FUNC_IMPL vuint32x2 vuint32x2_not(vuint32x2 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT32x2_NOT_DEFINED
-#endif
-
-
-/* vuint32x4 */
-
-#ifndef VINT32x4_SPLAT_DEFINED
+#if !defined(VUINT32x2_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint32x2 vuint32x2_lshift(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_LSHIFT_DEFINED
+#endif
+#if !defined(VINT32x4_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_splat(vec_int32 x)
 {
 	vint32x4 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
 	return vec;
 }
 # define VINT32x4_SPLAT_DEFINED
 #endif
-#ifndef VINT32x4_LOAD_ALIGNED_DEFINED
+#if !defined(VINT32x4_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_load_aligned(const vec_int32 x[4])
 {
 	vint32x4 vec;
@@ -5295,7 +5277,7 @@
 }
 # define VINT32x4_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT32x4_LOAD_DEFINED
+#if !defined(VINT32x4_LOAD_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_load(const vec_int32 x[4])
 {
 	vint32x4 vec;
@@ -5304,21 +5286,21 @@
 }
 # define VINT32x4_LOAD_DEFINED
 #endif
-#ifndef VINT32x4_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 arr[4])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT32x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 x[4])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT32x4_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT32x4_STORE_DEFINED
-VEC_FUNC_IMPL void vint32x4_store(vint32x4 vec, vec_int32 arr[4])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT32x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vint32x4_store(vint32x4 vec, vec_int32 x[4])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT32x4_STORE_DEFINED
 #endif
-#ifndef VINT32x4_ADD_DEFINED
+#if !defined(VINT32x4_ADD_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -5326,7 +5308,7 @@
 }
 # define VINT32x4_ADD_DEFINED
 #endif
-#ifndef VINT32x4_SUB_DEFINED
+#if !defined(VINT32x4_SUB_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_sub(vint32x4 vec1, vint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -5334,7 +5316,7 @@
 }
 # define VINT32x4_SUB_DEFINED
 #endif
-#ifndef VINT32x4_MUL_DEFINED
+#if !defined(VINT32x4_MUL_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_mul(vint32x4 vec1, vint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -5342,106 +5324,8 @@
 }
 # define VINT32x4_MUL_DEFINED
 #endif
-#ifndef VINT32x4_AND_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT32x4_AND_DEFINED
-#endif
-#ifndef VINT32x4_OR_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT32x4_OR_DEFINED
-#endif
-#ifndef VINT32x4_XOR_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT32x4_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x4_CMPLT_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT32x4_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x4_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT32x4_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x4_CMPGT_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT32x4_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x4_CMPLE_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_cmple(vint32x4 vec1, vint32x4 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT32x4_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x4_CMPGE_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_cmpge(vint32x4 vec1, vint32x4 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT32x4_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x4_MIN_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2)
-{
-	vint32x4 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT32x4_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x4_MAX_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2)
-{
-	vint32x4 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT32x4_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x4_AVG_DEFINED
+#if !defined(VINT32x4_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2)
 {
 	vint32x4 ones = vint32x4_splat(1);
@@ -5455,19 +5339,107 @@
 }
 # define VINT32x4_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x4_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT32x4_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x4_RSHIFT_DEFINED
+#if !defined(VINT32x4_AND_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_AND_DEFINED
+#endif
+#if !defined(VINT32x4_OR_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_OR_DEFINED
+#endif
+#if !defined(VINT32x4_XOR_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_XOR_DEFINED
+#endif
+#if !defined(VINT32x4_NOT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_not(vint32x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT32x4_NOT_DEFINED
+#endif
+#if !defined(VINT32x4_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_CMPLT_DEFINED
+#endif
+#if !defined(VINT32x4_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_CMPEQ_DEFINED
+#endif
+#if !defined(VINT32x4_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_CMPGT_DEFINED
+#endif
+#if !defined(VINT32x4_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x4 vint32x4_cmple(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_CMPLE_DEFINED
+#endif
+#if !defined(VINT32x4_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpge(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_CMPGE_DEFINED
+#endif
+#if !defined(VINT32x4_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x4_MIN_DEFINED
+#endif
+#if !defined(VINT32x4_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x4_MAX_DEFINED
+#endif
+#if !defined(VINT32x4_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -5475,9 +5447,8 @@
 }
 # define VINT32x4_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x4_LRSHIFT_DEFINED
+#if !defined(VINT32x4_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
@@ -5485,29 +5456,28 @@
 }
 # define VINT32x4_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT32x4_NOT_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_not(vint32x4 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT32x4_NOT_DEFINED
-#endif
-
-
-/* vint32x4 */
-
-#ifndef VUINT32x4_SPLAT_DEFINED
+#if !defined(VINT32x4_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x4_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_splat(vec_uint32 x)
 {
 	vuint32x4 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
 	return vec;
 }
 # define VUINT32x4_SPLAT_DEFINED
 #endif
-#ifndef VUINT32x4_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT32x4_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_load_aligned(const vec_uint32 x[4])
 {
 	vuint32x4 vec;
@@ -5516,7 +5486,7 @@
 }
 # define VUINT32x4_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT32x4_LOAD_DEFINED
+#if !defined(VUINT32x4_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_load(const vec_uint32 x[4])
 {
 	vuint32x4 vec;
@@ -5525,21 +5495,21 @@
 }
 # define VUINT32x4_LOAD_DEFINED
 #endif
-#ifndef VUINT32x4_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 arr[4])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT32x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 x[4])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT32x4_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT32x4_STORE_DEFINED
-VEC_FUNC_IMPL void vuint32x4_store(vuint32x4 vec, vec_uint32 arr[4])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT32x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint32x4_store(vuint32x4 vec, vec_uint32 x[4])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT32x4_STORE_DEFINED
 #endif
-#ifndef VUINT32x4_ADD_DEFINED
+#if !defined(VUINT32x4_ADD_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_add(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -5547,7 +5517,7 @@
 }
 # define VUINT32x4_ADD_DEFINED
 #endif
-#ifndef VUINT32x4_SUB_DEFINED
+#if !defined(VUINT32x4_SUB_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_sub(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -5555,7 +5525,7 @@
 }
 # define VUINT32x4_SUB_DEFINED
 #endif
-#ifndef VUINT32x4_MUL_DEFINED
+#if !defined(VUINT32x4_MUL_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_mul(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -5563,7 +5533,16 @@
 }
 # define VUINT32x4_MUL_DEFINED
 #endif
-#ifndef VUINT32x4_AND_DEFINED
+#if !defined(VUINT32x4_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT32x4_AVG_DEFINED
+#endif
+#if !defined(VUINT32x4_AND_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_and(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -5571,7 +5550,7 @@
 }
 # define VUINT32x4_AND_DEFINED
 #endif
-#ifndef VUINT32x4_OR_DEFINED
+#if !defined(VUINT32x4_OR_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_or(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -5579,7 +5558,7 @@
 }
 # define VUINT32x4_OR_DEFINED
 #endif
-#ifndef VUINT32x4_XOR_DEFINED
+#if !defined(VUINT32x4_XOR_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_xor(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -5587,8 +5566,16 @@
 }
 # define VUINT32x4_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x4_CMPLT_DEFINED
+#if !defined(VUINT32x4_NOT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_not(vuint32x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT32x4_NOT_DEFINED
+#endif
+#if !defined(VUINT32x4_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x4 vuint32x4_cmplt(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -5596,9 +5583,8 @@
 }
 # define VUINT32x4_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x4_CMPEQ_DEFINED
+#if !defined(VUINT32x4_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpeq(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -5606,9 +5592,8 @@
 }
 # define VUINT32x4_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x4_CMPGT_DEFINED
+#if !defined(VUINT32x4_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpgt(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -5616,9 +5601,8 @@
 }
 # define VUINT32x4_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x4_CMPLE_DEFINED
+#if !defined(VUINT32x4_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x4 vuint32x4_cmple(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -5626,9 +5610,8 @@
 }
 # define VUINT32x4_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x4_CMPGE_DEFINED
+#if !defined(VUINT32x4_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpge(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -5636,9 +5619,8 @@
 }
 # define VUINT32x4_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x4_MIN_DEFINED
+#if !defined(VUINT32x4_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x4 vuint32x4_min(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 mask;
@@ -5648,9 +5630,8 @@
 }
 # define VUINT32x4_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x4_MAX_DEFINED
+#if !defined(VUINT32x4_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x4 vuint32x4_max(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 mask;
@@ -5660,30 +5641,8 @@
 }
 # define VUINT32x4_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x4_AVG_DEFINED
-VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2)
-{
-	vint32x4 ones = vint32x4_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT32x4_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x4_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT32x4_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x4_RSHIFT_DEFINED
+#if !defined(VUINT32x4_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -5691,9 +5650,8 @@
 }
 # define VUINT32x4_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x4_LRSHIFT_DEFINED
+#if !defined(VUINT32x4_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
@@ -5701,29 +5659,32 @@
 }
 # define VUINT32x4_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT32x4_NOT_DEFINED
-VEC_FUNC_IMPL vuint32x4 vuint32x4_not(vuint32x4 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT32x4_NOT_DEFINED
-#endif
-
-
-/* vuint32x8 */
-
-#ifndef VINT32x8_SPLAT_DEFINED
+#if !defined(VUINT32x4_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_LSHIFT_DEFINED
+#endif
+#if !defined(VINT32x8_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint32x8 vint32x8_splat(vec_int32 x)
 {
 	vint32x8 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
 	return vec;
 }
 # define VINT32x8_SPLAT_DEFINED
 #endif
-#ifndef VINT32x8_LOAD_ALIGNED_DEFINED
+#if !defined(VINT32x8_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint32x8 vint32x8_load_aligned(const vec_int32 x[8])
 {
 	vint32x8 vec;
@@ -5732,7 +5693,7 @@
 }
 # define VINT32x8_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT32x8_LOAD_DEFINED
+#if !defined(VINT32x8_LOAD_DEFINED)
 VEC_FUNC_IMPL vint32x8 vint32x8_load(const vec_int32 x[8])
 {
 	vint32x8 vec;
@@ -5741,21 +5702,21 @@
 }
 # define VINT32x8_LOAD_DEFINED
 #endif
-#ifndef VINT32x8_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint32x8_store_aligned(vint32x8 vec, vec_int32 arr[8])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT32x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint32x8_store_aligned(vint32x8 vec, vec_int32 x[8])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT32x8_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT32x8_STORE_DEFINED
-VEC_FUNC_IMPL void vint32x8_store(vint32x8 vec, vec_int32 arr[8])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT32x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vint32x8_store(vint32x8 vec, vec_int32 x[8])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT32x8_STORE_DEFINED
 #endif
-#ifndef VINT32x8_ADD_DEFINED
+#if !defined(VINT32x8_ADD_DEFINED)
 VEC_FUNC_IMPL vint32x8 vint32x8_add(vint32x8 vec1, vint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -5763,7 +5724,7 @@
 }
 # define VINT32x8_ADD_DEFINED
 #endif
-#ifndef VINT32x8_SUB_DEFINED
+#if !defined(VINT32x8_SUB_DEFINED)
 VEC_FUNC_IMPL vint32x8 vint32x8_sub(vint32x8 vec1, vint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -5771,7 +5732,7 @@
 }
 # define VINT32x8_SUB_DEFINED
 #endif
-#ifndef VINT32x8_MUL_DEFINED
+#if !defined(VINT32x8_MUL_DEFINED)
 VEC_FUNC_IMPL vint32x8 vint32x8_mul(vint32x8 vec1, vint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -5779,106 +5740,8 @@
 }
 # define VINT32x8_MUL_DEFINED
 #endif
-#ifndef VINT32x8_AND_DEFINED
-VEC_FUNC_IMPL vint32x8 vint32x8_and(vint32x8 vec1, vint32x8 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT32x8_AND_DEFINED
-#endif
-#ifndef VINT32x8_OR_DEFINED
-VEC_FUNC_IMPL vint32x8 vint32x8_or(vint32x8 vec1, vint32x8 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT32x8_OR_DEFINED
-#endif
-#ifndef VINT32x8_XOR_DEFINED
-VEC_FUNC_IMPL vint32x8 vint32x8_xor(vint32x8 vec1, vint32x8 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT32x8_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x8_CMPLT_DEFINED
-VEC_FUNC_IMPL vint32x8 vint32x8_cmplt(vint32x8 vec1, vint32x8 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT32x8_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x8_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint32x8 vint32x8_cmpeq(vint32x8 vec1, vint32x8 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT32x8_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x8_CMPGT_DEFINED
-VEC_FUNC_IMPL vint32x8 vint32x8_cmpgt(vint32x8 vec1, vint32x8 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT32x8_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x8_CMPLE_DEFINED
-VEC_FUNC_IMPL vint32x8 vint32x8_cmple(vint32x8 vec1, vint32x8 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT32x8_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x8_CMPGE_DEFINED
-VEC_FUNC_IMPL vint32x8 vint32x8_cmpge(vint32x8 vec1, vint32x8 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT32x8_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x8_MIN_DEFINED
-VEC_FUNC_IMPL vint32x8 vint32x8_min(vint32x8 vec1, vint32x8 vec2)
-{
-	vint32x8 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT32x8_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x8_MAX_DEFINED
-VEC_FUNC_IMPL vint32x8 vint32x8_max(vint32x8 vec1, vint32x8 vec2)
-{
-	vint32x8 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT32x8_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x8_AVG_DEFINED
+#if !defined(VINT32x8_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint32x8 vint32x8_avg(vint32x8 vec1, vint32x8 vec2)
 {
 	vint32x8 ones = vint32x8_splat(1);
@@ -5892,19 +5755,107 @@
 }
 # define VINT32x8_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x8_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint32x8 vint32x8_lshift(vint32x8 vec1, vuint32x8 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT32x8_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x8_RSHIFT_DEFINED
+#if !defined(VINT32x8_AND_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_and(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_AND_DEFINED
+#endif
+#if !defined(VINT32x8_OR_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_or(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_OR_DEFINED
+#endif
+#if !defined(VINT32x8_XOR_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_xor(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_XOR_DEFINED
+#endif
+#if !defined(VINT32x8_NOT_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_not(vint32x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT32x8_NOT_DEFINED
+#endif
+#if !defined(VINT32x8_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x8 vint32x8_cmplt(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_CMPLT_DEFINED
+#endif
+#if !defined(VINT32x8_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x8 vint32x8_cmpeq(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_CMPEQ_DEFINED
+#endif
+#if !defined(VINT32x8_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x8 vint32x8_cmpgt(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_CMPGT_DEFINED
+#endif
+#if !defined(VINT32x8_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x8 vint32x8_cmple(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_CMPLE_DEFINED
+#endif
+#if !defined(VINT32x8_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x8 vint32x8_cmpge(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_CMPGE_DEFINED
+#endif
+#if !defined(VINT32x8_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x8 vint32x8_min(vint32x8 vec1, vint32x8 vec2)
+{
+	vint32x8 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x8_MIN_DEFINED
+#endif
+#if !defined(VINT32x8_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x8 vint32x8_max(vint32x8 vec1, vint32x8 vec2)
+{
+	vint32x8 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x8_MAX_DEFINED
+#endif
+#if !defined(VINT32x8_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint32x8 vint32x8_rshift(vint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -5912,9 +5863,8 @@
 }
 # define VINT32x8_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x8_LRSHIFT_DEFINED
+#if !defined(VINT32x8_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint32x8 vint32x8_lrshift(vint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
@@ -5922,29 +5872,32 @@
 }
 # define VINT32x8_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT32x8_NOT_DEFINED
-VEC_FUNC_IMPL vint32x8 vint32x8_not(vint32x8 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT32x8_NOT_DEFINED
-#endif
-
-
-/* vint32x8 */
-
-#ifndef VUINT32x8_SPLAT_DEFINED
+#if !defined(VINT32x8_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x8 vint32x8_lshift(vint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x8_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint32x8 vuint32x8_splat(vec_uint32 x)
 {
 	vuint32x8 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
 	return vec;
 }
 # define VUINT32x8_SPLAT_DEFINED
 #endif
-#ifndef VUINT32x8_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT32x8_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint32x8 vuint32x8_load_aligned(const vec_uint32 x[8])
 {
 	vuint32x8 vec;
@@ -5953,7 +5906,7 @@
 }
 # define VUINT32x8_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT32x8_LOAD_DEFINED
+#if !defined(VUINT32x8_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint32x8 vuint32x8_load(const vec_uint32 x[8])
 {
 	vuint32x8 vec;
@@ -5962,21 +5915,21 @@
 }
 # define VUINT32x8_LOAD_DEFINED
 #endif
-#ifndef VUINT32x8_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint32x8_store_aligned(vuint32x8 vec, vec_uint32 arr[8])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT32x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint32x8_store_aligned(vuint32x8 vec, vec_uint32 x[8])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT32x8_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT32x8_STORE_DEFINED
-VEC_FUNC_IMPL void vuint32x8_store(vuint32x8 vec, vec_uint32 arr[8])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT32x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint32x8_store(vuint32x8 vec, vec_uint32 x[8])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT32x8_STORE_DEFINED
 #endif
-#ifndef VUINT32x8_ADD_DEFINED
+#if !defined(VUINT32x8_ADD_DEFINED)
 VEC_FUNC_IMPL vuint32x8 vuint32x8_add(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -5984,7 +5937,7 @@
 }
 # define VUINT32x8_ADD_DEFINED
 #endif
-#ifndef VUINT32x8_SUB_DEFINED
+#if !defined(VUINT32x8_SUB_DEFINED)
 VEC_FUNC_IMPL vuint32x8 vuint32x8_sub(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -5992,7 +5945,7 @@
 }
 # define VUINT32x8_SUB_DEFINED
 #endif
-#ifndef VUINT32x8_MUL_DEFINED
+#if !defined(VUINT32x8_MUL_DEFINED)
 VEC_FUNC_IMPL vuint32x8 vuint32x8_mul(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -6000,7 +5953,16 @@
 }
 # define VUINT32x8_MUL_DEFINED
 #endif
-#ifndef VUINT32x8_AND_DEFINED
+#if !defined(VUINT32x8_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_avg(vuint32x8 vec1, vuint32x8 vec2)
+{
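+	/* overflow-safe rounded-up average: halve each operand, then add 1
+	 * back when either operand has its low bit set */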
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT32x8_AVG_DEFINED
+#endif
+#if !defined(VUINT32x8_AND_DEFINED)
 VEC_FUNC_IMPL vuint32x8 vuint32x8_and(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -6008,7 +5970,7 @@
 }
 # define VUINT32x8_AND_DEFINED
 #endif
-#ifndef VUINT32x8_OR_DEFINED
+#if !defined(VUINT32x8_OR_DEFINED)
 VEC_FUNC_IMPL vuint32x8 vuint32x8_or(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -6016,7 +5978,7 @@
 }
 # define VUINT32x8_OR_DEFINED
 #endif
-#ifndef VUINT32x8_XOR_DEFINED
+#if !defined(VUINT32x8_XOR_DEFINED)
 VEC_FUNC_IMPL vuint32x8 vuint32x8_xor(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -6024,8 +5986,16 @@
 }
 # define VUINT32x8_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x8_CMPLT_DEFINED
+#if !defined(VUINT32x8_NOT_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_not(vuint32x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT32x8_NOT_DEFINED
+#endif
+#if !defined(VUINT32x8_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x8 vuint32x8_cmplt(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -6033,9 +6003,8 @@
 }
 # define VUINT32x8_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x8_CMPEQ_DEFINED
+#if !defined(VUINT32x8_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpeq(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -6043,9 +6012,8 @@
 }
 # define VUINT32x8_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x8_CMPGT_DEFINED
+#if !defined(VUINT32x8_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpgt(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -6053,9 +6021,8 @@
 }
 # define VUINT32x8_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x8_CMPLE_DEFINED
+#if !defined(VUINT32x8_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x8 vuint32x8_cmple(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -6063,9 +6030,8 @@
 }
 # define VUINT32x8_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x8_CMPGE_DEFINED
+#if !defined(VUINT32x8_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpge(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -6073,9 +6039,8 @@
 }
 # define VUINT32x8_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x8_MIN_DEFINED
+#if !defined(VUINT32x8_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x8 vuint32x8_min(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vuint32x8 mask;
@@ -6085,9 +6050,8 @@
 }
 # define VUINT32x8_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x8_MAX_DEFINED
+#if !defined(VUINT32x8_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x8 vuint32x8_max(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vuint32x8 mask;
@@ -6097,30 +6061,8 @@
 }
 # define VUINT32x8_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x8_AVG_DEFINED
-VEC_FUNC_IMPL vuint32x8 vuint32x8_avg(vuint32x8 vec1, vuint32x8 vec2)
-{
-	vint32x8 ones = vint32x8_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT32x8_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x8_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint32x8 vuint32x8_lshift(vuint32x8 vec1, vuint32x8 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT32x8_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x8_RSHIFT_DEFINED
+#if !defined(VUINT32x8_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x8 vuint32x8_rshift(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -6128,9 +6070,8 @@
 }
 # define VUINT32x8_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x8_LRSHIFT_DEFINED
+#if !defined(VUINT32x8_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x8 vuint32x8_lrshift(vuint32x8 vec1, vuint32x8 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
@@ -6138,29 +6079,40 @@
 }
 # define VUINT32x8_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT32x8_NOT_DEFINED
-VEC_FUNC_IMPL vuint32x8 vuint32x8_not(vuint32x8 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT32x8_NOT_DEFINED
-#endif
-
-
-/* vuint32x16 */
-
-#ifndef VINT32x16_SPLAT_DEFINED
+#if !defined(VUINT32x8_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint32x8 vuint32x8_lshift(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_LSHIFT_DEFINED
+#endif
+#if !defined(VINT32x16_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint32x16 vint32x16_splat(vec_int32 x)
 {
 	vint32x16 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
 	return vec;
 }
 # define VINT32x16_SPLAT_DEFINED
 #endif
-#ifndef VINT32x16_LOAD_ALIGNED_DEFINED
+#if !defined(VINT32x16_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint32x16 vint32x16_load_aligned(const vec_int32 x[16])
 {
 	vint32x16 vec;
@@ -6169,7 +6121,7 @@
 }
 # define VINT32x16_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT32x16_LOAD_DEFINED
+#if !defined(VINT32x16_LOAD_DEFINED)
 VEC_FUNC_IMPL vint32x16 vint32x16_load(const vec_int32 x[16])
 {
 	vint32x16 vec;
@@ -6178,21 +6130,21 @@
 }
 # define VINT32x16_LOAD_DEFINED
 #endif
-#ifndef VINT32x16_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint32x16_store_aligned(vint32x16 vec, vec_int32 arr[16])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT32x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint32x16_store_aligned(vint32x16 vec, vec_int32 x[16])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT32x16_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT32x16_STORE_DEFINED
-VEC_FUNC_IMPL void vint32x16_store(vint32x16 vec, vec_int32 arr[16])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT32x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vint32x16_store(vint32x16 vec, vec_int32 x[16])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT32x16_STORE_DEFINED
 #endif
-#ifndef VINT32x16_ADD_DEFINED
+#if !defined(VINT32x16_ADD_DEFINED)
 VEC_FUNC_IMPL vint32x16 vint32x16_add(vint32x16 vec1, vint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -6200,7 +6152,7 @@
 }
 # define VINT32x16_ADD_DEFINED
 #endif
-#ifndef VINT32x16_SUB_DEFINED
+#if !defined(VINT32x16_SUB_DEFINED)
 VEC_FUNC_IMPL vint32x16 vint32x16_sub(vint32x16 vec1, vint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -6208,7 +6160,7 @@
 }
 # define VINT32x16_SUB_DEFINED
 #endif
-#ifndef VINT32x16_MUL_DEFINED
+#if !defined(VINT32x16_MUL_DEFINED)
 VEC_FUNC_IMPL vint32x16 vint32x16_mul(vint32x16 vec1, vint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -6216,106 +6168,8 @@
 }
 # define VINT32x16_MUL_DEFINED
 #endif
-#ifndef VINT32x16_AND_DEFINED
-VEC_FUNC_IMPL vint32x16 vint32x16_and(vint32x16 vec1, vint32x16 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT32x16_AND_DEFINED
-#endif
-#ifndef VINT32x16_OR_DEFINED
-VEC_FUNC_IMPL vint32x16 vint32x16_or(vint32x16 vec1, vint32x16 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT32x16_OR_DEFINED
-#endif
-#ifndef VINT32x16_XOR_DEFINED
-VEC_FUNC_IMPL vint32x16 vint32x16_xor(vint32x16 vec1, vint32x16 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT32x16_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x16_CMPLT_DEFINED
-VEC_FUNC_IMPL vint32x16 vint32x16_cmplt(vint32x16 vec1, vint32x16 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT32x16_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x16_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint32x16 vint32x16_cmpeq(vint32x16 vec1, vint32x16 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT32x16_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x16_CMPGT_DEFINED
-VEC_FUNC_IMPL vint32x16 vint32x16_cmpgt(vint32x16 vec1, vint32x16 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT32x16_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x16_CMPLE_DEFINED
-VEC_FUNC_IMPL vint32x16 vint32x16_cmple(vint32x16 vec1, vint32x16 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT32x16_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x16_CMPGE_DEFINED
-VEC_FUNC_IMPL vint32x16 vint32x16_cmpge(vint32x16 vec1, vint32x16 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT32x16_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x16_MIN_DEFINED
-VEC_FUNC_IMPL vint32x16 vint32x16_min(vint32x16 vec1, vint32x16 vec2)
-{
-	vint32x16 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT32x16_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x16_MAX_DEFINED
-VEC_FUNC_IMPL vint32x16 vint32x16_max(vint32x16 vec1, vint32x16 vec2)
-{
-	vint32x16 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT32x16_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x16_AVG_DEFINED
+#if !defined(VINT32x16_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint32x16 vint32x16_avg(vint32x16 vec1, vint32x16 vec2)
 {
 	vint32x16 ones = vint32x16_splat(1);
@@ -6329,19 +6183,107 @@
 }
 # define VINT32x16_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x16_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint32x16 vint32x16_lshift(vint32x16 vec1, vuint32x16 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT32x16_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x16_RSHIFT_DEFINED
+#if !defined(VINT32x16_AND_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_and(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_AND_DEFINED
+#endif
+#if !defined(VINT32x16_OR_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_or(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_OR_DEFINED
+#endif
+#if !defined(VINT32x16_XOR_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_xor(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_XOR_DEFINED
+#endif
+#if !defined(VINT32x16_NOT_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_not(vint32x16 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT32x16_NOT_DEFINED
+#endif
+#if !defined(VINT32x16_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x16 vint32x16_cmplt(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_CMPLT_DEFINED
+#endif
+#if !defined(VINT32x16_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x16 vint32x16_cmpeq(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_CMPEQ_DEFINED
+#endif
+#if !defined(VINT32x16_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x16 vint32x16_cmpgt(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_CMPGT_DEFINED
+#endif
+#if !defined(VINT32x16_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x16 vint32x16_cmple(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_CMPLE_DEFINED
+#endif
+#if !defined(VINT32x16_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x16 vint32x16_cmpge(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_CMPGE_DEFINED
+#endif
+#if !defined(VINT32x16_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x16 vint32x16_min(vint32x16 vec1, vint32x16 vec2)
+{
+	vint32x16 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x16_MIN_DEFINED
+#endif
+#if !defined(VINT32x16_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x16 vint32x16_max(vint32x16 vec1, vint32x16 vec2)
+{
+	vint32x16 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x16_MAX_DEFINED
+#endif
+#if !defined(VINT32x16_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint32x16 vint32x16_rshift(vint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -6349,9 +6291,8 @@
 }
 # define VINT32x16_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT32x16_LRSHIFT_DEFINED
+#if !defined(VINT32x16_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint32x16 vint32x16_lrshift(vint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
@@ -6359,29 +6300,40 @@
 }
 # define VINT32x16_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT32x16_NOT_DEFINED
-VEC_FUNC_IMPL vint32x16 vint32x16_not(vint32x16 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT32x16_NOT_DEFINED
-#endif
-
-
-/* vint32x16 */
-
-#ifndef VUINT32x16_SPLAT_DEFINED
+#if !defined(VINT32x16_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint32x16 vint32x16_lshift(vint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x16_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint32x16 vuint32x16_splat(vec_uint32 x)
 {
 	vuint32x16 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
 	return vec;
 }
 # define VUINT32x16_SPLAT_DEFINED
 #endif
-#ifndef VUINT32x16_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT32x16_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint32x16 vuint32x16_load_aligned(const vec_uint32 x[16])
 {
 	vuint32x16 vec;
@@ -6390,7 +6342,7 @@
 }
 # define VUINT32x16_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT32x16_LOAD_DEFINED
+#if !defined(VUINT32x16_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint32x16 vuint32x16_load(const vec_uint32 x[16])
 {
 	vuint32x16 vec;
@@ -6399,21 +6351,21 @@
 }
 # define VUINT32x16_LOAD_DEFINED
 #endif
-#ifndef VUINT32x16_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint32x16_store_aligned(vuint32x16 vec, vec_uint32 arr[16])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT32x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint32x16_store_aligned(vuint32x16 vec, vec_uint32 x[16])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT32x16_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT32x16_STORE_DEFINED
-VEC_FUNC_IMPL void vuint32x16_store(vuint32x16 vec, vec_uint32 arr[16])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT32x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint32x16_store(vuint32x16 vec, vec_uint32 x[16])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT32x16_STORE_DEFINED
 #endif
-#ifndef VUINT32x16_ADD_DEFINED
+#if !defined(VUINT32x16_ADD_DEFINED)
 VEC_FUNC_IMPL vuint32x16 vuint32x16_add(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -6421,7 +6373,7 @@
 }
 # define VUINT32x16_ADD_DEFINED
 #endif
-#ifndef VUINT32x16_SUB_DEFINED
+#if !defined(VUINT32x16_SUB_DEFINED)
 VEC_FUNC_IMPL vuint32x16 vuint32x16_sub(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -6429,7 +6381,7 @@
 }
 # define VUINT32x16_SUB_DEFINED
 #endif
-#ifndef VUINT32x16_MUL_DEFINED
+#if !defined(VUINT32x16_MUL_DEFINED)
 VEC_FUNC_IMPL vuint32x16 vuint32x16_mul(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -6437,7 +6389,16 @@
 }
 # define VUINT32x16_MUL_DEFINED
 #endif
-#ifndef VUINT32x16_AND_DEFINED
+#if !defined(VUINT32x16_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_avg(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT32x16_AVG_DEFINED
+#endif
+#if !defined(VUINT32x16_AND_DEFINED)
 VEC_FUNC_IMPL vuint32x16 vuint32x16_and(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -6445,7 +6406,7 @@
 }
 # define VUINT32x16_AND_DEFINED
 #endif
-#ifndef VUINT32x16_OR_DEFINED
+#if !defined(VUINT32x16_OR_DEFINED)
 VEC_FUNC_IMPL vuint32x16 vuint32x16_or(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -6453,7 +6414,7 @@
 }
 # define VUINT32x16_OR_DEFINED
 #endif
-#ifndef VUINT32x16_XOR_DEFINED
+#if !defined(VUINT32x16_XOR_DEFINED)
 VEC_FUNC_IMPL vuint32x16 vuint32x16_xor(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -6461,8 +6422,16 @@
 }
 # define VUINT32x16_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x16_CMPLT_DEFINED
+#if !defined(VUINT32x16_NOT_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_not(vuint32x16 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT32x16_NOT_DEFINED
+#endif
+#if !defined(VUINT32x16_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x16 vuint32x16_cmplt(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -6470,9 +6439,8 @@
 }
 # define VUINT32x16_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x16_CMPEQ_DEFINED
+#if !defined(VUINT32x16_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpeq(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -6480,9 +6448,8 @@
 }
 # define VUINT32x16_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x16_CMPGT_DEFINED
+#if !defined(VUINT32x16_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpgt(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -6490,9 +6457,8 @@
 }
 # define VUINT32x16_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x16_CMPLE_DEFINED
+#if !defined(VUINT32x16_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x16 vuint32x16_cmple(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -6500,9 +6466,8 @@
 }
 # define VUINT32x16_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x16_CMPGE_DEFINED
+#if !defined(VUINT32x16_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpge(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -6510,9 +6475,8 @@
 }
 # define VUINT32x16_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x16_MIN_DEFINED
+#if !defined(VUINT32x16_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x16 vuint32x16_min(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vuint32x16 mask;
@@ -6522,9 +6486,8 @@
 }
 # define VUINT32x16_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x16_MAX_DEFINED
+#if !defined(VUINT32x16_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x16 vuint32x16_max(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vuint32x16 mask;
@@ -6534,30 +6497,8 @@
 }
 # define VUINT32x16_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x16_AVG_DEFINED
-VEC_FUNC_IMPL vuint32x16 vuint32x16_avg(vuint32x16 vec1, vuint32x16 vec2)
-{
-	vint32x16 ones = vint32x16_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT32x16_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x16_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint32x16 vuint32x16_lshift(vuint32x16 vec1, vuint32x16 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT32x16_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x16_RSHIFT_DEFINED
+#if !defined(VUINT32x16_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x16 vuint32x16_rshift(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -6565,9 +6506,8 @@
 }
 # define VUINT32x16_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT32x16_LRSHIFT_DEFINED
+#if !defined(VUINT32x16_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint32x16 vuint32x16_lrshift(vuint32x16 vec1, vuint32x16 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
@@ -6575,29 +6515,26 @@
 }
 # define VUINT32x16_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT32x16_NOT_DEFINED
-VEC_FUNC_IMPL vuint32x16 vuint32x16_not(vuint32x16 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT32x16_NOT_DEFINED
-#endif
-
-
-/* vuint64x2 */
-
-#ifndef VINT64x2_SPLAT_DEFINED
+#if !defined(VUINT32x16_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint32x16 vuint32x16_lshift(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_LSHIFT_DEFINED
+#endif
+#if !defined(VINT64x2_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint64x2 vint64x2_splat(vec_int64 x)
 {
 	vint64x2 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
 	return vec;
 }
 # define VINT64x2_SPLAT_DEFINED
 #endif
-#ifndef VINT64x2_LOAD_ALIGNED_DEFINED
+#if !defined(VINT64x2_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint64x2 vint64x2_load_aligned(const vec_int64 x[2])
 {
 	vint64x2 vec;
@@ -6606,7 +6543,7 @@
 }
 # define VINT64x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT64x2_LOAD_DEFINED
+#if !defined(VINT64x2_LOAD_DEFINED)
 VEC_FUNC_IMPL vint64x2 vint64x2_load(const vec_int64 x[2])
 {
 	vint64x2 vec;
@@ -6615,21 +6552,21 @@
 }
 # define VINT64x2_LOAD_DEFINED
 #endif
-#ifndef VINT64x2_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint64x2_store_aligned(vint64x2 vec, vec_int64 arr[2])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT64x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint64x2_store_aligned(vint64x2 vec, vec_int64 x[2])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT64x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT64x2_STORE_DEFINED
-VEC_FUNC_IMPL void vint64x2_store(vint64x2 vec, vec_int64 arr[2])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT64x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vint64x2_store(vint64x2 vec, vec_int64 x[2])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT64x2_STORE_DEFINED
 #endif
-#ifndef VINT64x2_ADD_DEFINED
+#if !defined(VINT64x2_ADD_DEFINED)
 VEC_FUNC_IMPL vint64x2 vint64x2_add(vint64x2 vec1, vint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -6637,7 +6574,7 @@
 }
 # define VINT64x2_ADD_DEFINED
 #endif
-#ifndef VINT64x2_SUB_DEFINED
+#if !defined(VINT64x2_SUB_DEFINED)
 VEC_FUNC_IMPL vint64x2 vint64x2_sub(vint64x2 vec1, vint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -6645,7 +6582,7 @@
 }
 # define VINT64x2_SUB_DEFINED
 #endif
-#ifndef VINT64x2_MUL_DEFINED
+#if !defined(VINT64x2_MUL_DEFINED)
 VEC_FUNC_IMPL vint64x2 vint64x2_mul(vint64x2 vec1, vint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -6653,106 +6590,8 @@
 }
 # define VINT64x2_MUL_DEFINED
 #endif
-#ifndef VINT64x2_AND_DEFINED
-VEC_FUNC_IMPL vint64x2 vint64x2_and(vint64x2 vec1, vint64x2 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT64x2_AND_DEFINED
-#endif
-#ifndef VINT64x2_OR_DEFINED
-VEC_FUNC_IMPL vint64x2 vint64x2_or(vint64x2 vec1, vint64x2 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT64x2_OR_DEFINED
-#endif
-#ifndef VINT64x2_XOR_DEFINED
-VEC_FUNC_IMPL vint64x2 vint64x2_xor(vint64x2 vec1, vint64x2 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT64x2_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x2_CMPLT_DEFINED
-VEC_FUNC_IMPL vint64x2 vint64x2_cmplt(vint64x2 vec1, vint64x2 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT64x2_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x2_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint64x2 vint64x2_cmpeq(vint64x2 vec1, vint64x2 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT64x2_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x2_CMPGT_DEFINED
-VEC_FUNC_IMPL vint64x2 vint64x2_cmpgt(vint64x2 vec1, vint64x2 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT64x2_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x2_CMPLE_DEFINED
-VEC_FUNC_IMPL vint64x2 vint64x2_cmple(vint64x2 vec1, vint64x2 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT64x2_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x2_CMPGE_DEFINED
-VEC_FUNC_IMPL vint64x2 vint64x2_cmpge(vint64x2 vec1, vint64x2 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT64x2_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x2_MIN_DEFINED
-VEC_FUNC_IMPL vint64x2 vint64x2_min(vint64x2 vec1, vint64x2 vec2)
-{
-	vint64x2 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT64x2_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x2_MAX_DEFINED
-VEC_FUNC_IMPL vint64x2 vint64x2_max(vint64x2 vec1, vint64x2 vec2)
-{
-	vint64x2 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT64x2_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x2_AVG_DEFINED
+#if !defined(VINT64x2_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint64x2 vint64x2_avg(vint64x2 vec1, vint64x2 vec2)
 {
 	vint64x2 ones = vint64x2_splat(1);
@@ -6766,19 +6605,107 @@
 }
 # define VINT64x2_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x2_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint64x2 vint64x2_lshift(vint64x2 vec1, vuint64x2 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT64x2_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x2_RSHIFT_DEFINED
+#if !defined(VINT64x2_AND_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_and(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_AND_DEFINED
+#endif
+#if !defined(VINT64x2_OR_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_or(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_OR_DEFINED
+#endif
+#if !defined(VINT64x2_XOR_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_xor(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_XOR_DEFINED
+#endif
+#if !defined(VINT64x2_NOT_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_not(vint64x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT64x2_NOT_DEFINED
+#endif
+#if !defined(VINT64x2_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x2 vint64x2_cmplt(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_CMPLT_DEFINED
+#endif
+#if !defined(VINT64x2_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x2 vint64x2_cmpeq(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_CMPEQ_DEFINED
+#endif
+#if !defined(VINT64x2_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x2 vint64x2_cmpgt(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_CMPGT_DEFINED
+#endif
+#if !defined(VINT64x2_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x2 vint64x2_cmple(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_CMPLE_DEFINED
+#endif
+#if !defined(VINT64x2_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x2 vint64x2_cmpge(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_CMPGE_DEFINED
+#endif
+#if !defined(VINT64x2_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x2 vint64x2_min(vint64x2 vec1, vint64x2 vec2)
+{
+	vint64x2 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT64x2_MIN_DEFINED
+#endif
+#if !defined(VINT64x2_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x2 vint64x2_max(vint64x2 vec1, vint64x2 vec2)
+{
+	vint64x2 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT64x2_MAX_DEFINED
+#endif
+#if !defined(VINT64x2_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint64x2 vint64x2_rshift(vint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -6786,9 +6713,8 @@
 }
 # define VINT64x2_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x2_LRSHIFT_DEFINED
+#if !defined(VINT64x2_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint64x2 vint64x2_lrshift(vint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
@@ -6796,29 +6722,26 @@
 }
 # define VINT64x2_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT64x2_NOT_DEFINED
-VEC_FUNC_IMPL vint64x2 vint64x2_not(vint64x2 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT64x2_NOT_DEFINED
-#endif
-
-
-/* vint64x2 */
-
-#ifndef VUINT64x2_SPLAT_DEFINED
+#if !defined(VINT64x2_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x2 vint64x2_lshift(vint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x2_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint64x2 vuint64x2_splat(vec_uint64 x)
 {
 	vuint64x2 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
 	return vec;
 }
 # define VUINT64x2_SPLAT_DEFINED
 #endif
-#ifndef VUINT64x2_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT64x2_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint64x2 vuint64x2_load_aligned(const vec_uint64 x[2])
 {
 	vuint64x2 vec;
@@ -6827,7 +6750,7 @@
 }
 # define VUINT64x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT64x2_LOAD_DEFINED
+#if !defined(VUINT64x2_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint64x2 vuint64x2_load(const vec_uint64 x[2])
 {
 	vuint64x2 vec;
@@ -6836,21 +6759,21 @@
 }
 # define VUINT64x2_LOAD_DEFINED
 #endif
-#ifndef VUINT64x2_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint64x2_store_aligned(vuint64x2 vec, vec_uint64 arr[2])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT64x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint64x2_store_aligned(vuint64x2 vec, vec_uint64 x[2])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT64x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT64x2_STORE_DEFINED
-VEC_FUNC_IMPL void vuint64x2_store(vuint64x2 vec, vec_uint64 arr[2])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT64x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint64x2_store(vuint64x2 vec, vec_uint64 x[2])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT64x2_STORE_DEFINED
 #endif
-#ifndef VUINT64x2_ADD_DEFINED
+#if !defined(VUINT64x2_ADD_DEFINED)
 VEC_FUNC_IMPL vuint64x2 vuint64x2_add(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -6858,7 +6781,7 @@
 }
 # define VUINT64x2_ADD_DEFINED
 #endif
-#ifndef VUINT64x2_SUB_DEFINED
+#if !defined(VUINT64x2_SUB_DEFINED)
 VEC_FUNC_IMPL vuint64x2 vuint64x2_sub(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -6866,7 +6789,7 @@
 }
 # define VUINT64x2_SUB_DEFINED
 #endif
-#ifndef VUINT64x2_MUL_DEFINED
+#if !defined(VUINT64x2_MUL_DEFINED)
 VEC_FUNC_IMPL vuint64x2 vuint64x2_mul(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -6874,7 +6797,16 @@
 }
 # define VUINT64x2_MUL_DEFINED
 #endif
-#ifndef VUINT64x2_AND_DEFINED
+#if !defined(VUINT64x2_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_avg(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT64x2_AVG_DEFINED
+#endif
+#if !defined(VUINT64x2_AND_DEFINED)
 VEC_FUNC_IMPL vuint64x2 vuint64x2_and(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -6882,7 +6814,7 @@
 }
 # define VUINT64x2_AND_DEFINED
 #endif
-#ifndef VUINT64x2_OR_DEFINED
+#if !defined(VUINT64x2_OR_DEFINED)
 VEC_FUNC_IMPL vuint64x2 vuint64x2_or(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -6890,7 +6822,7 @@
 }
 # define VUINT64x2_OR_DEFINED
 #endif
-#ifndef VUINT64x2_XOR_DEFINED
+#if !defined(VUINT64x2_XOR_DEFINED)
 VEC_FUNC_IMPL vuint64x2 vuint64x2_xor(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -6898,8 +6830,16 @@
 }
 # define VUINT64x2_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x2_CMPLT_DEFINED
+#if !defined(VUINT64x2_NOT_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_not(vuint64x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT64x2_NOT_DEFINED
+#endif
+#if !defined(VUINT64x2_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x2 vuint64x2_cmplt(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -6907,9 +6847,8 @@
 }
 # define VUINT64x2_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x2_CMPEQ_DEFINED
+#if !defined(VUINT64x2_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpeq(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -6917,9 +6856,8 @@
 }
 # define VUINT64x2_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x2_CMPGT_DEFINED
+#if !defined(VUINT64x2_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpgt(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -6927,9 +6865,8 @@
 }
 # define VUINT64x2_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x2_CMPLE_DEFINED
+#if !defined(VUINT64x2_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x2 vuint64x2_cmple(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -6937,9 +6874,8 @@
 }
 # define VUINT64x2_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x2_CMPGE_DEFINED
+#if !defined(VUINT64x2_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpge(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -6947,9 +6883,8 @@
 }
 # define VUINT64x2_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x2_MIN_DEFINED
+#if !defined(VUINT64x2_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x2 vuint64x2_min(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vuint64x2 mask;
@@ -6959,9 +6894,8 @@
 }
 # define VUINT64x2_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x2_MAX_DEFINED
+#if !defined(VUINT64x2_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x2 vuint64x2_max(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vuint64x2 mask;
@@ -6971,30 +6905,8 @@
 }
 # define VUINT64x2_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x2_AVG_DEFINED
-VEC_FUNC_IMPL vuint64x2 vuint64x2_avg(vuint64x2 vec1, vuint64x2 vec2)
-{
-	vint64x2 ones = vint64x2_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT64x2_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x2_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint64x2 vuint64x2_lshift(vuint64x2 vec1, vuint64x2 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT64x2_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x2_RSHIFT_DEFINED
+#if !defined(VUINT64x2_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x2 vuint64x2_rshift(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -7002,9 +6914,8 @@
 }
 # define VUINT64x2_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x2_LRSHIFT_DEFINED
+#if !defined(VUINT64x2_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x2 vuint64x2_lrshift(vuint64x2 vec1, vuint64x2 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
@@ -7012,29 +6923,28 @@
 }
 # define VUINT64x2_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT64x2_NOT_DEFINED
-VEC_FUNC_IMPL vuint64x2 vuint64x2_not(vuint64x2 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT64x2_NOT_DEFINED
-#endif
-
-
-/* vuint64x4 */
-
-#ifndef VINT64x4_SPLAT_DEFINED
+#if !defined(VUINT64x2_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint64x2 vuint64x2_lshift(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_LSHIFT_DEFINED
+#endif
+#if !defined(VINT64x4_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint64x4 vint64x4_splat(vec_int64 x)
 {
 	vint64x4 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
 	return vec;
 }
 # define VINT64x4_SPLAT_DEFINED
 #endif
-#ifndef VINT64x4_LOAD_ALIGNED_DEFINED
+#if !defined(VINT64x4_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint64x4 vint64x4_load_aligned(const vec_int64 x[4])
 {
 	vint64x4 vec;
@@ -7043,7 +6953,7 @@
 }
 # define VINT64x4_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT64x4_LOAD_DEFINED
+#if !defined(VINT64x4_LOAD_DEFINED)
 VEC_FUNC_IMPL vint64x4 vint64x4_load(const vec_int64 x[4])
 {
 	vint64x4 vec;
@@ -7052,21 +6962,21 @@
 }
 # define VINT64x4_LOAD_DEFINED
 #endif
-#ifndef VINT64x4_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint64x4_store_aligned(vint64x4 vec, vec_int64 arr[4])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT64x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint64x4_store_aligned(vint64x4 vec, vec_int64 x[4])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT64x4_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT64x4_STORE_DEFINED
-VEC_FUNC_IMPL void vint64x4_store(vint64x4 vec, vec_int64 arr[4])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT64x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vint64x4_store(vint64x4 vec, vec_int64 x[4])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT64x4_STORE_DEFINED
 #endif
-#ifndef VINT64x4_ADD_DEFINED
+#if !defined(VINT64x4_ADD_DEFINED)
 VEC_FUNC_IMPL vint64x4 vint64x4_add(vint64x4 vec1, vint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -7074,7 +6984,7 @@
 }
 # define VINT64x4_ADD_DEFINED
 #endif
-#ifndef VINT64x4_SUB_DEFINED
+#if !defined(VINT64x4_SUB_DEFINED)
 VEC_FUNC_IMPL vint64x4 vint64x4_sub(vint64x4 vec1, vint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -7082,7 +6992,7 @@
 }
 # define VINT64x4_SUB_DEFINED
 #endif
-#ifndef VINT64x4_MUL_DEFINED
+#if !defined(VINT64x4_MUL_DEFINED)
 VEC_FUNC_IMPL vint64x4 vint64x4_mul(vint64x4 vec1, vint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -7090,106 +7000,8 @@
 }
 # define VINT64x4_MUL_DEFINED
 #endif
-#ifndef VINT64x4_AND_DEFINED
-VEC_FUNC_IMPL vint64x4 vint64x4_and(vint64x4 vec1, vint64x4 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT64x4_AND_DEFINED
-#endif
-#ifndef VINT64x4_OR_DEFINED
-VEC_FUNC_IMPL vint64x4 vint64x4_or(vint64x4 vec1, vint64x4 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT64x4_OR_DEFINED
-#endif
-#ifndef VINT64x4_XOR_DEFINED
-VEC_FUNC_IMPL vint64x4 vint64x4_xor(vint64x4 vec1, vint64x4 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT64x4_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x4_CMPLT_DEFINED
-VEC_FUNC_IMPL vint64x4 vint64x4_cmplt(vint64x4 vec1, vint64x4 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT64x4_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x4_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint64x4 vint64x4_cmpeq(vint64x4 vec1, vint64x4 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT64x4_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x4_CMPGT_DEFINED
-VEC_FUNC_IMPL vint64x4 vint64x4_cmpgt(vint64x4 vec1, vint64x4 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT64x4_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x4_CMPLE_DEFINED
-VEC_FUNC_IMPL vint64x4 vint64x4_cmple(vint64x4 vec1, vint64x4 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT64x4_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x4_CMPGE_DEFINED
-VEC_FUNC_IMPL vint64x4 vint64x4_cmpge(vint64x4 vec1, vint64x4 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT64x4_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x4_MIN_DEFINED
-VEC_FUNC_IMPL vint64x4 vint64x4_min(vint64x4 vec1, vint64x4 vec2)
-{
-	vint64x4 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT64x4_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x4_MAX_DEFINED
-VEC_FUNC_IMPL vint64x4 vint64x4_max(vint64x4 vec1, vint64x4 vec2)
-{
-	vint64x4 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT64x4_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x4_AVG_DEFINED
+#if !defined(VINT64x4_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint64x4 vint64x4_avg(vint64x4 vec1, vint64x4 vec2)
 {
 	vint64x4 ones = vint64x4_splat(1);
@@ -7203,19 +7015,107 @@
 }
 # define VINT64x4_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x4_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint64x4 vint64x4_lshift(vint64x4 vec1, vuint64x4 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT64x4_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x4_RSHIFT_DEFINED
+#if !defined(VINT64x4_AND_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_and(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_AND_DEFINED
+#endif
+#if !defined(VINT64x4_OR_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_or(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_OR_DEFINED
+#endif
+#if !defined(VINT64x4_XOR_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_xor(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_XOR_DEFINED
+#endif
+#if !defined(VINT64x4_NOT_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_not(vint64x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT64x4_NOT_DEFINED
+#endif
+#if !defined(VINT64x4_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x4 vint64x4_cmplt(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_CMPLT_DEFINED
+#endif
+#if !defined(VINT64x4_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x4 vint64x4_cmpeq(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_CMPEQ_DEFINED
+#endif
+#if !defined(VINT64x4_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x4 vint64x4_cmpgt(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_CMPGT_DEFINED
+#endif
+#if !defined(VINT64x4_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x4 vint64x4_cmple(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_CMPLE_DEFINED
+#endif
+#if !defined(VINT64x4_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x4 vint64x4_cmpge(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_CMPGE_DEFINED
+#endif
+#if !defined(VINT64x4_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x4 vint64x4_min(vint64x4 vec1, vint64x4 vec2)
+{
+	vint64x4 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT64x4_MIN_DEFINED
+#endif
+#if !defined(VINT64x4_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x4 vint64x4_max(vint64x4 vec1, vint64x4 vec2)
+{
+	vint64x4 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT64x4_MAX_DEFINED
+#endif
+#if !defined(VINT64x4_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint64x4 vint64x4_rshift(vint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -7223,9 +7123,8 @@
 }
 # define VINT64x4_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x4_LRSHIFT_DEFINED
+#if !defined(VINT64x4_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint64x4 vint64x4_lrshift(vint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
@@ -7233,29 +7132,28 @@
 }
 # define VINT64x4_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT64x4_NOT_DEFINED
-VEC_FUNC_IMPL vint64x4 vint64x4_not(vint64x4 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT64x4_NOT_DEFINED
-#endif
-
-
-/* vint64x4 */
-
-#ifndef VUINT64x4_SPLAT_DEFINED
+#if !defined(VINT64x4_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x4 vint64x4_lshift(vint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x4_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint64x4 vuint64x4_splat(vec_uint64 x)
 {
 	vuint64x4 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
 	return vec;
 }
 # define VUINT64x4_SPLAT_DEFINED
 #endif
-#ifndef VUINT64x4_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT64x4_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint64x4 vuint64x4_load_aligned(const vec_uint64 x[4])
 {
 	vuint64x4 vec;
@@ -7264,7 +7162,7 @@
 }
 # define VUINT64x4_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT64x4_LOAD_DEFINED
+#if !defined(VUINT64x4_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint64x4 vuint64x4_load(const vec_uint64 x[4])
 {
 	vuint64x4 vec;
@@ -7273,21 +7171,21 @@
 }
 # define VUINT64x4_LOAD_DEFINED
 #endif
-#ifndef VUINT64x4_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint64x4_store_aligned(vuint64x4 vec, vec_uint64 arr[4])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT64x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint64x4_store_aligned(vuint64x4 vec, vec_uint64 x[4])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT64x4_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT64x4_STORE_DEFINED
-VEC_FUNC_IMPL void vuint64x4_store(vuint64x4 vec, vec_uint64 arr[4])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT64x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint64x4_store(vuint64x4 vec, vec_uint64 x[4])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT64x4_STORE_DEFINED
 #endif
-#ifndef VUINT64x4_ADD_DEFINED
+#if !defined(VUINT64x4_ADD_DEFINED)
 VEC_FUNC_IMPL vuint64x4 vuint64x4_add(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -7295,7 +7193,7 @@
 }
 # define VUINT64x4_ADD_DEFINED
 #endif
-#ifndef VUINT64x4_SUB_DEFINED
+#if !defined(VUINT64x4_SUB_DEFINED)
 VEC_FUNC_IMPL vuint64x4 vuint64x4_sub(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -7303,7 +7201,7 @@
 }
 # define VUINT64x4_SUB_DEFINED
 #endif
-#ifndef VUINT64x4_MUL_DEFINED
+#if !defined(VUINT64x4_MUL_DEFINED)
 VEC_FUNC_IMPL vuint64x4 vuint64x4_mul(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -7311,7 +7209,16 @@
 }
 # define VUINT64x4_MUL_DEFINED
 #endif
-#ifndef VUINT64x4_AND_DEFINED
+#if !defined(VUINT64x4_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_avg(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT64x4_AVG_DEFINED
+#endif
+#if !defined(VUINT64x4_AND_DEFINED)
 VEC_FUNC_IMPL vuint64x4 vuint64x4_and(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -7319,7 +7226,7 @@
 }
 # define VUINT64x4_AND_DEFINED
 #endif
-#ifndef VUINT64x4_OR_DEFINED
+#if !defined(VUINT64x4_OR_DEFINED)
 VEC_FUNC_IMPL vuint64x4 vuint64x4_or(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -7327,7 +7234,7 @@
 }
 # define VUINT64x4_OR_DEFINED
 #endif
-#ifndef VUINT64x4_XOR_DEFINED
+#if !defined(VUINT64x4_XOR_DEFINED)
 VEC_FUNC_IMPL vuint64x4 vuint64x4_xor(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -7335,8 +7242,16 @@
 }
 # define VUINT64x4_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x4_CMPLT_DEFINED
+#if !defined(VUINT64x4_NOT_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_not(vuint64x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT64x4_NOT_DEFINED
+#endif
+#if !defined(VUINT64x4_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x4 vuint64x4_cmplt(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -7344,9 +7259,8 @@
 }
 # define VUINT64x4_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x4_CMPEQ_DEFINED
+#if !defined(VUINT64x4_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpeq(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -7354,9 +7268,8 @@
 }
 # define VUINT64x4_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x4_CMPGT_DEFINED
+#if !defined(VUINT64x4_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpgt(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -7364,9 +7277,8 @@
 }
 # define VUINT64x4_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x4_CMPLE_DEFINED
+#if !defined(VUINT64x4_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x4 vuint64x4_cmple(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -7374,9 +7286,8 @@
 }
 # define VUINT64x4_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x4_CMPGE_DEFINED
+#if !defined(VUINT64x4_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpge(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -7384,9 +7295,8 @@
 }
 # define VUINT64x4_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x4_MIN_DEFINED
+#if !defined(VUINT64x4_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x4 vuint64x4_min(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vuint64x4 mask;
@@ -7396,9 +7306,8 @@
 }
 # define VUINT64x4_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x4_MAX_DEFINED
+#if !defined(VUINT64x4_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x4 vuint64x4_max(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vuint64x4 mask;
@@ -7408,30 +7317,8 @@
 }
 # define VUINT64x4_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x4_AVG_DEFINED
-VEC_FUNC_IMPL vuint64x4 vuint64x4_avg(vuint64x4 vec1, vuint64x4 vec2)
-{
-	vint64x4 ones = vint64x4_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT64x4_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x4_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint64x4 vuint64x4_lshift(vuint64x4 vec1, vuint64x4 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VUINT64x4_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x4_RSHIFT_DEFINED
+#if !defined(VUINT64x4_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x4 vuint64x4_rshift(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -7439,9 +7326,8 @@
 }
 # define VUINT64x4_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x4_LRSHIFT_DEFINED
+#if !defined(VUINT64x4_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x4 vuint64x4_lrshift(vuint64x4 vec1, vuint64x4 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
@@ -7449,29 +7335,32 @@
 }
 # define VUINT64x4_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VUINT64x4_NOT_DEFINED
-VEC_FUNC_IMPL vuint64x4 vuint64x4_not(vuint64x4 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT64x4_NOT_DEFINED
-#endif
-
-
-/* vuint64x8 */
-
-#ifndef VINT64x8_SPLAT_DEFINED
+#if !defined(VUINT64x4_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint64x4 vuint64x4_lshift(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_LSHIFT_DEFINED
+#endif
+#if !defined(VINT64x8_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint64x8 vint64x8_splat(vec_int64 x)
 {
 	vint64x8 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
 	return vec;
 }
 # define VINT64x8_SPLAT_DEFINED
 #endif
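The splat implementations in this hunk now assign each lane individually rather than building the vector from a single compound-literal initializer (the form being deleted just below). Under the GNU C vector extension both spellings yield the same value; the standalone sketch that follows is illustrative only, with its own typedef and function names rather than the vec types, and assumes GCC or Clang:

	/* Illustrative comparison of the two splat styles; typedef and function
	 * names here are hypothetical, not part of vec. Assumes GCC or Clang. */
	#include <assert.h>
	#include <string.h>

	typedef long long v8i64 __attribute__((__vector_size__(64)));

	static v8i64 splat_literal(long long x)
	{
		return (v8i64){ x, x, x, x, x, x, x, x };
	}

	static v8i64 splat_lanes(long long x)
	{
		v8i64 v;
		int i;
		for (i = 0; i < 8; i++)
			v[i] = x; /* per-lane assignment, as in the generated code */
		return v;
	}

	int main(void)
	{
		v8i64 a = splat_literal(1234), b = splat_lanes(1234);
		assert(!memcmp(&a, &b, sizeof(a)));
		return 0;
	}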
-#ifndef VINT64x8_LOAD_ALIGNED_DEFINED
+#if !defined(VINT64x8_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint64x8 vint64x8_load_aligned(const vec_int64 x[8])
 {
 	vint64x8 vec;
@@ -7480,7 +7369,7 @@
 }
 # define VINT64x8_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT64x8_LOAD_DEFINED
+#if !defined(VINT64x8_LOAD_DEFINED)
 VEC_FUNC_IMPL vint64x8 vint64x8_load(const vec_int64 x[8])
 {
 	vint64x8 vec;
@@ -7489,21 +7378,21 @@
 }
 # define VINT64x8_LOAD_DEFINED
 #endif
-#ifndef VINT64x8_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint64x8_store_aligned(vint64x8 vec, vec_int64 arr[8])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VINT64x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint64x8_store_aligned(vint64x8 vec, vec_int64 x[8])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VINT64x8_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT64x8_STORE_DEFINED
-VEC_FUNC_IMPL void vint64x8_store(vint64x8 vec, vec_int64 arr[8])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VINT64x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vint64x8_store(vint64x8 vec, vec_int64 x[8])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VINT64x8_STORE_DEFINED
 #endif
-#ifndef VINT64x8_ADD_DEFINED
+#if !defined(VINT64x8_ADD_DEFINED)
 VEC_FUNC_IMPL vint64x8 vint64x8_add(vint64x8 vec1, vint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -7511,7 +7400,7 @@
 }
 # define VINT64x8_ADD_DEFINED
 #endif
-#ifndef VINT64x8_SUB_DEFINED
+#if !defined(VINT64x8_SUB_DEFINED)
 VEC_FUNC_IMPL vint64x8 vint64x8_sub(vint64x8 vec1, vint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -7519,7 +7408,7 @@
 }
 # define VINT64x8_SUB_DEFINED
 #endif
-#ifndef VINT64x8_MUL_DEFINED
+#if !defined(VINT64x8_MUL_DEFINED)
 VEC_FUNC_IMPL vint64x8 vint64x8_mul(vint64x8 vec1, vint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -7527,106 +7416,8 @@
 }
 # define VINT64x8_MUL_DEFINED
 #endif
-#ifndef VINT64x8_AND_DEFINED
-VEC_FUNC_IMPL vint64x8 vint64x8_and(vint64x8 vec1, vint64x8 vec2)
-{
-	vec1.gcc = (vec1.gcc & vec2.gcc);
-	return vec1;
-}
-# define VINT64x8_AND_DEFINED
-#endif
-#ifndef VINT64x8_OR_DEFINED
-VEC_FUNC_IMPL vint64x8 vint64x8_or(vint64x8 vec1, vint64x8 vec2)
-{
-	vec1.gcc = (vec1.gcc | vec2.gcc);
-	return vec1;
-}
-# define VINT64x8_OR_DEFINED
-#endif
-#ifndef VINT64x8_XOR_DEFINED
-VEC_FUNC_IMPL vint64x8 vint64x8_xor(vint64x8 vec1, vint64x8 vec2)
-{
-	vec1.gcc = (vec1.gcc ^ vec2.gcc);
-	return vec1;
-}
-# define VINT64x8_XOR_DEFINED
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x8_CMPLT_DEFINED
-VEC_FUNC_IMPL vint64x8 vint64x8_cmplt(vint64x8 vec1, vint64x8 vec2)
-{
-	vec1.gcc = (vec1.gcc < vec2.gcc);
-	return vec1;
-}
-# define VINT64x8_CMPLT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x8_CMPEQ_DEFINED
-VEC_FUNC_IMPL vint64x8 vint64x8_cmpeq(vint64x8 vec1, vint64x8 vec2)
-{
-	vec1.gcc = (vec1.gcc == vec2.gcc);
-	return vec1;
-}
-# define VINT64x8_CMPEQ_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x8_CMPGT_DEFINED
-VEC_FUNC_IMPL vint64x8 vint64x8_cmpgt(vint64x8 vec1, vint64x8 vec2)
-{
-	vec1.gcc = (vec1.gcc > vec2.gcc);
-	return vec1;
-}
-# define VINT64x8_CMPGT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x8_CMPLE_DEFINED
-VEC_FUNC_IMPL vint64x8 vint64x8_cmple(vint64x8 vec1, vint64x8 vec2)
-{
-	vec1.gcc = (vec1.gcc <= vec2.gcc);
-	return vec1;
-}
-# define VINT64x8_CMPLE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x8_CMPGE_DEFINED
-VEC_FUNC_IMPL vint64x8 vint64x8_cmpge(vint64x8 vec1, vint64x8 vec2)
-{
-	vec1.gcc = (vec1.gcc >= vec2.gcc);
-	return vec1;
-}
-# define VINT64x8_CMPGE_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x8_MIN_DEFINED
-VEC_FUNC_IMPL vint64x8 vint64x8_min(vint64x8 vec1, vint64x8 vec2)
-{
-	vint64x8 mask;
-	mask.gcc = (vec1.gcc < vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT64x8_MIN_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x8_MAX_DEFINED
-VEC_FUNC_IMPL vint64x8 vint64x8_max(vint64x8 vec1, vint64x8 vec2)
-{
-	vint64x8 mask;
-	mask.gcc = (vec1.gcc > vec2.gcc);
-	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
-	return vec1;
-}
-# define VINT64x8_MAX_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x8_AVG_DEFINED
+#if !defined(VINT64x8_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint64x8 vint64x8_avg(vint64x8 vec1, vint64x8 vec2)
 {
 	vint64x8 ones = vint64x8_splat(1);
@@ -7640,19 +7431,107 @@
 }
 # define VINT64x8_AVG_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x8_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint64x8 vint64x8_lshift(vint64x8 vec1, vuint64x8 vec2)
-{
-	vec1.gcc = (vec1.gcc << vec2.gcc);
-	return vec1;
-}
-# define VINT64x8_LSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x8_RSHIFT_DEFINED
+#if !defined(VINT64x8_AND_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_and(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_AND_DEFINED
+#endif
+#if !defined(VINT64x8_OR_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_or(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_OR_DEFINED
+#endif
+#if !defined(VINT64x8_XOR_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_xor(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_XOR_DEFINED
+#endif
+#if !defined(VINT64x8_NOT_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_not(vint64x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT64x8_NOT_DEFINED
+#endif
+#if !defined(VINT64x8_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x8 vint64x8_cmplt(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_CMPLT_DEFINED
+#endif
+#if !defined(VINT64x8_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x8 vint64x8_cmpeq(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_CMPEQ_DEFINED
+#endif
+#if !defined(VINT64x8_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x8 vint64x8_cmpgt(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_CMPGT_DEFINED
+#endif
+#if !defined(VINT64x8_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x8 vint64x8_cmple(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_CMPLE_DEFINED
+#endif
+#if !defined(VINT64x8_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x8 vint64x8_cmpge(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_CMPGE_DEFINED
+#endif
+#if !defined(VINT64x8_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x8 vint64x8_min(vint64x8 vec1, vint64x8 vec2)
+{
+	vint64x8 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT64x8_MIN_DEFINED
+#endif
+#if !defined(VINT64x8_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x8 vint64x8_max(vint64x8 vec1, vint64x8 vec2)
+{
+	vint64x8 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT64x8_MAX_DEFINED
+#endif
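The min/max fallbacks above rely on GNU C vector comparisons producing a mask vector whose lanes are all ones where the predicate holds, which makes a branch-free select possible as (a & mask) | (b & ~mask). A minimal standalone sketch of that idiom, with illustrative names that are not part of the vec API:

	/* Branch-free min via a compare mask; illustrative only, assumes GCC/Clang. */
	#include <stdio.h>

	typedef long long v4i64 __attribute__((__vector_size__(32)));

	int main(void)
	{
		v4i64 a = { 1, 7, -3, 9 };
		v4i64 b = { 4, 2, -8, 9 };

		v4i64 mask = (a < b);                /* lanes are -1 where a < b, else 0 */
		v4i64 min  = (a & mask) | (b & ~mask);

		int i;
		for (i = 0; i < 4; i++)
			printf("%lld ", min[i]);         /* prints: 1 2 -8 9 */
		printf("\n");
		return 0;
	}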
+#if !defined(VINT64x8_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint64x8 vint64x8_rshift(vint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc >> vec2.gcc);
@@ -7660,9 +7539,8 @@
 }
 # define VINT64x8_RSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VINT64x8_LRSHIFT_DEFINED
+#if !defined(VINT64x8_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vint64x8 vint64x8_lrshift(vint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
@@ -7670,29 +7548,32 @@
 }
 # define VINT64x8_LRSHIFT_DEFINED
 #endif
-#endif
-#ifndef VINT64x8_NOT_DEFINED
-VEC_FUNC_IMPL vint64x8 vint64x8_not(vint64x8 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VINT64x8_NOT_DEFINED
-#endif
-
-
-/* vint64x8 */
-
-#ifndef VUINT64x8_SPLAT_DEFINED
+#if !defined(VINT64x8_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vint64x8 vint64x8_lshift(vint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x8_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint64x8 vuint64x8_splat(vec_uint64 x)
 {
 	vuint64x8 vec;
-	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
 	return vec;
 }
 # define VUINT64x8_SPLAT_DEFINED
 #endif
-#ifndef VUINT64x8_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT64x8_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint64x8 vuint64x8_load_aligned(const vec_uint64 x[8])
 {
 	vuint64x8 vec;
@@ -7701,7 +7582,7 @@
 }
 # define VUINT64x8_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT64x8_LOAD_DEFINED
+#if !defined(VUINT64x8_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint64x8 vuint64x8_load(const vec_uint64 x[8])
 {
 	vuint64x8 vec;
@@ -7710,21 +7591,21 @@
 }
 # define VUINT64x8_LOAD_DEFINED
 #endif
-#ifndef VUINT64x8_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint64x8_store_aligned(vuint64x8 vec, vec_uint64 arr[8])
-{
-	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+#if !defined(VUINT64x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint64x8_store_aligned(vuint64x8 vec, vec_uint64 x[8])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
 }
 # define VUINT64x8_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT64x8_STORE_DEFINED
-VEC_FUNC_IMPL void vuint64x8_store(vuint64x8 vec, vec_uint64 arr[8])
-{
-	memcpy(arr, &vec, sizeof(vec));
+#if !defined(VUINT64x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint64x8_store(vuint64x8 vec, vec_uint64 x[8])
+{
+	memcpy(x, &vec, sizeof(vec));
 }
 # define VUINT64x8_STORE_DEFINED
 #endif
-#ifndef VUINT64x8_ADD_DEFINED
+#if !defined(VUINT64x8_ADD_DEFINED)
 VEC_FUNC_IMPL vuint64x8 vuint64x8_add(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc + vec2.gcc);
@@ -7732,7 +7613,7 @@
 }
 # define VUINT64x8_ADD_DEFINED
 #endif
-#ifndef VUINT64x8_SUB_DEFINED
+#if !defined(VUINT64x8_SUB_DEFINED)
 VEC_FUNC_IMPL vuint64x8 vuint64x8_sub(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc - vec2.gcc);
@@ -7740,7 +7621,7 @@
 }
 # define VUINT64x8_SUB_DEFINED
 #endif
-#ifndef VUINT64x8_MUL_DEFINED
+#if !defined(VUINT64x8_MUL_DEFINED)
 VEC_FUNC_IMPL vuint64x8 vuint64x8_mul(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc * vec2.gcc);
@@ -7748,7 +7629,16 @@
 }
 # define VUINT64x8_MUL_DEFINED
 #endif
-#ifndef VUINT64x8_AND_DEFINED
+#if !defined(VUINT64x8_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_avg(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT64x8_AVG_DEFINED
+#endif
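vuint64x8_avg above uses the identity (a >> 1) + (b >> 1) + ((a | b) & 1), which keeps every intermediate value inside 64 bits while matching the half-up rounding of the exact mean. The scalar check below compares it against a widened reference; it is a hedged sketch, not library code, and unsigned __int128 is a GCC/Clang extension:

	/* Scalar check of the overflow-free rounding average identity; hedged
	 * sketch, not library code. unsigned __int128 is a GCC/Clang extension. */
	#include <assert.h>
	#include <stddef.h>
	#include <stdint.h>

	static uint64_t avg_up(uint64_t a, uint64_t b)
	{
		return (a >> 1) + (b >> 1) + ((a | b) & 1);
	}

	int main(void)
	{
		const uint64_t pairs[][2] = {
			{ 3, 4 },                       /* exact mean 3.5, rounds to 4  */
			{ 0, 1 },
			{ UINT64_MAX, UINT64_MAX - 1 }, /* a + b would overflow 64 bits */
		};
		size_t i;
		for (i = 0; i < sizeof(pairs) / sizeof(pairs[0]); i++) {
			unsigned __int128 sum = (unsigned __int128)pairs[i][0] + pairs[i][1];
			uint64_t ref = (uint64_t)((sum + 1) / 2); /* mean, halves rounded up */
			assert(avg_up(pairs[i][0], pairs[i][1]) == ref);
		}
		return 0;
	}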
+#if !defined(VUINT64x8_AND_DEFINED)
 VEC_FUNC_IMPL vuint64x8 vuint64x8_and(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc & vec2.gcc);
@@ -7756,7 +7646,7 @@
 }
 # define VUINT64x8_AND_DEFINED
 #endif
-#ifndef VUINT64x8_OR_DEFINED
+#if !defined(VUINT64x8_OR_DEFINED)
 VEC_FUNC_IMPL vuint64x8 vuint64x8_or(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc | vec2.gcc);
@@ -7764,7 +7654,7 @@
 }
 # define VUINT64x8_OR_DEFINED
 #endif
-#ifndef VUINT64x8_XOR_DEFINED
+#if !defined(VUINT64x8_XOR_DEFINED)
 VEC_FUNC_IMPL vuint64x8 vuint64x8_xor(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc ^ vec2.gcc);
@@ -7772,8 +7662,16 @@
 }
 # define VUINT64x8_XOR_DEFINED
 #endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x8_CMPLT_DEFINED
+#if !defined(VUINT64x8_NOT_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_not(vuint64x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT64x8_NOT_DEFINED
+#endif
+#if !defined(VUINT64x8_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x8 vuint64x8_cmplt(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc < vec2.gcc);
@@ -7781,9 +7679,8 @@
 }
 # define VUINT64x8_CMPLT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x8_CMPEQ_DEFINED
+#if !defined(VUINT64x8_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpeq(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc == vec2.gcc);
@@ -7791,9 +7688,8 @@
 }
 # define VUINT64x8_CMPEQ_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x8_CMPGT_DEFINED
+#if !defined(VUINT64x8_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpgt(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc > vec2.gcc);
@@ -7801,9 +7697,8 @@
 }
 # define VUINT64x8_CMPGT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x8_CMPLE_DEFINED
+#if !defined(VUINT64x8_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x8 vuint64x8_cmple(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc <= vec2.gcc);
@@ -7811,9 +7706,8 @@
 }
 # define VUINT64x8_CMPLE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x8_CMPGE_DEFINED
+#if !defined(VUINT64x8_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpge(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc >= vec2.gcc);
@@ -7821,9 +7715,8 @@
 }
 # define VUINT64x8_CMPGE_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x8_MIN_DEFINED
+#if !defined(VUINT64x8_MIN_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x8 vuint64x8_min(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vuint64x8 mask;
@@ -7833,9 +7726,8 @@
 }
 # define VUINT64x8_MIN_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x8_MAX_DEFINED
+#if !defined(VUINT64x8_MAX_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x8 vuint64x8_max(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vuint64x8 mask;
@@ -7845,20 +7737,26 @@
 }
 # define VUINT64x8_MAX_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x8_AVG_DEFINED
-VEC_FUNC_IMPL vuint64x8 vuint64x8_avg(vuint64x8 vec1, vuint64x8 vec2)
-{
-	vint64x8 ones = vint64x8_splat(1);
-	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);
-	return vec1;
-}
-# define VUINT64x8_AVG_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x8_LSHIFT_DEFINED
+#if !defined(VUINT64x8_RSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_rshift(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x8_LRSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vuint64x8 vuint64x8_lrshift(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x8_LSHIFT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
 VEC_FUNC_IMPL vuint64x8 vuint64x8_lshift(vuint64x8 vec1, vuint64x8 vec2)
 {
 	vec1.gcc = (vec1.gcc << vec2.gcc);
@@ -7866,34 +7764,873 @@
 }
 # define VUINT64x8_LSHIFT_DEFINED
 #endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x8_RSHIFT_DEFINED
-VEC_FUNC_IMPL vuint64x8 vuint64x8_rshift(vuint64x8 vec1, vuint64x8 vec2)
-{
-	vec1.gcc = (vec1.gcc >> vec2.gcc);
-	return vec1;
-}
-# define VUINT64x8_RSHIFT_DEFINED
-#endif
-#endif
-#if VEC_GNUC_ATLEAST(4, 3, 0)
-#ifndef VUINT64x8_LRSHIFT_DEFINED
-VEC_FUNC_IMPL vuint64x8 vuint64x8_lrshift(vuint64x8 vec1, vuint64x8 vec2)
-{
-	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
-	return vec1;
-}
-# define VUINT64x8_LRSHIFT_DEFINED
-#endif
-#endif
-#ifndef VUINT64x8_NOT_DEFINED
-VEC_FUNC_IMPL vuint64x8 vuint64x8_not(vuint64x8 vec)
-{
-	vec.gcc = ~vec.gcc;
-	return vec;
-}
-# define VUINT64x8_NOT_DEFINED
-#endif
-#endif /* VEC_IMPL_GCC_H_ */
-
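The additions from here on are the new floating-point types (vf32x2 through vf64x8), generated on top of the same GNU C vector extension as the integer code: lane-wise arithmetic is written with the ordinary operators and avg is simply (vec1 + vec2) / 2. The sketch below shows the underlying technique with an illustrative typedef rather than the vec types; vector subscripting and vector/scalar mixing need a reasonably recent GCC or Clang:

	/* Minimal sketch of lane-wise float math with the GNU C vector extension;
	 * the typedef is illustrative, not the vec API. */
	#include <stdio.h>

	typedef float vf32x4_gcc __attribute__((__vector_size__(16)));

	int main(void)
	{
		vf32x4_gcc a = { 1.0f, 2.0f, 3.0f, 4.0f };
		vf32x4_gcc b = { 4.0f, 3.0f, 2.0f, 1.0f };

		vf32x4_gcc sum = a + b;       /* lane-wise add, like vf32x4_add */
		vf32x4_gcc avg = (a + b) / 2; /* plain lane-wise mean, like vf32x4_avg */

		int i;
		for (i = 0; i < 4; i++)
			printf("%g %g\n", (double)sum[i], (double)avg[i]);
		return 0;
	}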
+#if !defined(VF32x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_splat(vec_f32 x)
+{
+	vf32x2 vec;
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	return vec;
+}
+# define VF32x2_SPLAT_DEFINED
+#endif
+#if !defined(VF32x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_load_aligned(const vec_f32 x[2])
+{
+	vf32x2 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VF32x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_load(const vec_f32 x[2])
+{
+	vf32x2 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VF32x2_LOAD_DEFINED
+#endif
+#if !defined(VF32x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf32x2_store_aligned(vf32x2 vec, vec_f32 x[2])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
+}
+# define VF32x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vf32x2_store(vf32x2 vec, vec_f32 x[2])
+{
+	memcpy(x, &vec, sizeof(vec));
+}
+# define VF32x2_STORE_DEFINED
+#endif
+#if !defined(VF32x2_ADD_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_add(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VF32x2_ADD_DEFINED
+#endif
+#if !defined(VF32x2_SUB_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_sub(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VF32x2_SUB_DEFINED
+#endif
+#if !defined(VF32x2_MUL_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_mul(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VF32x2_MUL_DEFINED
+#endif
+#if !defined(VF32x2_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x2 vf32x2_avg(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc) / 2;
+	return vec1;
+}
+# define VF32x2_AVG_DEFINED
+#endif
+#if !defined(VF32x2_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x2 vf32x2_cmplt(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VF32x2_CMPLT_DEFINED
+#endif
+#if !defined(VF32x2_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x2 vf32x2_cmpeq(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VF32x2_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x2_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x2 vf32x2_cmpgt(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VF32x2_CMPGT_DEFINED
+#endif
+#if !defined(VF32x2_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x2 vf32x2_cmple(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VF32x2_CMPLE_DEFINED
+#endif
+#if !defined(VF32x2_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x2 vf32x2_cmpge(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VF32x2_CMPGE_DEFINED
+#endif
+#if !defined(VF32x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_splat(vec_f32 x)
+{
+	vf32x4 vec;
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	return vec;
+}
+# define VF32x4_SPLAT_DEFINED
+#endif
+#if !defined(VF32x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_load_aligned(const vec_f32 x[4])
+{
+	vf32x4 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VF32x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_load(const vec_f32 x[4])
+{
+	vf32x4 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VF32x4_LOAD_DEFINED
+#endif
+#if !defined(VF32x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf32x4_store_aligned(vf32x4 vec, vec_f32 x[4])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
+}
+# define VF32x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vf32x4_store(vf32x4 vec, vec_f32 x[4])
+{
+	memcpy(x, &vec, sizeof(vec));
+}
+# define VF32x4_STORE_DEFINED
+#endif
+#if !defined(VF32x4_ADD_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_add(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VF32x4_ADD_DEFINED
+#endif
+#if !defined(VF32x4_SUB_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_sub(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VF32x4_SUB_DEFINED
+#endif
+#if !defined(VF32x4_MUL_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_mul(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VF32x4_MUL_DEFINED
+#endif
+#if !defined(VF32x4_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x4 vf32x4_avg(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc) / 2;
+	return vec1;
+}
+# define VF32x4_AVG_DEFINED
+#endif
+#if !defined(VF32x4_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x4 vf32x4_cmplt(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VF32x4_CMPLT_DEFINED
+#endif
+#if !defined(VF32x4_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpeq(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VF32x4_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x4_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpgt(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VF32x4_CMPGT_DEFINED
+#endif
+#if !defined(VF32x4_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x4 vf32x4_cmple(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VF32x4_CMPLE_DEFINED
+#endif
+#if !defined(VF32x4_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpge(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VF32x4_CMPGE_DEFINED
+#endif
+#if !defined(VF32x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_splat(vec_f32 x)
+{
+	vf32x8 vec;
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	return vec;
+}
+# define VF32x8_SPLAT_DEFINED
+#endif
+#if !defined(VF32x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_load_aligned(const vec_f32 x[8])
+{
+	vf32x8 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VF32x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_load(const vec_f32 x[8])
+{
+	vf32x8 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VF32x8_LOAD_DEFINED
+#endif
+#if !defined(VF32x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf32x8_store_aligned(vf32x8 vec, vec_f32 x[8])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
+}
+# define VF32x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vf32x8_store(vf32x8 vec, vec_f32 x[8])
+{
+	memcpy(x, &vec, sizeof(vec));
+}
+# define VF32x8_STORE_DEFINED
+#endif
+#if !defined(VF32x8_ADD_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_add(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VF32x8_ADD_DEFINED
+#endif
+#if !defined(VF32x8_SUB_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_sub(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VF32x8_SUB_DEFINED
+#endif
+#if !defined(VF32x8_MUL_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_mul(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VF32x8_MUL_DEFINED
+#endif
+#if !defined(VF32x8_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x8 vf32x8_avg(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc) / 2;
+	return vec1;
+}
+# define VF32x8_AVG_DEFINED
+#endif
+#if !defined(VF32x8_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x8 vf32x8_cmplt(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VF32x8_CMPLT_DEFINED
+#endif
+#if !defined(VF32x8_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x8 vf32x8_cmpeq(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VF32x8_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x8_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x8 vf32x8_cmpgt(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VF32x8_CMPGT_DEFINED
+#endif
+#if !defined(VF32x8_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x8 vf32x8_cmple(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VF32x8_CMPLE_DEFINED
+#endif
+#if !defined(VF32x8_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x8 vf32x8_cmpge(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VF32x8_CMPGE_DEFINED
+#endif
+#if !defined(VF32x16_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_splat(vec_f32 x)
+{
+	vf32x16 vec;
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	vec.gcc[8] = x;
+	vec.gcc[9] = x;
+	vec.gcc[10] = x;
+	vec.gcc[11] = x;
+	vec.gcc[12] = x;
+	vec.gcc[13] = x;
+	vec.gcc[14] = x;
+	vec.gcc[15] = x;
+	return vec;
+}
+# define VF32x16_SPLAT_DEFINED
+#endif
+#if !defined(VF32x16_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_load_aligned(const vec_f32 x[16])
+{
+	vf32x16 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VF32x16_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x16_LOAD_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_load(const vec_f32 x[16])
+{
+	vf32x16 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VF32x16_LOAD_DEFINED
+#endif
+#if !defined(VF32x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf32x16_store_aligned(vf32x16 vec, vec_f32 x[16])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
+}
+# define VF32x16_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vf32x16_store(vf32x16 vec, vec_f32 x[16])
+{
+	memcpy(x, &vec, sizeof(vec));
+}
+# define VF32x16_STORE_DEFINED
+#endif
+#if !defined(VF32x16_ADD_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_add(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VF32x16_ADD_DEFINED
+#endif
+#if !defined(VF32x16_SUB_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_sub(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VF32x16_SUB_DEFINED
+#endif
+#if !defined(VF32x16_MUL_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_mul(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VF32x16_MUL_DEFINED
+#endif
+#if !defined(VF32x16_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x16 vf32x16_avg(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc) / 2;
+	return vec1;
+}
+# define VF32x16_AVG_DEFINED
+#endif
+#if !defined(VF32x16_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x16 vf32x16_cmplt(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VF32x16_CMPLT_DEFINED
+#endif
+#if !defined(VF32x16_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x16 vf32x16_cmpeq(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VF32x16_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x16_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x16 vf32x16_cmpgt(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VF32x16_CMPGT_DEFINED
+#endif
+#if !defined(VF32x16_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x16 vf32x16_cmple(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VF32x16_CMPLE_DEFINED
+#endif
+#if !defined(VF32x16_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf32x16 vf32x16_cmpge(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VF32x16_CMPGE_DEFINED
+#endif
+#if !defined(VF64x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_splat(vec_f64 x)
+{
+	vf64x2 vec;
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	return vec;
+}
+# define VF64x2_SPLAT_DEFINED
+#endif
+#if !defined(VF64x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_load_aligned(const vec_f64 x[2])
+{
+	vf64x2 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VF64x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_load(const vec_f64 x[2])
+{
+	vf64x2 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VF64x2_LOAD_DEFINED
+#endif
+#if !defined(VF64x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf64x2_store_aligned(vf64x2 vec, vec_f64 x[2])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
+}
+# define VF64x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vf64x2_store(vf64x2 vec, vec_f64 x[2])
+{
+	memcpy(x, &vec, sizeof(vec));
+}
+# define VF64x2_STORE_DEFINED
+#endif
+#if !defined(VF64x2_ADD_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_add(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VF64x2_ADD_DEFINED
+#endif
+#if !defined(VF64x2_SUB_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_sub(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VF64x2_SUB_DEFINED
+#endif
+#if !defined(VF64x2_MUL_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_mul(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VF64x2_MUL_DEFINED
+#endif
+#if !defined(VF64x2_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x2 vf64x2_avg(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc) / 2;
+	return vec1;
+}
+# define VF64x2_AVG_DEFINED
+#endif
+#if !defined(VF64x2_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x2 vf64x2_cmplt(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VF64x2_CMPLT_DEFINED
+#endif
+#if !defined(VF64x2_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x2 vf64x2_cmpeq(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VF64x2_CMPEQ_DEFINED
+#endif
+#if !defined(VF64x2_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x2 vf64x2_cmpgt(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VF64x2_CMPGT_DEFINED
+#endif
+#if !defined(VF64x2_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x2 vf64x2_cmple(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VF64x2_CMPLE_DEFINED
+#endif
+#if !defined(VF64x2_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x2 vf64x2_cmpge(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VF64x2_CMPGE_DEFINED
+#endif
+#if !defined(VF64x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_splat(vec_f64 x)
+{
+	vf64x4 vec;
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	return vec;
+}
+# define VF64x4_SPLAT_DEFINED
+#endif
+#if !defined(VF64x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_load_aligned(const vec_f64 x[4])
+{
+	vf64x4 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VF64x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_load(const vec_f64 x[4])
+{
+	vf64x4 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VF64x4_LOAD_DEFINED
+#endif
+#if !defined(VF64x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf64x4_store_aligned(vf64x4 vec, vec_f64 x[4])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
+}
+# define VF64x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vf64x4_store(vf64x4 vec, vec_f64 x[4])
+{
+	memcpy(x, &vec, sizeof(vec));
+}
+# define VF64x4_STORE_DEFINED
+#endif
+#if !defined(VF64x4_ADD_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_add(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VF64x4_ADD_DEFINED
+#endif
+#if !defined(VF64x4_SUB_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_sub(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VF64x4_SUB_DEFINED
+#endif
+#if !defined(VF64x4_MUL_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_mul(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VF64x4_MUL_DEFINED
+#endif
+#if !defined(VF64x4_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x4 vf64x4_avg(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc) / 2;
+	return vec1;
+}
+# define VF64x4_AVG_DEFINED
+#endif
+#if !defined(VF64x4_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x4 vf64x4_cmplt(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VF64x4_CMPLT_DEFINED
+#endif
+#if !defined(VF64x4_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x4 vf64x4_cmpeq(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VF64x4_CMPEQ_DEFINED
+#endif
+#if !defined(VF64x4_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x4 vf64x4_cmpgt(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VF64x4_CMPGT_DEFINED
+#endif
+#if !defined(VF64x4_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x4 vf64x4_cmple(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VF64x4_CMPLE_DEFINED
+#endif
+#if !defined(VF64x4_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x4 vf64x4_cmpge(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VF64x4_CMPGE_DEFINED
+#endif
+#if !defined(VF64x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_splat(vec_f64 x)
+{
+	vf64x8 vec;
+	vec.gcc[0] = x;
+	vec.gcc[1] = x;
+	vec.gcc[2] = x;
+	vec.gcc[3] = x;
+	vec.gcc[4] = x;
+	vec.gcc[5] = x;
+	vec.gcc[6] = x;
+	vec.gcc[7] = x;
+	return vec;
+}
+# define VF64x8_SPLAT_DEFINED
+#endif
+#if !defined(VF64x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_load_aligned(const vec_f64 x[8])
+{
+	vf64x8 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VF64x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_load(const vec_f64 x[8])
+{
+	vf64x8 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VF64x8_LOAD_DEFINED
+#endif
+#if !defined(VF64x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf64x8_store_aligned(vf64x8 vec, vec_f64 x[8])
+{
+	*(__typeof__(vec.gcc) *)x = vec.gcc;
+}
+# define VF64x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vf64x8_store(vf64x8 vec, vec_f64 x[8])
+{
+	memcpy(x, &vec, sizeof(vec));
+}
+# define VF64x8_STORE_DEFINED
+#endif
+#if !defined(VF64x8_ADD_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_add(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VF64x8_ADD_DEFINED
+#endif
+#if !defined(VF64x8_SUB_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_sub(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VF64x8_SUB_DEFINED
+#endif
+#if !defined(VF64x8_MUL_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_mul(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VF64x8_MUL_DEFINED
+#endif
+#if !defined(VF64x8_AVG_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x8 vf64x8_avg(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc) / 2;
+	return vec1;
+}
+# define VF64x8_AVG_DEFINED
+#endif
+#if !defined(VF64x8_CMPLT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x8 vf64x8_cmplt(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VF64x8_CMPLT_DEFINED
+#endif
+#if !defined(VF64x8_CMPEQ_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x8 vf64x8_cmpeq(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VF64x8_CMPEQ_DEFINED
+#endif
+#if !defined(VF64x8_CMPGT_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x8 vf64x8_cmpgt(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VF64x8_CMPGT_DEFINED
+#endif
+#if !defined(VF64x8_CMPLE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x8 vf64x8_cmple(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VF64x8_CMPLE_DEFINED
+#endif
+#if !defined(VF64x8_CMPGE_DEFINED) \
+	 && (VEC_GNUC_ATLEAST(4, 3, 0))
+VEC_FUNC_IMPL vf64x8 vf64x8_cmpge(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VF64x8_CMPGE_DEFINED
+#endif
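A note on the floating-point comparisons above: under the GNU C extension, comparing two float vectors yields a vector of signed integers of the same width (lanes are all ones where the predicate holds), not a float vector, so storing such a mask may need an integer-typed destination, an explicit cast, or -flax-vector-conversions. A small illustrative sketch, with hypothetical typedef names:

	/* Float-vector comparisons produce an integer mask vector; typedef names
	 * here are hypothetical. Assumes GCC or Clang. */
	#include <stdio.h>

	typedef float vf32x4_gcc __attribute__((__vector_size__(16)));
	typedef int   vi32x4_gcc __attribute__((__vector_size__(16)));

	int main(void)
	{
		vf32x4_gcc a = { 1.0f, 5.0f, 2.0f, 2.0f };
		vf32x4_gcc b = { 3.0f, 4.0f, 2.0f, 1.0f };

		vi32x4_gcc lt = (a < b); /* -1 in lanes where a < b, 0 elsewhere */

		int i;
		for (i = 0; i < 4; i++)
			printf("%d ", lt[i]); /* prints: -1 0 0 0 */
		printf("\n");
		return 0;
	}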
--- a/include/vec/impl/generic.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/include/vec/impl/generic.h	Wed Apr 30 18:36:38 2025 -0400
@@ -25,1023 +25,19941 @@
 /* This file is automatically generated! Do not edit it directly!
  * Edit the code that generates it in utils/gengeneric.c  --paper */
 
-#ifndef VEC_IMPL_GENERIC_H_
-#define VEC_IMPL_GENERIC_H_
-
-#define VEC_GENERIC_OPERATION(op, sign, bits, size) \
-	do { \
-		int i; \
-	\
-		for (i = 0; i < size; i++) \
-			vec1.generic[i] = (op); \
-	\
-		return vec1; \
-	} while (0)
-
-#define VEC_GENERIC_BUILTIN_OPERATION(op, sign, bits, size) \
-	VEC_GENERIC_OPERATION(vec1.generic[i] op vec2.generic[i], sign, bits, size)
-
-#define VEC_GENERIC_CMP(op, sign, bits, size) \
-	VEC_GENERIC_OPERATION((vec1.generic[i] op vec2.generic[i]) ? (vec_##sign##int##bits)VEC_MAX_OF_TYPE(vec_uint##bits) : 0, sign, bits, size)
-
-/* okay, now we can do this crap: */
-
-#define VEC_GENERIC_SPLAT(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \
-	{ \
-		v##sign##int##bits##x##size vec; \
-		for (int i = 0; i < size; i++) \
-			vec.generic[i] = x; \
-		return vec; \
-	}
-
-#define VEC_GENERIC_LOAD_EX(name, sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits in[size]) \
-	{ \
-		v##sign##int##bits##x##size vec; \
-		memcpy(&vec, in, sizeof(vec_##sign##int##bits) * size); \
-		return vec; \
-	}
-
-#define VEC_GENERIC_LOAD_ALIGNED(sign, bits, size) VEC_GENERIC_LOAD_EX(load_aligned, sign, bits, size)
-#define VEC_GENERIC_LOAD(sign, bits, size) VEC_GENERIC_LOAD_EX(load, sign, bits, size)
-
-#define VEC_GENERIC_STORE_EX(name, sign, bits, size) \
-	VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \
-	{ \
-		memcpy(out, &vec, sizeof(vec_##sign##int##bits) * size); \
-	}
-
-#define VEC_GENERIC_STORE_ALIGNED(sign, bits, size) VEC_GENERIC_STORE_EX(store_aligned, sign, bits, size)
-#define VEC_GENERIC_STORE(sign, bits, size) VEC_GENERIC_STORE_EX(store, sign, bits, size)
-
-#define VEC_GENERIC_ADD(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_add(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_BUILTIN_OPERATION(+, sign, bits, size); \
-	}
-
-#define VEC_GENERIC_SUB(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_sub(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_BUILTIN_OPERATION(-, sign, bits, size); \
-	}
-
-#define VEC_GENERIC_MUL(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_mul(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_BUILTIN_OPERATION(*, sign, bits, size); \
-	}
-
-#define VEC_GENERIC_DIV(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_div(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] / vec2.generic[i]) : 0, sign, bits, size); \
-	}
-
-#define VEC_GENERIC_MOD(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_mod(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] % vec2.generic[i]) : 0, sign, bits, size); \
-	}
-
-#define VEC_GENERIC_AVG(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		for (int i = 0; i < size; i++) \
-			vec1.generic[i] = vec_im##sign##avg(vec1.generic[i], vec2.generic[i]); \
-	\
-		return vec1; \
-	}
-
-#define VEC_GENERIC_AND(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_and(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_BUILTIN_OPERATION(&, sign, bits, size); \
-	}
-
-#define VEC_GENERIC_OR(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_or(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_BUILTIN_OPERATION(|, sign, bits, size); \
-	}
-
-#define VEC_GENERIC_XOR(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_xor(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_BUILTIN_OPERATION(^, sign, bits, size); \
-	}
-
-#define VEC_GENERIC_NOT(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \
-	{ \
-		return v##sign##int##bits##x##size##_xor(vec, v##sign##int##bits##x##size##_splat((vec_##sign##int##bits)VEC_MAX_OF_TYPE(vec_uint##bits))); \
-	}
-
-#define VEC_GENERIC_CMPLT(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmplt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_CMP(<, sign, bits, size); \
-	}
-
-#define VEC_GENERIC_CMPLE(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmple(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		return v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size##_cmpgt(vec1, vec2)); \
-	}
-
-#define VEC_GENERIC_CMPEQ(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpeq(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_CMP(==, sign, bits, size); \
-	}
-
-#define VEC_GENERIC_CMPGE(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpge(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		return v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size##_cmplt(vec1, vec2)); \
-	}
-
-#define VEC_GENERIC_CMPGT(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpgt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_CMP(>, sign, bits, size); \
-	}
-
-#define VEC_GENERIC_LSHIFT(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_lshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_OPERATION(vec_##sign##lshift(vec1.generic[i], vec2.generic[i]), sign, bits, size); \
-	}
-
-#define VEC_GENERIC_RSHIFT(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_rshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_OPERATION(vec_##sign##rshift(vec1.generic[i], vec2.generic[i]), sign, bits, size); \
-	}
-
-#define VEC_GENERIC_LRSHIFT(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_lrshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \
-	{ \
-		VEC_GENERIC_OPERATION(vec_urshift((vec_uint##bits)vec1.generic[i], vec2.generic[i]), sign, bits, size); \
-	}
-
-#define VEC_GENERIC_MIN(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_min(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		v##sign##int##bits##x##size cmplt = v##sign##int##bits##x##size##_cmplt(vec1, vec2); \
-	\
-		v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \
-		v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \
-	\
-		return v##sign##int##bits##x##size##_or(a, b); \
-	}
-
-#define VEC_GENERIC_MAX(sign, bits, size) \
-	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_max(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
-	{ \
-		v##sign##int##bits##x##size cmplt = v##sign##int##bits##x##size##_cmpgt(vec1, vec2); \
-	\
-		v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \
-		v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \
-	\
-		return v##sign##int##bits##x##size##_or(a, b); \
-	}
-
 /* ------------------------------------------------------------------------ */
 /* PREPROCESSOR HELL INCOMING */
 
-
-
-/* vuint8x2 */
-
-#ifndef VINT8x2_SPLAT_DEFINED
-VEC_GENERIC_SPLAT(/* nothing */, 8, 2)
+#if !defined(VINT8x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_splat(vec_int8 x)
+{
+	vint8x2 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	return vec;
+}
 # define VINT8x2_SPLAT_DEFINED
 #endif
-#ifndef VINT8x2_LOAD_ALIGNED_DEFINED
-VEC_GENERIC_LOAD_ALIGNED(/* nothing */, 8, 2)
+#if !defined(VINT8x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_load_aligned(const vec_int8 x[2])
+{
+	vint8x2 vec;
+	memcpy(vec.generic, x, 2);
+	return vec;
+}
 # define VINT8x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x2_LOAD_DEFINED
-VEC_GENERIC_LOAD(/* nothing */, 8, 2)
+#if !defined(VINT8x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_load(const vec_int8 x[2])
+{
+	vint8x2 vec;
+	memcpy(vec.generic, x, 2);
+	return vec;
+}
 # define VINT8x2_LOAD_DEFINED
 #endif
-#ifndef VINT8x2_STORE_ALIGNED_DEFINED
-VEC_GENERIC_STORE_ALIGNED(/* nothing */, 8, 2)
+#if !defined(VINT8x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x2_store_aligned(vint8x2 vec, vec_int8 x[2])
+{
+	memcpy(x, vec.generic, 2);
+}
 # define VINT8x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x2_STORE_DEFINED
-VEC_GENERIC_STORE(/* nothing */, 8, 2)
+#if !defined(VINT8x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x2_store(vint8x2 vec, vec_int8 x[2])
+{
+	memcpy(x, vec.generic, 2);
+}
 # define VINT8x2_STORE_DEFINED
 #endif
-#ifndef VINT8x2_ADD_DEFINED
-VEC_GENERIC_ADD(/* nothing */, 8, 2)
+#if !defined(VINT8x2_ADD_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_add(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	return vec1;
+}
 # define VINT8x2_ADD_DEFINED
 #endif
-#ifndef VINT8x2_SUB_DEFINED
-VEC_GENERIC_SUB(/* nothing */, 8, 2)
+#if !defined(VINT8x2_SUB_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_sub(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	return vec1;
+}
 # define VINT8x2_SUB_DEFINED
 #endif
-#ifndef VINT8x2_MUL_DEFINED
-VEC_GENERIC_MUL(/* nothing */, 8, 2)
+#if !defined(VINT8x2_MUL_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_mul(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	return vec1;
+}
 # define VINT8x2_MUL_DEFINED
 #endif
-#ifndef VINT8x2_DIV_DEFINED
-VEC_GENERIC_DIV(/* nothing */, 8, 2)
+#if !defined(VINT8x2_DIV_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_div(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VINT8x2_DIV_DEFINED
 #endif
-#ifndef VINT8x2_MOD_DEFINED
-VEC_GENERIC_MOD(/* nothing */, 8, 2)
+#if !defined(VINT8x2_MOD_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_mod(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VINT8x2_MOD_DEFINED
 #endif
-#ifndef VINT8x2_AVG_DEFINED
-VEC_GENERIC_AVG(/* nothing */, 8, 2)
+#if !defined(VINT8x2_AVG_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_avg(vint8x2 vec1, vint8x2 vec2)
+{
+	vec_int8 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
 # define VINT8x2_AVG_DEFINED
 #endif
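The signed generic avg above splits each operand into its quotient and remainder by 2 so that no intermediate sum can overflow the element type; recombined, the result equals the exact mean rounded up (toward positive infinity). The exhaustive scalar check below is a hedged sketch, not part of the test suite, and verifies the same decomposition over every int8 pair:

	/* Exhaustive scalar check of the overflow-free signed average used above;
	 * hedged sketch, not part of the test suite. */
	#include <assert.h>
	#include <stdint.h>

	static int8_t avg_decomposed(int8_t x, int8_t y)
	{
		int xr = x % 2, yr = y % 2;           /* remainders, each in {-1, 0, 1} */
		int rem_quot = (xr + yr) / 2;
		int rem_rem  = (xr + yr) % 2;
		return (int8_t)((x / 2) + (y / 2) + rem_quot + (rem_rem == 1));
	}

	int main(void)
	{
		int x, y;
		for (x = -128; x <= 127; x++) {
			for (y = -128; y <= 127; y++) {
				int sum = x + y;                   /* cannot overflow int */
				int ref = sum / 2 + (sum % 2 > 0); /* mean, rounded up    */
				assert(avg_decomposed((int8_t)x, (int8_t)y) == ref);
			}
		}
		return 0;
	}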
-#ifndef VINT8x2_AND_DEFINED
-VEC_GENERIC_AND(/* nothing */, 8, 2)
+#if !defined(VINT8x2_AND_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_and(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	return vec1;
+}
 # define VINT8x2_AND_DEFINED
 #endif
-#ifndef VINT8x2_OR_DEFINED
-VEC_GENERIC_OR(/* nothing */, 8, 2)
+#if !defined(VINT8x2_OR_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_or(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	return vec1;
+}
 # define VINT8x2_OR_DEFINED
 #endif
-#ifndef VINT8x2_XOR_DEFINED
-VEC_GENERIC_XOR(/* nothing */, 8, 2)
+#if !defined(VINT8x2_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_xor(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	return vec1;
+}
 # define VINT8x2_XOR_DEFINED
 #endif
-#ifndef VINT8x2_NOT_DEFINED
-VEC_GENERIC_NOT(/* nothing */, 8, 2)
+#if !defined(VINT8x2_NOT_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_not(vint8x2 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	return vec;
+}
 # define VINT8x2_NOT_DEFINED
 #endif
-#ifndef VINT8x2_CMPLT_DEFINED
-VEC_GENERIC_CMPLT(/* nothing */, 8, 2)
+#if !defined(VINT8x2_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_cmplt(vint8x2 vec1, vint8x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1);
+	return vec1;
+}
 # define VINT8x2_CMPLT_DEFINED
 #endif
-#ifndef VINT8x2_CMPEQ_DEFINED
-VEC_GENERIC_CMPEQ(/* nothing */, 8, 2)
+#if !defined(VINT8x2_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpeq(vint8x2 vec1, vint8x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1);
+	return vec1;
+}
 # define VINT8x2_CMPEQ_DEFINED
 #endif
-#ifndef VINT8x2_CMPGT_DEFINED
-VEC_GENERIC_CMPGT(/* nothing */, 8, 2)
+#if !defined(VINT8x2_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpgt(vint8x2 vec1, vint8x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1);
+	return vec1;
+}
 # define VINT8x2_CMPGT_DEFINED
 #endif
-#ifndef VINT8x2_CMPLE_DEFINED
-VEC_GENERIC_CMPLE(/* nothing */, 8, 2)
+#if !defined(VINT8x2_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_cmple(vint8x2 vec1, vint8x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1);
+	return vec1;
+}
 # define VINT8x2_CMPLE_DEFINED
 #endif
-#ifndef VINT8x2_CMPGE_DEFINED
-VEC_GENERIC_CMPGE(/* nothing */, 8, 2)
+#if !defined(VINT8x2_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpge(vint8x2 vec1, vint8x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1);
+	return vec1;
+}
 # define VINT8x2_CMPGE_DEFINED
 #endif
-#ifndef VINT8x2_MIN_DEFINED
-VEC_GENERIC_MIN(/* nothing */, 8, 2)
+#if !defined(VINT8x2_MIN_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_min(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VINT8x2_MIN_DEFINED
 #endif
-#ifndef VINT8x2_MAX_DEFINED
-VEC_GENERIC_MAX(/* nothing */, 8, 2)
+#if !defined(VINT8x2_MAX_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_max(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VINT8x2_MAX_DEFINED
 #endif
-#ifndef VINT8x2_RSHIFT_DEFINED
-VEC_GENERIC_RSHIFT(/* nothing */, 8, 2)
+#if !defined(VINT8x2_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_rshift(vint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]);
+	vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]);
+	return vec1;
+}
 # define VINT8x2_RSHIFT_DEFINED
 #endif
-#ifndef VINT8x2_LRSHIFT_DEFINED
-VEC_GENERIC_LRSHIFT(/* nothing */, 8, 2)
+#if !defined(VINT8x2_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_lrshift(vint8x2 vec1, vuint8x2 vec2)
+{
+	union { vec_uint8 u; vec_int8 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	return vec1;
+}
 # define VINT8x2_LRSHIFT_DEFINED
 #endif
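The lrshift fallback above funnels the signed lane through a union so the shift is performed on the unsigned member and therefore zero-fills; C99 permits reading a union member other than the one last stored, reinterpreting the bytes. A scalar sketch of the same trick, with an illustrative helper name rather than one of the vec_* functions:

	/* Logical (zero-fill) right shift of a signed value via union punning;
	 * illustrative helper, not part of the vec API. */
	#include <stdio.h>
	#include <stdint.h>

	static int8_t lrshift8(int8_t v, unsigned n)
	{
		union { uint8_t u; int8_t s; } x;
		x.s = v;
		x.u = (uint8_t)(x.u >> n); /* shift as unsigned: high bits fill with 0 */
		return x.s;
	}

	int main(void)
	{
		printf("%d\n", lrshift8(-1, 4)); /* 0xFF >> 4 == 0x0F, prints 15 */
		return 0;
	}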
-#ifndef VINT8x2_LSHIFT_DEFINED
-VEC_GENERIC_LSHIFT(/* nothing */, 8, 2)
+#if !defined(VINT8x2_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x2 vint8x2_lshift(vint8x2 vec1, vuint8x2 vec2)
+{
+	union { vec_uint8 u; vec_int8 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	return vec1;
+}
 # define VINT8x2_LSHIFT_DEFINED
 #endif
-
-
-/* vint8x2 */
-
-#ifndef VUINT8x2_SPLAT_DEFINED
-VEC_GENERIC_SPLAT(u, 8, 2)
+#if !defined(VUINT8x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_splat(vec_uint8 x)
+{
+	vuint8x2 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	return vec;
+}
 # define VUINT8x2_SPLAT_DEFINED
 #endif
-#ifndef VUINT8x2_LOAD_ALIGNED_DEFINED
-VEC_GENERIC_LOAD_ALIGNED(u, 8, 2)
+#if !defined(VUINT8x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_load_aligned(const vec_uint8 x[2])
+{
+	vuint8x2 vec;
+	memcpy(vec.generic, x, 2);
+	return vec;
+}
 # define VUINT8x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x2_LOAD_DEFINED
-VEC_GENERIC_LOAD(u, 8, 2)
+#if !defined(VUINT8x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_load(const vec_uint8 x[2])
+{
+	vuint8x2 vec;
+	memcpy(vec.generic, x, 2);
+	return vec;
+}
 # define VUINT8x2_LOAD_DEFINED
 #endif
-#ifndef VUINT8x2_STORE_ALIGNED_DEFINED
-VEC_GENERIC_STORE_ALIGNED(u, 8, 2)
+#if !defined(VUINT8x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x2_store_aligned(vuint8x2 vec, vec_uint8 x[2])
+{
+	memcpy(x, vec.generic, 2);
+}
 # define VUINT8x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x2_STORE_DEFINED
-VEC_GENERIC_STORE(u, 8, 2)
+#if !defined(VUINT8x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint8x2_store(vuint8x2 vec, vec_uint8 x[2])
+{
+	memcpy(x, vec.generic, 2);
+}
 # define VUINT8x2_STORE_DEFINED
 #endif
-#ifndef VUINT8x2_ADD_DEFINED
-VEC_GENERIC_ADD(u, 8, 2)
+#if !defined(VUINT8x2_ADD_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_add(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	return vec1;
+}
 # define VUINT8x2_ADD_DEFINED
 #endif
-#ifndef VUINT8x2_SUB_DEFINED
-VEC_GENERIC_SUB(u, 8, 2)
+#if !defined(VUINT8x2_SUB_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_sub(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	return vec1;
+}
 # define VUINT8x2_SUB_DEFINED
 #endif
-#ifndef VUINT8x2_MUL_DEFINED
-VEC_GENERIC_MUL(u, 8, 2)
+#if !defined(VUINT8x2_MUL_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_mul(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	return vec1;
+}
 # define VUINT8x2_MUL_DEFINED
 #endif
-#ifndef VUINT8x2_DIV_DEFINED
-VEC_GENERIC_DIV(u, 8, 2)
+#if !defined(VUINT8x2_DIV_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_div(vuint8x2 vec1, vuint8x2 vec2)
+{
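+	/* division by zero yields 0 in that lane rather than trapping */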
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VUINT8x2_DIV_DEFINED
 #endif
-#ifndef VUINT8x2_MOD_DEFINED
-VEC_GENERIC_MOD(u, 8, 2)
+#if !defined(VUINT8x2_MOD_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_mod(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VUINT8x2_MOD_DEFINED
 #endif
-#ifndef VUINT8x2_AVG_DEFINED
-VEC_GENERIC_AVG(u, 8, 2)
+#if !defined(VUINT8x2_AVG_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_avg(vuint8x2 vec1, vuint8x2 vec2)
+{
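+	/* overflow-safe average: half of each operand plus a carry when either
+	 * is odd, i.e. ceil((vec1 + vec2) / 2) per lane */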
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	return vec1;
+}
 # define VUINT8x2_AVG_DEFINED
 #endif
-#ifndef VUINT8x2_AND_DEFINED
-VEC_GENERIC_AND(u, 8, 2)
+#if !defined(VUINT8x2_AND_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_and(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	return vec1;
+}
 # define VUINT8x2_AND_DEFINED
 #endif
-#ifndef VUINT8x2_OR_DEFINED
-VEC_GENERIC_OR(u, 8, 2)
+#if !defined(VUINT8x2_OR_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_or(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	return vec1;
+}
 # define VUINT8x2_OR_DEFINED
 #endif
-#ifndef VUINT8x2_XOR_DEFINED
-VEC_GENERIC_XOR(u, 8, 2)
+#if !defined(VUINT8x2_XOR_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_xor(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	return vec1;
+}
 # define VUINT8x2_XOR_DEFINED
 #endif
-#ifndef VUINT8x2_NOT_DEFINED
-VEC_GENERIC_NOT(u, 8, 2)
+#if !defined(VUINT8x2_NOT_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_not(vuint8x2 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	return vec;
+}
 # define VUINT8x2_NOT_DEFINED
 #endif
-#ifndef VUINT8x2_CMPLT_DEFINED
-VEC_GENERIC_CMPLT(u, 8, 2)
+#if !defined(VUINT8x2_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmplt(vuint8x2 vec1, vuint8x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1);
+	return vec1;
+}
 # define VUINT8x2_CMPLT_DEFINED
 #endif
-#ifndef VUINT8x2_CMPEQ_DEFINED
-VEC_GENERIC_CMPEQ(u, 8, 2)
+#if !defined(VUINT8x2_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpeq(vuint8x2 vec1, vuint8x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1);
+	return vec1;
+}
 # define VUINT8x2_CMPEQ_DEFINED
 #endif
-#ifndef VUINT8x2_CMPGT_DEFINED
-VEC_GENERIC_CMPGT(u, 8, 2)
+#if !defined(VUINT8x2_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpgt(vuint8x2 vec1, vuint8x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1);
+	return vec1;
+}
 # define VUINT8x2_CMPGT_DEFINED
 #endif
-#ifndef VUINT8x2_CMPLE_DEFINED
-VEC_GENERIC_CMPLE(u, 8, 2)
+#if !defined(VUINT8x2_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmple(vuint8x2 vec1, vuint8x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1);
+	return vec1;
+}
 # define VUINT8x2_CMPLE_DEFINED
 #endif
-#ifndef VUINT8x2_CMPGE_DEFINED
-VEC_GENERIC_CMPGE(u, 8, 2)
+#if !defined(VUINT8x2_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpge(vuint8x2 vec1, vuint8x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1);
+	return vec1;
+}
 # define VUINT8x2_CMPGE_DEFINED
 #endif
-#ifndef VUINT8x2_MIN_DEFINED
-VEC_GENERIC_MIN(u, 8, 2)
+#if !defined(VUINT8x2_MIN_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_min(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VUINT8x2_MIN_DEFINED
 #endif
-#ifndef VUINT8x2_MAX_DEFINED
-VEC_GENERIC_MAX(u, 8, 2)
+#if !defined(VUINT8x2_MAX_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_max(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VUINT8x2_MAX_DEFINED
 #endif
-#ifndef VUINT8x2_RSHIFT_DEFINED
-VEC_GENERIC_RSHIFT(u, 8, 2)
+#if !defined(VUINT8x2_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_rshift(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	return vec1;
+}
 # define VUINT8x2_RSHIFT_DEFINED
 #endif
-#ifndef VUINT8x2_LRSHIFT_DEFINED
-VEC_GENERIC_LRSHIFT(u, 8, 2)
+#if !defined(VUINT8x2_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_lrshift(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	return vec1;
+}
 # define VUINT8x2_LRSHIFT_DEFINED
 #endif
-#ifndef VUINT8x2_LSHIFT_DEFINED
-VEC_GENERIC_LSHIFT(u, 8, 2)
+#if !defined(VUINT8x2_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x2 vuint8x2_lshift(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	return vec1;
+}
 # define VUINT8x2_LSHIFT_DEFINED
 #endif
-
-
-/* vuint16x2 */
-
-#ifndef VINT16x2_SPLAT_DEFINED
-VEC_GENERIC_SPLAT(/* nothing */, 16, 2)
+#if !defined(VINT8x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_splat(vec_int8 x)
+{
+	vint8x4 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	return vec;
+}
+# define VINT8x4_SPLAT_DEFINED
+#endif
+#if !defined(VINT8x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_load_aligned(const vec_int8 x[4])
+{
+	vint8x4 vec;
+	memcpy(vec.generic, x, 4);
+	return vec;
+}
+# define VINT8x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_load(const vec_int8 x[4])
+{
+	vint8x4 vec;
+	memcpy(vec.generic, x, 4);
+	return vec;
+}
+# define VINT8x4_LOAD_DEFINED
+#endif
+#if !defined(VINT8x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x4_store_aligned(vint8x4 vec, vec_int8 x[4])
+{
+	memcpy(x, vec.generic, 4);
+}
+# define VINT8x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x4_store(vint8x4 vec, vec_int8 x[4])
+{
+	memcpy(x, vec.generic, 4);
+}
+# define VINT8x4_STORE_DEFINED
+#endif
+#if !defined(VINT8x4_ADD_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_add(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	return vec1;
+}
+# define VINT8x4_ADD_DEFINED
+#endif
+#if !defined(VINT8x4_SUB_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_sub(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	return vec1;
+}
+# define VINT8x4_SUB_DEFINED
+#endif
+#if !defined(VINT8x4_MUL_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_mul(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	return vec1;
+}
+# define VINT8x4_MUL_DEFINED
+#endif
+#if !defined(VINT8x4_DIV_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_div(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VINT8x4_DIV_DEFINED
+#endif
+#if !defined(VINT8x4_MOD_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_mod(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VINT8x4_MOD_DEFINED
+#endif
+#if !defined(VINT8x4_AVG_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_avg(vint8x4 vec1, vint8x4 vec2)
+{
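+	/* signed average computed from per-operand halves and remainders so the
+	 * intermediate sum cannot overflow; rounds up (toward positive infinity)
+	 * when the exact average is not an integer */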
+	vec_int8 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT8x4_AVG_DEFINED
+#endif
+#if !defined(VINT8x4_AND_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_and(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	return vec1;
+}
+# define VINT8x4_AND_DEFINED
+#endif
+#if !defined(VINT8x4_OR_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_or(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	return vec1;
+}
+# define VINT8x4_OR_DEFINED
+#endif
+#if !defined(VINT8x4_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_xor(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	return vec1;
+}
+# define VINT8x4_XOR_DEFINED
+#endif
+#if !defined(VINT8x4_NOT_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_not(vint8x4 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	return vec;
+}
+# define VINT8x4_NOT_DEFINED
+#endif
+#if !defined(VINT8x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_cmplt(vint8x4 vec1, vint8x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x4_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_cmpeq(vint8x4 vec1, vint8x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x4_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_cmpgt(vint8x4 vec1, vint8x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x4_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_cmple(vint8x4 vec1, vint8x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x4_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_cmpge(vint8x4 vec1, vint8x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x4_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x4_MIN_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_min(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VINT8x4_MIN_DEFINED
+#endif
+#if !defined(VINT8x4_MAX_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_max(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VINT8x4_MAX_DEFINED
+#endif
+#if !defined(VINT8x4_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_rshift(vint8x4 vec1, vuint8x4 vec2)
+{
+	/* arithmetic right shift, emulated portably since >> on negative signed values is implementation-defined */
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~(~vec1.generic[0] >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~(~vec1.generic[1] >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~(~vec1.generic[2] >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~(~vec1.generic[3] >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	return vec1;
+}
+# define VINT8x4_RSHIFT_DEFINED
+#endif
+#if !defined(VINT8x4_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_lrshift(vint8x4 vec1, vuint8x4 vec2)
+{
+	union { vec_uint8 u; vec_int8 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	return vec1;
+}
+# define VINT8x4_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT8x4_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x4 vint8x4_lshift(vint8x4 vec1, vuint8x4 vec2)
+{
+	union { vec_uint8 u; vec_int8 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	return vec1;
+}
+# define VINT8x4_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_splat(vec_uint8 x)
+{
+	vuint8x4 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	return vec;
+}
+# define VUINT8x4_SPLAT_DEFINED
+#endif
+#if !defined(VUINT8x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_load_aligned(const vec_uint8 x[4])
+{
+	vuint8x4 vec;
+	memcpy(vec.generic, x, 4);
+	return vec;
+}
+# define VUINT8x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_load(const vec_uint8 x[4])
+{
+	vuint8x4 vec;
+	memcpy(vec.generic, x, 4);
+	return vec;
+}
+# define VUINT8x4_LOAD_DEFINED
+#endif
+#if !defined(VUINT8x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x4_store_aligned(vuint8x4 vec, vec_uint8 x[4])
+{
+	memcpy(x, vec.generic, 4);
+}
+# define VUINT8x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint8x4_store(vuint8x4 vec, vec_uint8 x[4])
+{
+	memcpy(x, vec.generic, 4);
+}
+# define VUINT8x4_STORE_DEFINED
+#endif
+#if !defined(VUINT8x4_ADD_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_add(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	return vec1;
+}
+# define VUINT8x4_ADD_DEFINED
+#endif
+#if !defined(VUINT8x4_SUB_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_sub(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	return vec1;
+}
+# define VUINT8x4_SUB_DEFINED
+#endif
+#if !defined(VUINT8x4_MUL_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_mul(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	return vec1;
+}
+# define VUINT8x4_MUL_DEFINED
+#endif
+#if !defined(VUINT8x4_DIV_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_div(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VUINT8x4_DIV_DEFINED
+#endif
+#if !defined(VUINT8x4_MOD_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_mod(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VUINT8x4_MOD_DEFINED
+#endif
+#if !defined(VUINT8x4_AVG_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_avg(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	return vec1;
+}
+# define VUINT8x4_AVG_DEFINED
+#endif
+#if !defined(VUINT8x4_AND_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_and(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	return vec1;
+}
+# define VUINT8x4_AND_DEFINED
+#endif
+#if !defined(VUINT8x4_OR_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_or(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	return vec1;
+}
+# define VUINT8x4_OR_DEFINED
+#endif
+#if !defined(VUINT8x4_XOR_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_xor(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	return vec1;
+}
+# define VUINT8x4_XOR_DEFINED
+#endif
+#if !defined(VUINT8x4_NOT_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_not(vuint8x4 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	return vec;
+}
+# define VUINT8x4_NOT_DEFINED
+#endif
+#if !defined(VUINT8x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmplt(vuint8x4 vec1, vuint8x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x4_CMPLT_DEFINED
+#endif
+#if !defined(VUINT8x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpeq(vuint8x4 vec1, vuint8x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x4_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT8x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpgt(vuint8x4 vec1, vuint8x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x4_CMPGT_DEFINED
+#endif
+#if !defined(VUINT8x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmple(vuint8x4 vec1, vuint8x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x4_CMPLE_DEFINED
+#endif
+#if !defined(VUINT8x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpge(vuint8x4 vec1, vuint8x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x4_CMPGE_DEFINED
+#endif
+#if !defined(VUINT8x4_MIN_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_min(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VUINT8x4_MIN_DEFINED
+#endif
+#if !defined(VUINT8x4_MAX_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_max(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VUINT8x4_MAX_DEFINED
+#endif
+#if !defined(VUINT8x4_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_rshift(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	return vec1;
+}
+# define VUINT8x4_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x4_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_lrshift(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	return vec1;
+}
+# define VUINT8x4_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x4_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x4 vuint8x4_lshift(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	return vec1;
+}
+# define VUINT8x4_LSHIFT_DEFINED
+#endif
+#if !defined(VINT8x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_splat(vec_int8 x)
+{
+	vint8x8 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	return vec;
+}
+# define VINT8x8_SPLAT_DEFINED
+#endif
+#if !defined(VINT8x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_load_aligned(const vec_int8 x[8])
+{
+	vint8x8 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
+# define VINT8x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_load(const vec_int8 x[8])
+{
+	vint8x8 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
+# define VINT8x8_LOAD_DEFINED
+#endif
+#if !defined(VINT8x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x8_store_aligned(vint8x8 vec, vec_int8 x[8])
+{
+	memcpy(x, vec.generic, 8);
+}
+# define VINT8x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x8_store(vint8x8 vec, vec_int8 x[8])
+{
+	memcpy(x, vec.generic, 8);
+}
+# define VINT8x8_STORE_DEFINED
+#endif
+#if !defined(VINT8x8_ADD_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_add(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	return vec1;
+}
+# define VINT8x8_ADD_DEFINED
+#endif
+#if !defined(VINT8x8_SUB_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_sub(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	return vec1;
+}
+# define VINT8x8_SUB_DEFINED
+#endif
+#if !defined(VINT8x8_MUL_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_mul(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	return vec1;
+}
+# define VINT8x8_MUL_DEFINED
+#endif
+#if !defined(VINT8x8_DIV_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_div(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VINT8x8_DIV_DEFINED
+#endif
+#if !defined(VINT8x8_MOD_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_mod(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VINT8x8_MOD_DEFINED
+#endif
+#if !defined(VINT8x8_AVG_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_avg(vint8x8 vec1, vint8x8 vec2)
+{
+	vec_int8 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[4] % 2);
+	y_d_rem = (vec2.generic[4] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[5] % 2);
+	y_d_rem = (vec2.generic[5] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[6] % 2);
+	y_d_rem = (vec2.generic[6] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[7] % 2);
+	y_d_rem = (vec2.generic[7] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT8x8_AVG_DEFINED
+#endif
+#if !defined(VINT8x8_AND_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_and(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	return vec1;
+}
+# define VINT8x8_AND_DEFINED
+#endif
+#if !defined(VINT8x8_OR_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_or(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	return vec1;
+}
+# define VINT8x8_OR_DEFINED
+#endif
+#if !defined(VINT8x8_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_xor(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	return vec1;
+}
+# define VINT8x8_XOR_DEFINED
+#endif
+#if !defined(VINT8x8_NOT_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_not(vint8x8 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	return vec;
+}
+# define VINT8x8_NOT_DEFINED
+#endif
+#if !defined(VINT8x8_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_cmplt(vint8x8 vec1, vint8x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x8_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x8_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_cmpeq(vint8x8 vec1, vint8x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x8_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x8_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_cmpgt(vint8x8 vec1, vint8x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x8_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x8_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_cmple(vint8x8 vec1, vint8x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x8_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x8_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_cmpge(vint8x8 vec1, vint8x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x8_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x8_MIN_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_min(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VINT8x8_MIN_DEFINED
+#endif
+#if !defined(VINT8x8_MAX_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_max(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VINT8x8_MAX_DEFINED
+#endif
+#if !defined(VINT8x8_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_rshift(vint8x8 vec1, vuint8x8 vec2)
+{
+	/* arithmetic right shift, emulated portably since >> on negative signed values is implementation-defined */
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~(~vec1.generic[0] >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~(~vec1.generic[1] >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~(~vec1.generic[2] >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~(~vec1.generic[3] >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < 0) ? ~(~vec1.generic[4] >> vec2.generic[4]) : (vec1.generic[4] >> vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < 0) ? ~(~vec1.generic[5] >> vec2.generic[5]) : (vec1.generic[5] >> vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < 0) ? ~(~vec1.generic[6] >> vec2.generic[6]) : (vec1.generic[6] >> vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < 0) ? ~(~vec1.generic[7] >> vec2.generic[7]) : (vec1.generic[7] >> vec2.generic[7]);
+	return vec1;
+}
+# define VINT8x8_RSHIFT_DEFINED
+#endif
+#if !defined(VINT8x8_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_lrshift(vint8x8 vec1, vuint8x8 vec2)
+{
+	union { vec_uint8 u; vec_int8 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u >>= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u >>= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u >>= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u >>= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	return vec1;
+}
+# define VINT8x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT8x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x8 vint8x8_lshift(vint8x8 vec1, vuint8x8 vec2)
+{
+	union { vec_uint8 u; vec_int8 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u <<= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u <<= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u <<= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u <<= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	return vec1;
+}
+# define VINT8x8_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_splat(vec_uint8 x)
+{
+	vuint8x8 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	return vec;
+}
+# define VUINT8x8_SPLAT_DEFINED
+#endif
+#if !defined(VUINT8x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_load_aligned(const vec_uint8 x[8])
+{
+	vuint8x8 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
+# define VUINT8x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_load(const vec_uint8 x[8])
+{
+	vuint8x8 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
+# define VUINT8x8_LOAD_DEFINED
+#endif
+#if !defined(VUINT8x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x8_store_aligned(vuint8x8 vec, vec_uint8 x[8])
+{
+	memcpy(x, vec.generic, 8);
+}
+# define VUINT8x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint8x8_store(vuint8x8 vec, vec_uint8 x[8])
+{
+	memcpy(x, vec.generic, 8);
+}
+# define VUINT8x8_STORE_DEFINED
+#endif
+#if !defined(VUINT8x8_ADD_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_add(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	return vec1;
+}
+# define VUINT8x8_ADD_DEFINED
+#endif
+#if !defined(VUINT8x8_SUB_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_sub(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	return vec1;
+}
+# define VUINT8x8_SUB_DEFINED
+#endif
+#if !defined(VUINT8x8_MUL_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_mul(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	return vec1;
+}
+# define VUINT8x8_MUL_DEFINED
+#endif
+#if !defined(VUINT8x8_DIV_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_div(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VUINT8x8_DIV_DEFINED
+#endif
+#if !defined(VUINT8x8_MOD_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_mod(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VUINT8x8_MOD_DEFINED
+#endif
+#if !defined(VUINT8x8_AVG_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_avg(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1);
+	vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1);
+	vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1);
+	vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1);
+	return vec1;
+}
+# define VUINT8x8_AVG_DEFINED
+#endif
+#if !defined(VUINT8x8_AND_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_and(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	return vec1;
+}
+# define VUINT8x8_AND_DEFINED
+#endif
+#if !defined(VUINT8x8_OR_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_or(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	return vec1;
+}
+# define VUINT8x8_OR_DEFINED
+#endif
+#if !defined(VUINT8x8_XOR_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_xor(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	return vec1;
+}
+# define VUINT8x8_XOR_DEFINED
+#endif
+#if !defined(VUINT8x8_NOT_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_not(vuint8x8 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	return vec;
+}
+# define VUINT8x8_NOT_DEFINED
+#endif
+#if !defined(VUINT8x8_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmplt(vuint8x8 vec1, vuint8x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x8_CMPLT_DEFINED
+#endif
+#if !defined(VUINT8x8_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpeq(vuint8x8 vec1, vuint8x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x8_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT8x8_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpgt(vuint8x8 vec1, vuint8x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x8_CMPGT_DEFINED
+#endif
+#if !defined(VUINT8x8_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmple(vuint8x8 vec1, vuint8x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x8_CMPLE_DEFINED
+#endif
+#if !defined(VUINT8x8_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpge(vuint8x8 vec1, vuint8x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x8_CMPGE_DEFINED
+#endif
+#if !defined(VUINT8x8_MIN_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_min(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VUINT8x8_MIN_DEFINED
+#endif
+#if !defined(VUINT8x8_MAX_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_max(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VUINT8x8_MAX_DEFINED
+#endif
+#if !defined(VUINT8x8_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_rshift(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	return vec1;
+}
+# define VUINT8x8_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x8_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_lrshift(vuint8x8 vec1, vuint8x8 vec2)
+{
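+	/* for unsigned lanes the logical right shift is identical to the plain right shift above */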
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	return vec1;
+}
+# define VUINT8x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x8 vuint8x8_lshift(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	vec1.generic[4] <<= vec2.generic[4];
+	vec1.generic[5] <<= vec2.generic[5];
+	vec1.generic[6] <<= vec2.generic[6];
+	vec1.generic[7] <<= vec2.generic[7];
+	return vec1;
+}
+# define VUINT8x8_LSHIFT_DEFINED
+#endif
+#if !defined(VINT8x16_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_splat(vec_int8 x)
+{
+	vint8x16 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	return vec;
+}
+# define VINT8x16_SPLAT_DEFINED
+#endif
+#if !defined(VINT8x16_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_load_aligned(const vec_int8 x[16])
+{
+	vint8x16 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VINT8x16_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x16_LOAD_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_load(const vec_int8 x[16])
+{
+	vint8x16 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VINT8x16_LOAD_DEFINED
+#endif
+#if !defined(VINT8x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 x[16])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VINT8x16_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x16_store(vint8x16 vec, vec_int8 x[16])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VINT8x16_STORE_DEFINED
+#endif
+#if !defined(VINT8x16_ADD_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_add(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	return vec1;
+}
+# define VINT8x16_ADD_DEFINED
+#endif
+#if !defined(VINT8x16_SUB_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_sub(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	return vec1;
+}
+# define VINT8x16_SUB_DEFINED
+#endif
+#if !defined(VINT8x16_MUL_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_mul(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	return vec1;
+}
+# define VINT8x16_MUL_DEFINED
+#endif
+#if !defined(VINT8x16_DIV_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_div(vint8x16 vec1, vint8x16 vec2)
+{
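+	/* lanes with a zero divisor yield 0 instead of invoking undefined behavior; the mod operation below follows the same convention */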
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VINT8x16_DIV_DEFINED
+#endif
+#if !defined(VINT8x16_MOD_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_mod(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VINT8x16_MOD_DEFINED
+#endif
+#if !defined(VINT8x16_AVG_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2)
+{
+	vec_int8 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
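+	/* rounding-up average of each lane, equivalent to ceil((a + b) / 2), computed from halves and remainders so the intermediate sum cannot overflow */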
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[4] % 2);
+	y_d_rem = (vec2.generic[4] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[5] % 2);
+	y_d_rem = (vec2.generic[5] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[6] % 2);
+	y_d_rem = (vec2.generic[6] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[7] % 2);
+	y_d_rem = (vec2.generic[7] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[8] % 2);
+	y_d_rem = (vec2.generic[8] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[8] = ((vec1.generic[8] / 2) + (vec2.generic[8] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[9] % 2);
+	y_d_rem = (vec2.generic[9] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[9] = ((vec1.generic[9] / 2) + (vec2.generic[9] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[10] % 2);
+	y_d_rem = (vec2.generic[10] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[10] = ((vec1.generic[10] / 2) + (vec2.generic[10] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[11] % 2);
+	y_d_rem = (vec2.generic[11] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[11] = ((vec1.generic[11] / 2) + (vec2.generic[11] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[12] % 2);
+	y_d_rem = (vec2.generic[12] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[12] = ((vec1.generic[12] / 2) + (vec2.generic[12] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[13] % 2);
+	y_d_rem = (vec2.generic[13] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[13] = ((vec1.generic[13] / 2) + (vec2.generic[13] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[14] % 2);
+	y_d_rem = (vec2.generic[14] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[14] = ((vec1.generic[14] / 2) + (vec2.generic[14] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[15] % 2);
+	y_d_rem = (vec2.generic[15] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[15] = ((vec1.generic[15] / 2) + (vec2.generic[15] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT8x16_AVG_DEFINED
+#endif
+#if !defined(VINT8x16_AND_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]);
+	return vec1;
+}
+# define VINT8x16_AND_DEFINED
+#endif
+#if !defined(VINT8x16_OR_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]);
+	return vec1;
+}
+# define VINT8x16_OR_DEFINED
+#endif
+#if !defined(VINT8x16_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]);
+	return vec1;
+}
+# define VINT8x16_XOR_DEFINED
+#endif
+#if !defined(VINT8x16_NOT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_not(vint8x16 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	vec.generic[8] = ~vec.generic[8];
+	vec.generic[9] = ~vec.generic[9];
+	vec.generic[10] = ~vec.generic[10];
+	vec.generic[11] = ~vec.generic[11];
+	vec.generic[12] = ~vec.generic[12];
+	vec.generic[13] = ~vec.generic[13];
+	vec.generic[14] = ~vec.generic[14];
+	vec.generic[15] = ~vec.generic[15];
+	return vec;
+}
+# define VINT8x16_NOT_DEFINED
+#endif
+#if !defined(VINT8x16_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x16_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x16_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x16_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x16_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x16_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x16_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_cmple(vint8x16 vec1, vint8x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x16_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x16_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpge(vint8x16 vec1, vint8x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x16_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x16_MIN_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VINT8x16_MIN_DEFINED
+#endif
+#if !defined(VINT8x16_MAX_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VINT8x16_MAX_DEFINED
+#endif
+#if !defined(VINT8x16_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_rshift(vint8x16 vec1, vuint8x16 vec2)
+{
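+	/* assumed to be the arithmetic (sign-extending) right shift, since lrshift below provides the logical variant; negative lanes use the portable ~(~x >> n) idiom */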
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~(~vec1.generic[0] >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~(~vec1.generic[1] >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~(~vec1.generic[2] >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~(~vec1.generic[3] >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < 0) ? ~(~vec1.generic[4] >> vec2.generic[4]) : (vec1.generic[4] >> vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < 0) ? ~(~vec1.generic[5] >> vec2.generic[5]) : (vec1.generic[5] >> vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < 0) ? ~(~vec1.generic[6] >> vec2.generic[6]) : (vec1.generic[6] >> vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < 0) ? ~(~vec1.generic[7] >> vec2.generic[7]) : (vec1.generic[7] >> vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < 0) ? ~(~vec1.generic[8] >> vec2.generic[8]) : (vec1.generic[8] >> vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < 0) ? ~(~vec1.generic[9] >> vec2.generic[9]) : (vec1.generic[9] >> vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < 0) ? ~(~vec1.generic[10] >> vec2.generic[10]) : (vec1.generic[10] >> vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < 0) ? ~(~vec1.generic[11] >> vec2.generic[11]) : (vec1.generic[11] >> vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < 0) ? ~(~vec1.generic[12] >> vec2.generic[12]) : (vec1.generic[12] >> vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < 0) ? ~(~vec1.generic[13] >> vec2.generic[13]) : (vec1.generic[13] >> vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < 0) ? ~(~vec1.generic[14] >> vec2.generic[14]) : (vec1.generic[14] >> vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < 0) ? ~(~vec1.generic[15] >> vec2.generic[15]) : (vec1.generic[15] >> vec2.generic[15]);
+	return vec1;
+}
+# define VINT8x16_RSHIFT_DEFINED
+#endif
+#if !defined(VINT8x16_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	union { vec_uint8 u; vec_int8 s; } x;
+
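+	/* the union reinterprets each signed lane as unsigned so the shift is guaranteed to be logical (zero-filling), regardless of how the compiler shifts negative values */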
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u >>= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u >>= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u >>= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u >>= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	x.s = vec1.generic[8];
+	x.u >>= vec2.generic[8];
+	vec1.generic[8] = x.s;
+	x.s = vec1.generic[9];
+	x.u >>= vec2.generic[9];
+	vec1.generic[9] = x.s;
+	x.s = vec1.generic[10];
+	x.u >>= vec2.generic[10];
+	vec1.generic[10] = x.s;
+	x.s = vec1.generic[11];
+	x.u >>= vec2.generic[11];
+	vec1.generic[11] = x.s;
+	x.s = vec1.generic[12];
+	x.u >>= vec2.generic[12];
+	vec1.generic[12] = x.s;
+	x.s = vec1.generic[13];
+	x.u >>= vec2.generic[13];
+	vec1.generic[13] = x.s;
+	x.s = vec1.generic[14];
+	x.u >>= vec2.generic[14];
+	vec1.generic[14] = x.s;
+	x.s = vec1.generic[15];
+	x.u >>= vec2.generic[15];
+	vec1.generic[15] = x.s;
+	return vec1;
+}
+# define VINT8x16_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT8x16_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	union { vec_uint8 u; vec_int8 s; } x;
+
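+	/* shifting through the unsigned member avoids the undefined behavior of left-shifting a negative signed value */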
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u <<= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u <<= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u <<= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u <<= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	x.s = vec1.generic[8];
+	x.u <<= vec2.generic[8];
+	vec1.generic[8] = x.s;
+	x.s = vec1.generic[9];
+	x.u <<= vec2.generic[9];
+	vec1.generic[9] = x.s;
+	x.s = vec1.generic[10];
+	x.u <<= vec2.generic[10];
+	vec1.generic[10] = x.s;
+	x.s = vec1.generic[11];
+	x.u <<= vec2.generic[11];
+	vec1.generic[11] = x.s;
+	x.s = vec1.generic[12];
+	x.u <<= vec2.generic[12];
+	vec1.generic[12] = x.s;
+	x.s = vec1.generic[13];
+	x.u <<= vec2.generic[13];
+	vec1.generic[13] = x.s;
+	x.s = vec1.generic[14];
+	x.u <<= vec2.generic[14];
+	vec1.generic[14] = x.s;
+	x.s = vec1.generic[15];
+	x.u <<= vec2.generic[15];
+	vec1.generic[15] = x.s;
+	return vec1;
+}
+# define VINT8x16_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x16_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_splat(vec_uint8 x)
+{
+	vuint8x16 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	return vec;
+}
+# define VUINT8x16_SPLAT_DEFINED
+#endif
+#if !defined(VUINT8x16_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_load_aligned(const vec_uint8 x[16])
+{
+	vuint8x16 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VUINT8x16_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x16_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 x[16])
+{
+	vuint8x16 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VUINT8x16_LOAD_DEFINED
+#endif
+#if !defined(VUINT8x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 x[16])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VUINT8x16_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint8x16_store(vuint8x16 vec, vec_uint8 x[16])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VUINT8x16_STORE_DEFINED
+#endif
+#if !defined(VUINT8x16_ADD_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_add(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	return vec1;
+}
+# define VUINT8x16_ADD_DEFINED
+#endif
+#if !defined(VUINT8x16_SUB_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_sub(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	return vec1;
+}
+# define VUINT8x16_SUB_DEFINED
+#endif
+#if !defined(VUINT8x16_MUL_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_mul(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	return vec1;
+}
+# define VUINT8x16_MUL_DEFINED
+#endif
+#if !defined(VUINT8x16_DIV_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_div(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VUINT8x16_DIV_DEFINED
+#endif
+#if !defined(VUINT8x16_MOD_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_mod(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VUINT8x16_MOD_DEFINED
+#endif
+#if !defined(VUINT8x16_AVG_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2)
+{
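+	/* rounding-up unsigned average: (a >> 1) + (b >> 1) takes the halves without overflow, and (a | b) & 1 adds one whenever either lane is odd */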
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1);
+	vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1);
+	vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1);
+	vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1);
+	vec1.generic[8] = (vec1.generic[8] >> 1) + (vec2.generic[8] >> 1) + ((vec1.generic[8] | vec2.generic[8]) & 1);
+	vec1.generic[9] = (vec1.generic[9] >> 1) + (vec2.generic[9] >> 1) + ((vec1.generic[9] | vec2.generic[9]) & 1);
+	vec1.generic[10] = (vec1.generic[10] >> 1) + (vec2.generic[10] >> 1) + ((vec1.generic[10] | vec2.generic[10]) & 1);
+	vec1.generic[11] = (vec1.generic[11] >> 1) + (vec2.generic[11] >> 1) + ((vec1.generic[11] | vec2.generic[11]) & 1);
+	vec1.generic[12] = (vec1.generic[12] >> 1) + (vec2.generic[12] >> 1) + ((vec1.generic[12] | vec2.generic[12]) & 1);
+	vec1.generic[13] = (vec1.generic[13] >> 1) + (vec2.generic[13] >> 1) + ((vec1.generic[13] | vec2.generic[13]) & 1);
+	vec1.generic[14] = (vec1.generic[14] >> 1) + (vec2.generic[14] >> 1) + ((vec1.generic[14] | vec2.generic[14]) & 1);
+	vec1.generic[15] = (vec1.generic[15] >> 1) + (vec2.generic[15] >> 1) + ((vec1.generic[15] | vec2.generic[15]) & 1);
+	return vec1;
+}
+# define VUINT8x16_AVG_DEFINED
+#endif
+#if !defined(VUINT8x16_AND_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_and(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]);
+	return vec1;
+}
+# define VUINT8x16_AND_DEFINED
+#endif
+#if !defined(VUINT8x16_OR_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_or(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]);
+	return vec1;
+}
+# define VUINT8x16_OR_DEFINED
+#endif
+#if !defined(VUINT8x16_XOR_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_xor(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]);
+	return vec1;
+}
+# define VUINT8x16_XOR_DEFINED
+#endif
+#if !defined(VUINT8x16_NOT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_not(vuint8x16 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	vec.generic[8] = ~vec.generic[8];
+	vec.generic[9] = ~vec.generic[9];
+	vec.generic[10] = ~vec.generic[10];
+	vec.generic[11] = ~vec.generic[11];
+	vec.generic[12] = ~vec.generic[12];
+	vec.generic[13] = ~vec.generic[13];
+	vec.generic[14] = ~vec.generic[14];
+	vec.generic[15] = ~vec.generic[15];
+	return vec;
+}
+# define VUINT8x16_NOT_DEFINED
+#endif
+#if !defined(VUINT8x16_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmplt(vuint8x16 vec1, vuint8x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x16_CMPLT_DEFINED
+#endif
+#if !defined(VUINT8x16_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpeq(vuint8x16 vec1, vuint8x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x16_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT8x16_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpgt(vuint8x16 vec1, vuint8x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x16_CMPGT_DEFINED
+#endif
+#if !defined(VUINT8x16_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmple(vuint8x16 vec1, vuint8x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x16_CMPLE_DEFINED
+#endif
+#if !defined(VUINT8x16_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpge(vuint8x16 vec1, vuint8x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x16_CMPGE_DEFINED
+#endif
+#if !defined(VUINT8x16_MIN_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_min(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VUINT8x16_MIN_DEFINED
+#endif
+#if !defined(VUINT8x16_MAX_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_max(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VUINT8x16_MAX_DEFINED
+#endif
+#if !defined(VUINT8x16_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2)
+{
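+	/* each lane is shifted by the corresponding lane of vec2 */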
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	vec1.generic[8] >>= vec2.generic[8];
+	vec1.generic[9] >>= vec2.generic[9];
+	vec1.generic[10] >>= vec2.generic[10];
+	vec1.generic[11] >>= vec2.generic[11];
+	vec1.generic[12] >>= vec2.generic[12];
+	vec1.generic[13] >>= vec2.generic[13];
+	vec1.generic[14] >>= vec2.generic[14];
+	vec1.generic[15] >>= vec2.generic[15];
+	return vec1;
+}
+# define VUINT8x16_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x16_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2)
+{
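+	/* for unsigned lanes a logical (zero-fill) right shift is identical
+	 * to the plain right shift above */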
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	vec1.generic[8] >>= vec2.generic[8];
+	vec1.generic[9] >>= vec2.generic[9];
+	vec1.generic[10] >>= vec2.generic[10];
+	vec1.generic[11] >>= vec2.generic[11];
+	vec1.generic[12] >>= vec2.generic[12];
+	vec1.generic[13] >>= vec2.generic[13];
+	vec1.generic[14] >>= vec2.generic[14];
+	vec1.generic[15] >>= vec2.generic[15];
+	return vec1;
+}
+# define VUINT8x16_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x16_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	vec1.generic[4] <<= vec2.generic[4];
+	vec1.generic[5] <<= vec2.generic[5];
+	vec1.generic[6] <<= vec2.generic[6];
+	vec1.generic[7] <<= vec2.generic[7];
+	vec1.generic[8] <<= vec2.generic[8];
+	vec1.generic[9] <<= vec2.generic[9];
+	vec1.generic[10] <<= vec2.generic[10];
+	vec1.generic[11] <<= vec2.generic[11];
+	vec1.generic[12] <<= vec2.generic[12];
+	vec1.generic[13] <<= vec2.generic[13];
+	vec1.generic[14] <<= vec2.generic[14];
+	vec1.generic[15] <<= vec2.generic[15];
+	return vec1;
+}
+# define VUINT8x16_LSHIFT_DEFINED
+#endif
+#if !defined(VINT8x32_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_splat(vec_int8 x)
+{
+	vint8x32 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	vec.generic[16] = x;
+	vec.generic[17] = x;
+	vec.generic[18] = x;
+	vec.generic[19] = x;
+	vec.generic[20] = x;
+	vec.generic[21] = x;
+	vec.generic[22] = x;
+	vec.generic[23] = x;
+	vec.generic[24] = x;
+	vec.generic[25] = x;
+	vec.generic[26] = x;
+	vec.generic[27] = x;
+	vec.generic[28] = x;
+	vec.generic[29] = x;
+	vec.generic[30] = x;
+	vec.generic[31] = x;
+	return vec;
+}
+# define VINT8x32_SPLAT_DEFINED
+#endif
+#if !defined(VINT8x32_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_load_aligned(const vec_int8 x[32])
+{
+	vint8x32 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VINT8x32_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x32_LOAD_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_load(const vec_int8 x[32])
+{
+	vint8x32 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VINT8x32_LOAD_DEFINED
+#endif
+#if !defined(VINT8x32_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x32_store_aligned(vint8x32 vec, vec_int8 x[32])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VINT8x32_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x32_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x32_store(vint8x32 vec, vec_int8 x[32])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VINT8x32_STORE_DEFINED
+#endif
+#if !defined(VINT8x32_ADD_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_add(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] + vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] + vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] + vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] + vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] + vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] + vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] + vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] + vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] + vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] + vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] + vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] + vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] + vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] + vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] + vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] + vec2.generic[31]);
+	return vec1;
+}
+# define VINT8x32_ADD_DEFINED
+#endif
+#if !defined(VINT8x32_SUB_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_sub(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] - vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] - vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] - vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] - vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] - vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] - vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] - vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] - vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] - vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] - vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] - vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] - vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] - vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] - vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] - vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] - vec2.generic[31]);
+	return vec1;
+}
+# define VINT8x32_SUB_DEFINED
+#endif
+#if !defined(VINT8x32_MUL_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_mul(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] * vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] * vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] * vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] * vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] * vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] * vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] * vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] * vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] * vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] * vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] * vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] * vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] * vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] * vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] * vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] * vec2.generic[31]);
+	return vec1;
+}
+# define VINT8x32_MUL_DEFINED
+#endif
+#if !defined(VINT8x32_DIV_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_div(vint8x32 vec1, vint8x32 vec2)
+{
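+	/* a zero divisor yields zero in that lane instead of invoking
+	 * undefined behaviour */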
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] / vec2.generic[16]) : 0);
+	vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] / vec2.generic[17]) : 0);
+	vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] / vec2.generic[18]) : 0);
+	vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] / vec2.generic[19]) : 0);
+	vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] / vec2.generic[20]) : 0);
+	vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] / vec2.generic[21]) : 0);
+	vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] / vec2.generic[22]) : 0);
+	vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] / vec2.generic[23]) : 0);
+	vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] / vec2.generic[24]) : 0);
+	vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] / vec2.generic[25]) : 0);
+	vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] / vec2.generic[26]) : 0);
+	vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] / vec2.generic[27]) : 0);
+	vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] / vec2.generic[28]) : 0);
+	vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] / vec2.generic[29]) : 0);
+	vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] / vec2.generic[30]) : 0);
+	vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] / vec2.generic[31]) : 0);
+	return vec1;
+}
+# define VINT8x32_DIV_DEFINED
+#endif
+#if !defined(VINT8x32_MOD_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_mod(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0);
+	vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] % vec2.generic[16]) : 0);
+	vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] % vec2.generic[17]) : 0);
+	vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] % vec2.generic[18]) : 0);
+	vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] % vec2.generic[19]) : 0);
+	vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] % vec2.generic[20]) : 0);
+	vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] % vec2.generic[21]) : 0);
+	vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] % vec2.generic[22]) : 0);
+	vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] % vec2.generic[23]) : 0);
+	vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] % vec2.generic[24]) : 0);
+	vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] % vec2.generic[25]) : 0);
+	vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] % vec2.generic[26]) : 0);
+	vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] % vec2.generic[27]) : 0);
+	vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] % vec2.generic[28]) : 0);
+	vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] % vec2.generic[29]) : 0);
+	vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] % vec2.generic[30]) : 0);
+	vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] % vec2.generic[31]) : 0);
+	return vec1;
+}
+# define VINT8x32_MOD_DEFINED
+#endif
+#if !defined(VINT8x32_AVG_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_avg(vint8x32 vec1, vint8x32 vec2)
+{
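+	/* overflow-safe rounding-up average: each lane is computed as
+	 * (x / 2) + (y / 2) plus the carry reconstructed from the two
+	 * halved-off remainders, so the intermediate sum never exceeds
+	 * the lane type */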
+	vec_int8 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[4] % 2);
+	y_d_rem = (vec2.generic[4] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[5] % 2);
+	y_d_rem = (vec2.generic[5] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[6] % 2);
+	y_d_rem = (vec2.generic[6] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[7] % 2);
+	y_d_rem = (vec2.generic[7] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[8] % 2);
+	y_d_rem = (vec2.generic[8] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[8] = ((vec1.generic[8] / 2) + (vec2.generic[8] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[9] % 2);
+	y_d_rem = (vec2.generic[9] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[9] = ((vec1.generic[9] / 2) + (vec2.generic[9] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[10] % 2);
+	y_d_rem = (vec2.generic[10] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[10] = ((vec1.generic[10] / 2) + (vec2.generic[10] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[11] % 2);
+	y_d_rem = (vec2.generic[11] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[11] = ((vec1.generic[11] / 2) + (vec2.generic[11] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[12] % 2);
+	y_d_rem = (vec2.generic[12] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[12] = ((vec1.generic[12] / 2) + (vec2.generic[12] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[13] % 2);
+	y_d_rem = (vec2.generic[13] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[13] = ((vec1.generic[13] / 2) + (vec2.generic[13] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[14] % 2);
+	y_d_rem = (vec2.generic[14] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[14] = ((vec1.generic[14] / 2) + (vec2.generic[14] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[15] % 2);
+	y_d_rem = (vec2.generic[15] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[15] = ((vec1.generic[15] / 2) + (vec2.generic[15] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[16] % 2);
+	y_d_rem = (vec2.generic[16] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[16] = ((vec1.generic[16] / 2) + (vec2.generic[16] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[17] % 2);
+	y_d_rem = (vec2.generic[17] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[17] = ((vec1.generic[17] / 2) + (vec2.generic[17] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[18] % 2);
+	y_d_rem = (vec2.generic[18] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[18] = ((vec1.generic[18] / 2) + (vec2.generic[18] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[19] % 2);
+	y_d_rem = (vec2.generic[19] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[19] = ((vec1.generic[19] / 2) + (vec2.generic[19] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[20] % 2);
+	y_d_rem = (vec2.generic[20] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[20] = ((vec1.generic[20] / 2) + (vec2.generic[20] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[21] % 2);
+	y_d_rem = (vec2.generic[21] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[21] = ((vec1.generic[21] / 2) + (vec2.generic[21] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[22] % 2);
+	y_d_rem = (vec2.generic[22] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[22] = ((vec1.generic[22] / 2) + (vec2.generic[22] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[23] % 2);
+	y_d_rem = (vec2.generic[23] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[23] = ((vec1.generic[23] / 2) + (vec2.generic[23] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[24] % 2);
+	y_d_rem = (vec2.generic[24] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[24] = ((vec1.generic[24] / 2) + (vec2.generic[24] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[25] % 2);
+	y_d_rem = (vec2.generic[25] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[25] = ((vec1.generic[25] / 2) + (vec2.generic[25] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[26] % 2);
+	y_d_rem = (vec2.generic[26] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[26] = ((vec1.generic[26] / 2) + (vec2.generic[26] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[27] % 2);
+	y_d_rem = (vec2.generic[27] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[27] = ((vec1.generic[27] / 2) + (vec2.generic[27] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[28] % 2);
+	y_d_rem = (vec2.generic[28] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[28] = ((vec1.generic[28] / 2) + (vec2.generic[28] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[29] % 2);
+	y_d_rem = (vec2.generic[29] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[29] = ((vec1.generic[29] / 2) + (vec2.generic[29] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[30] % 2);
+	y_d_rem = (vec2.generic[30] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[30] = ((vec1.generic[30] / 2) + (vec2.generic[30] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[31] % 2);
+	y_d_rem = (vec2.generic[31] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[31] = ((vec1.generic[31] / 2) + (vec2.generic[31] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT8x32_AVG_DEFINED
+#endif
+#if !defined(VINT8x32_AND_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_and(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] & vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] & vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] & vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] & vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] & vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] & vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] & vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] & vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] & vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] & vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] & vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] & vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] & vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] & vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] & vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] & vec2.generic[31]);
+	return vec1;
+}
+# define VINT8x32_AND_DEFINED
+#endif
+#if !defined(VINT8x32_OR_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_or(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] | vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] | vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] | vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] | vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] | vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] | vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] | vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] | vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] | vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] | vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] | vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] | vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] | vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] | vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] | vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] | vec2.generic[31]);
+	return vec1;
+}
+# define VINT8x32_OR_DEFINED
+#endif
+#if !defined(VINT8x32_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_xor(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] ^ vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] ^ vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] ^ vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] ^ vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] ^ vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] ^ vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] ^ vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] ^ vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] ^ vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] ^ vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] ^ vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] ^ vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] ^ vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] ^ vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] ^ vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] ^ vec2.generic[31]);
+	return vec1;
+}
+# define VINT8x32_XOR_DEFINED
+#endif
+#if !defined(VINT8x32_NOT_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_not(vint8x32 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	vec.generic[8] = ~vec.generic[8];
+	vec.generic[9] = ~vec.generic[9];
+	vec.generic[10] = ~vec.generic[10];
+	vec.generic[11] = ~vec.generic[11];
+	vec.generic[12] = ~vec.generic[12];
+	vec.generic[13] = ~vec.generic[13];
+	vec.generic[14] = ~vec.generic[14];
+	vec.generic[15] = ~vec.generic[15];
+	vec.generic[16] = ~vec.generic[16];
+	vec.generic[17] = ~vec.generic[17];
+	vec.generic[18] = ~vec.generic[18];
+	vec.generic[19] = ~vec.generic[19];
+	vec.generic[20] = ~vec.generic[20];
+	vec.generic[21] = ~vec.generic[21];
+	vec.generic[22] = ~vec.generic[22];
+	vec.generic[23] = ~vec.generic[23];
+	vec.generic[24] = ~vec.generic[24];
+	vec.generic[25] = ~vec.generic[25];
+	vec.generic[26] = ~vec.generic[26];
+	vec.generic[27] = ~vec.generic[27];
+	vec.generic[28] = ~vec.generic[28];
+	vec.generic[29] = ~vec.generic[29];
+	vec.generic[30] = ~vec.generic[30];
+	vec.generic[31] = ~vec.generic[31];
+	return vec;
+}
+# define VINT8x32_NOT_DEFINED
+#endif
+#if !defined(VINT8x32_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_cmplt(vint8x32 vec1, vint8x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] < vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] < vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] < vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] < vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] < vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] < vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] < vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] < vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] < vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] < vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] < vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] < vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] < vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] < vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] < vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] < vec2.generic[31]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x32_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x32_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_cmpeq(vint8x32 vec1, vint8x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] == vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] == vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] == vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] == vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] == vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] == vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] == vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] == vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] == vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] == vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] == vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] == vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] == vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] == vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] == vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] == vec2.generic[31]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x32_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x32_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_cmpgt(vint8x32 vec1, vint8x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] > vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] > vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] > vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] > vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] > vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] > vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] > vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] > vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] > vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] > vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] > vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] > vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] > vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] > vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] > vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] > vec2.generic[31]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x32_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x32_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_cmple(vint8x32 vec1, vint8x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] <= vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] <= vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] <= vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] <= vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] <= vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] <= vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] <= vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] <= vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] <= vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] <= vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] <= vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] <= vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] <= vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] <= vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] <= vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] <= vec2.generic[31]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x32_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x32_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_cmpge(vint8x32 vec1, vint8x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] >= vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] >= vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] >= vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] >= vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] >= vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] >= vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] >= vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] >= vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] >= vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] >= vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] >= vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] >= vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] >= vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] >= vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] >= vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] >= vec2.generic[31]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x32_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x32_MIN_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_min(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] < vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] < vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] < vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] < vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] < vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] < vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] < vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] < vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] < vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] < vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] < vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] < vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] < vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] < vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] < vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] < vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+	return vec1;
+}
+# define VINT8x32_MIN_DEFINED
+#endif
+#if !defined(VINT8x32_MAX_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_max(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] > vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] > vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] > vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] > vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] > vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] > vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] > vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] > vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] > vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] > vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] > vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] > vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] > vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] > vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] > vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] > vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+	return vec1;
+}
+# define VINT8x32_MAX_DEFINED
+#endif
+#if !defined(VINT8x32_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_rshift(vint8x32 vec1, vuint8x32 vec2)
+{
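+	/* arithmetic right shift, emulated portably: negative lanes are
+	 * complemented, shifted as a non-negative value, and complemented
+	 * back, so the sign bit is replicated regardless of how the
+	 * compiler shifts negative values */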
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~(~vec1.generic[0] >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~(~vec1.generic[1] >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~(~vec1.generic[2] >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~(~vec1.generic[3] >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < 0) ? ~(~vec1.generic[4] >> vec2.generic[4]) : (vec1.generic[4] >> vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < 0) ? ~(~vec1.generic[5] >> vec2.generic[5]) : (vec1.generic[5] >> vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < 0) ? ~(~vec1.generic[6] >> vec2.generic[6]) : (vec1.generic[6] >> vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < 0) ? ~(~vec1.generic[7] >> vec2.generic[7]) : (vec1.generic[7] >> vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < 0) ? ~(~vec1.generic[8] >> vec2.generic[8]) : (vec1.generic[8] >> vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < 0) ? ~(~vec1.generic[9] >> vec2.generic[9]) : (vec1.generic[9] >> vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < 0) ? ~(~vec1.generic[10] >> vec2.generic[10]) : (vec1.generic[10] >> vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < 0) ? ~(~vec1.generic[11] >> vec2.generic[11]) : (vec1.generic[11] >> vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < 0) ? ~(~vec1.generic[12] >> vec2.generic[12]) : (vec1.generic[12] >> vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < 0) ? ~(~vec1.generic[13] >> vec2.generic[13]) : (vec1.generic[13] >> vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < 0) ? ~(~vec1.generic[14] >> vec2.generic[14]) : (vec1.generic[14] >> vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < 0) ? ~(~vec1.generic[15] >> vec2.generic[15]) : (vec1.generic[15] >> vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] < 0) ? ~(~vec1.generic[16] >> vec2.generic[16]) : (vec1.generic[16] >> vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] < 0) ? ~(~vec1.generic[17] >> vec2.generic[17]) : (vec1.generic[17] >> vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] < 0) ? ~(~vec1.generic[18] >> vec2.generic[18]) : (vec1.generic[18] >> vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] < 0) ? ~(~vec1.generic[19] >> vec2.generic[19]) : (vec1.generic[19] >> vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] < 0) ? ~(~vec1.generic[20] >> vec2.generic[20]) : (vec1.generic[20] >> vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] < 0) ? ~(~vec1.generic[21] >> vec2.generic[21]) : (vec1.generic[21] >> vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] < 0) ? ~(~vec1.generic[22] >> vec2.generic[22]) : (vec1.generic[22] >> vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] < 0) ? ~(~vec1.generic[23] >> vec2.generic[23]) : (vec1.generic[23] >> vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] < 0) ? ~(~vec1.generic[24] >> vec2.generic[24]) : (vec1.generic[24] >> vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] < 0) ? ~(~vec1.generic[25] >> vec2.generic[25]) : (vec1.generic[25] >> vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] < 0) ? ~(~vec1.generic[26] >> vec2.generic[26]) : (vec1.generic[26] >> vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] < 0) ? ~(~vec1.generic[27] >> vec2.generic[27]) : (vec1.generic[27] >> vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] < 0) ? ~(~vec1.generic[28] >> vec2.generic[28]) : (vec1.generic[28] >> vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] < 0) ? ~(~vec1.generic[29] >> vec2.generic[29]) : (vec1.generic[29] >> vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] < 0) ? ~(~vec1.generic[30] >> vec2.generic[30]) : (vec1.generic[30] >> vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] < 0) ? ~(~vec1.generic[31] >> vec2.generic[31]) : (vec1.generic[31] >> vec2.generic[31]);
+	return vec1;
+}
+# define VINT8x32_RSHIFT_DEFINED
+#endif
+#if !defined(VINT8x32_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_lrshift(vint8x32 vec1, vuint8x32 vec2)
+{
+	union { vec_uint8 u; vec_int8 s; } x;
+
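+	/* logical (zero-fill) right shift: each signed lane is reinterpreted as
+	 * unsigned through the union, shifted, and the bits are stored back */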
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u >>= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u >>= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u >>= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u >>= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	x.s = vec1.generic[8];
+	x.u >>= vec2.generic[8];
+	vec1.generic[8] = x.s;
+	x.s = vec1.generic[9];
+	x.u >>= vec2.generic[9];
+	vec1.generic[9] = x.s;
+	x.s = vec1.generic[10];
+	x.u >>= vec2.generic[10];
+	vec1.generic[10] = x.s;
+	x.s = vec1.generic[11];
+	x.u >>= vec2.generic[11];
+	vec1.generic[11] = x.s;
+	x.s = vec1.generic[12];
+	x.u >>= vec2.generic[12];
+	vec1.generic[12] = x.s;
+	x.s = vec1.generic[13];
+	x.u >>= vec2.generic[13];
+	vec1.generic[13] = x.s;
+	x.s = vec1.generic[14];
+	x.u >>= vec2.generic[14];
+	vec1.generic[14] = x.s;
+	x.s = vec1.generic[15];
+	x.u >>= vec2.generic[15];
+	vec1.generic[15] = x.s;
+	x.s = vec1.generic[16];
+	x.u >>= vec2.generic[16];
+	vec1.generic[16] = x.s;
+	x.s = vec1.generic[17];
+	x.u >>= vec2.generic[17];
+	vec1.generic[17] = x.s;
+	x.s = vec1.generic[18];
+	x.u >>= vec2.generic[18];
+	vec1.generic[18] = x.s;
+	x.s = vec1.generic[19];
+	x.u >>= vec2.generic[19];
+	vec1.generic[19] = x.s;
+	x.s = vec1.generic[20];
+	x.u >>= vec2.generic[20];
+	vec1.generic[20] = x.s;
+	x.s = vec1.generic[21];
+	x.u >>= vec2.generic[21];
+	vec1.generic[21] = x.s;
+	x.s = vec1.generic[22];
+	x.u >>= vec2.generic[22];
+	vec1.generic[22] = x.s;
+	x.s = vec1.generic[23];
+	x.u >>= vec2.generic[23];
+	vec1.generic[23] = x.s;
+	x.s = vec1.generic[24];
+	x.u >>= vec2.generic[24];
+	vec1.generic[24] = x.s;
+	x.s = vec1.generic[25];
+	x.u >>= vec2.generic[25];
+	vec1.generic[25] = x.s;
+	x.s = vec1.generic[26];
+	x.u >>= vec2.generic[26];
+	vec1.generic[26] = x.s;
+	x.s = vec1.generic[27];
+	x.u >>= vec2.generic[27];
+	vec1.generic[27] = x.s;
+	x.s = vec1.generic[28];
+	x.u >>= vec2.generic[28];
+	vec1.generic[28] = x.s;
+	x.s = vec1.generic[29];
+	x.u >>= vec2.generic[29];
+	vec1.generic[29] = x.s;
+	x.s = vec1.generic[30];
+	x.u >>= vec2.generic[30];
+	vec1.generic[30] = x.s;
+	x.s = vec1.generic[31];
+	x.u >>= vec2.generic[31];
+	vec1.generic[31] = x.s;
+	return vec1;
+}
+# define VINT8x32_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT8x32_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x32 vint8x32_lshift(vint8x32 vec1, vuint8x32 vec2)
+{
+	union { vec_uint8 u; vec_int8 s; } x;
+
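+	/* left shift on the unsigned view of each lane: unsigned shifts simply wrap,
+	 * whereas shifting a signed value past the sign bit is undefined */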
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u <<= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u <<= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u <<= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u <<= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	x.s = vec1.generic[8];
+	x.u <<= vec2.generic[8];
+	vec1.generic[8] = x.s;
+	x.s = vec1.generic[9];
+	x.u <<= vec2.generic[9];
+	vec1.generic[9] = x.s;
+	x.s = vec1.generic[10];
+	x.u <<= vec2.generic[10];
+	vec1.generic[10] = x.s;
+	x.s = vec1.generic[11];
+	x.u <<= vec2.generic[11];
+	vec1.generic[11] = x.s;
+	x.s = vec1.generic[12];
+	x.u <<= vec2.generic[12];
+	vec1.generic[12] = x.s;
+	x.s = vec1.generic[13];
+	x.u <<= vec2.generic[13];
+	vec1.generic[13] = x.s;
+	x.s = vec1.generic[14];
+	x.u <<= vec2.generic[14];
+	vec1.generic[14] = x.s;
+	x.s = vec1.generic[15];
+	x.u <<= vec2.generic[15];
+	vec1.generic[15] = x.s;
+	x.s = vec1.generic[16];
+	x.u <<= vec2.generic[16];
+	vec1.generic[16] = x.s;
+	x.s = vec1.generic[17];
+	x.u <<= vec2.generic[17];
+	vec1.generic[17] = x.s;
+	x.s = vec1.generic[18];
+	x.u <<= vec2.generic[18];
+	vec1.generic[18] = x.s;
+	x.s = vec1.generic[19];
+	x.u <<= vec2.generic[19];
+	vec1.generic[19] = x.s;
+	x.s = vec1.generic[20];
+	x.u <<= vec2.generic[20];
+	vec1.generic[20] = x.s;
+	x.s = vec1.generic[21];
+	x.u <<= vec2.generic[21];
+	vec1.generic[21] = x.s;
+	x.s = vec1.generic[22];
+	x.u <<= vec2.generic[22];
+	vec1.generic[22] = x.s;
+	x.s = vec1.generic[23];
+	x.u <<= vec2.generic[23];
+	vec1.generic[23] = x.s;
+	x.s = vec1.generic[24];
+	x.u <<= vec2.generic[24];
+	vec1.generic[24] = x.s;
+	x.s = vec1.generic[25];
+	x.u <<= vec2.generic[25];
+	vec1.generic[25] = x.s;
+	x.s = vec1.generic[26];
+	x.u <<= vec2.generic[26];
+	vec1.generic[26] = x.s;
+	x.s = vec1.generic[27];
+	x.u <<= vec2.generic[27];
+	vec1.generic[27] = x.s;
+	x.s = vec1.generic[28];
+	x.u <<= vec2.generic[28];
+	vec1.generic[28] = x.s;
+	x.s = vec1.generic[29];
+	x.u <<= vec2.generic[29];
+	vec1.generic[29] = x.s;
+	x.s = vec1.generic[30];
+	x.u <<= vec2.generic[30];
+	vec1.generic[30] = x.s;
+	x.s = vec1.generic[31];
+	x.u <<= vec2.generic[31];
+	vec1.generic[31] = x.s;
+	return vec1;
+}
+# define VINT8x32_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x32_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_splat(vec_uint8 x)
+{
+	vuint8x32 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	vec.generic[16] = x;
+	vec.generic[17] = x;
+	vec.generic[18] = x;
+	vec.generic[19] = x;
+	vec.generic[20] = x;
+	vec.generic[21] = x;
+	vec.generic[22] = x;
+	vec.generic[23] = x;
+	vec.generic[24] = x;
+	vec.generic[25] = x;
+	vec.generic[26] = x;
+	vec.generic[27] = x;
+	vec.generic[28] = x;
+	vec.generic[29] = x;
+	vec.generic[30] = x;
+	vec.generic[31] = x;
+	return vec;
+}
+# define VUINT8x32_SPLAT_DEFINED
+#endif
+#if !defined(VUINT8x32_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_load_aligned(const vec_uint8 x[32])
+{
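+	/* generic fallback: a plain memcpy of the 32 bytes; alignment is not exploited here */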
+	vuint8x32 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VUINT8x32_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x32_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_load(const vec_uint8 x[32])
+{
+	vuint8x32 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VUINT8x32_LOAD_DEFINED
+#endif
+#if !defined(VUINT8x32_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x32_store_aligned(vuint8x32 vec, vec_uint8 x[32])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VUINT8x32_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x32_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint8x32_store(vuint8x32 vec, vec_uint8 x[32])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VUINT8x32_STORE_DEFINED
+#endif
+#if !defined(VUINT8x32_ADD_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_add(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] + vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] + vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] + vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] + vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] + vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] + vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] + vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] + vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] + vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] + vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] + vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] + vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] + vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] + vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] + vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] + vec2.generic[31]);
+	return vec1;
+}
+# define VUINT8x32_ADD_DEFINED
+#endif
+#if !defined(VUINT8x32_SUB_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_sub(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] - vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] - vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] - vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] - vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] - vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] - vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] - vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] - vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] - vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] - vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] - vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] - vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] - vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] - vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] - vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] - vec2.generic[31]);
+	return vec1;
+}
+# define VUINT8x32_SUB_DEFINED
+#endif
+#if !defined(VUINT8x32_MUL_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_mul(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] * vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] * vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] * vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] * vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] * vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] * vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] * vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] * vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] * vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] * vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] * vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] * vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] * vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] * vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] * vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] * vec2.generic[31]);
+	return vec1;
+}
+# define VUINT8x32_MUL_DEFINED
+#endif
+#if !defined(VUINT8x32_DIV_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_div(vuint8x32 vec1, vuint8x32 vec2)
+{
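+	/* element-wise division; a zero divisor yields 0 for that lane rather than
+	 * dividing by zero */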
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] / vec2.generic[16]) : 0);
+	vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] / vec2.generic[17]) : 0);
+	vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] / vec2.generic[18]) : 0);
+	vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] / vec2.generic[19]) : 0);
+	vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] / vec2.generic[20]) : 0);
+	vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] / vec2.generic[21]) : 0);
+	vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] / vec2.generic[22]) : 0);
+	vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] / vec2.generic[23]) : 0);
+	vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] / vec2.generic[24]) : 0);
+	vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] / vec2.generic[25]) : 0);
+	vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] / vec2.generic[26]) : 0);
+	vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] / vec2.generic[27]) : 0);
+	vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] / vec2.generic[28]) : 0);
+	vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] / vec2.generic[29]) : 0);
+	vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] / vec2.generic[30]) : 0);
+	vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] / vec2.generic[31]) : 0);
+	return vec1;
+}
+# define VUINT8x32_DIV_DEFINED
+#endif
+#if !defined(VUINT8x32_MOD_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_mod(vuint8x32 vec1, vuint8x32 vec2)
+{
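+	/* element-wise modulo; as with division, a zero divisor yields 0 */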
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0);
+	vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] % vec2.generic[16]) : 0);
+	vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] % vec2.generic[17]) : 0);
+	vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] % vec2.generic[18]) : 0);
+	vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] % vec2.generic[19]) : 0);
+	vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] % vec2.generic[20]) : 0);
+	vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] % vec2.generic[21]) : 0);
+	vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] % vec2.generic[22]) : 0);
+	vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] % vec2.generic[23]) : 0);
+	vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] % vec2.generic[24]) : 0);
+	vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] % vec2.generic[25]) : 0);
+	vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] % vec2.generic[26]) : 0);
+	vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] % vec2.generic[27]) : 0);
+	vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] % vec2.generic[28]) : 0);
+	vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] % vec2.generic[29]) : 0);
+	vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] % vec2.generic[30]) : 0);
+	vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] % vec2.generic[31]) : 0);
+	return vec1;
+}
+# define VUINT8x32_MOD_DEFINED
+#endif
+#if !defined(VUINT8x32_AVG_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_avg(vuint8x32 vec1, vuint8x32 vec2)
+{
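+	/* average rounded up, computed without overflowing the intermediate sum:
+	 * (a >> 1) + (b >> 1) + ((a | b) & 1) == ceil((a + b) / 2) */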
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1);
+	vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1);
+	vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1);
+	vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1);
+	vec1.generic[8] = (vec1.generic[8] >> 1) + (vec2.generic[8] >> 1) + ((vec1.generic[8] | vec2.generic[8]) & 1);
+	vec1.generic[9] = (vec1.generic[9] >> 1) + (vec2.generic[9] >> 1) + ((vec1.generic[9] | vec2.generic[9]) & 1);
+	vec1.generic[10] = (vec1.generic[10] >> 1) + (vec2.generic[10] >> 1) + ((vec1.generic[10] | vec2.generic[10]) & 1);
+	vec1.generic[11] = (vec1.generic[11] >> 1) + (vec2.generic[11] >> 1) + ((vec1.generic[11] | vec2.generic[11]) & 1);
+	vec1.generic[12] = (vec1.generic[12] >> 1) + (vec2.generic[12] >> 1) + ((vec1.generic[12] | vec2.generic[12]) & 1);
+	vec1.generic[13] = (vec1.generic[13] >> 1) + (vec2.generic[13] >> 1) + ((vec1.generic[13] | vec2.generic[13]) & 1);
+	vec1.generic[14] = (vec1.generic[14] >> 1) + (vec2.generic[14] >> 1) + ((vec1.generic[14] | vec2.generic[14]) & 1);
+	vec1.generic[15] = (vec1.generic[15] >> 1) + (vec2.generic[15] >> 1) + ((vec1.generic[15] | vec2.generic[15]) & 1);
+	vec1.generic[16] = (vec1.generic[16] >> 1) + (vec2.generic[16] >> 1) + ((vec1.generic[16] | vec2.generic[16]) & 1);
+	vec1.generic[17] = (vec1.generic[17] >> 1) + (vec2.generic[17] >> 1) + ((vec1.generic[17] | vec2.generic[17]) & 1);
+	vec1.generic[18] = (vec1.generic[18] >> 1) + (vec2.generic[18] >> 1) + ((vec1.generic[18] | vec2.generic[18]) & 1);
+	vec1.generic[19] = (vec1.generic[19] >> 1) + (vec2.generic[19] >> 1) + ((vec1.generic[19] | vec2.generic[19]) & 1);
+	vec1.generic[20] = (vec1.generic[20] >> 1) + (vec2.generic[20] >> 1) + ((vec1.generic[20] | vec2.generic[20]) & 1);
+	vec1.generic[21] = (vec1.generic[21] >> 1) + (vec2.generic[21] >> 1) + ((vec1.generic[21] | vec2.generic[21]) & 1);
+	vec1.generic[22] = (vec1.generic[22] >> 1) + (vec2.generic[22] >> 1) + ((vec1.generic[22] | vec2.generic[22]) & 1);
+	vec1.generic[23] = (vec1.generic[23] >> 1) + (vec2.generic[23] >> 1) + ((vec1.generic[23] | vec2.generic[23]) & 1);
+	vec1.generic[24] = (vec1.generic[24] >> 1) + (vec2.generic[24] >> 1) + ((vec1.generic[24] | vec2.generic[24]) & 1);
+	vec1.generic[25] = (vec1.generic[25] >> 1) + (vec2.generic[25] >> 1) + ((vec1.generic[25] | vec2.generic[25]) & 1);
+	vec1.generic[26] = (vec1.generic[26] >> 1) + (vec2.generic[26] >> 1) + ((vec1.generic[26] | vec2.generic[26]) & 1);
+	vec1.generic[27] = (vec1.generic[27] >> 1) + (vec2.generic[27] >> 1) + ((vec1.generic[27] | vec2.generic[27]) & 1);
+	vec1.generic[28] = (vec1.generic[28] >> 1) + (vec2.generic[28] >> 1) + ((vec1.generic[28] | vec2.generic[28]) & 1);
+	vec1.generic[29] = (vec1.generic[29] >> 1) + (vec2.generic[29] >> 1) + ((vec1.generic[29] | vec2.generic[29]) & 1);
+	vec1.generic[30] = (vec1.generic[30] >> 1) + (vec2.generic[30] >> 1) + ((vec1.generic[30] | vec2.generic[30]) & 1);
+	vec1.generic[31] = (vec1.generic[31] >> 1) + (vec2.generic[31] >> 1) + ((vec1.generic[31] | vec2.generic[31]) & 1);
+	return vec1;
+}
+# define VUINT8x32_AVG_DEFINED
+#endif
+#if !defined(VUINT8x32_AND_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_and(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] & vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] & vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] & vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] & vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] & vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] & vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] & vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] & vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] & vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] & vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] & vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] & vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] & vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] & vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] & vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] & vec2.generic[31]);
+	return vec1;
+}
+# define VUINT8x32_AND_DEFINED
+#endif
+#if !defined(VUINT8x32_OR_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_or(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] | vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] | vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] | vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] | vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] | vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] | vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] | vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] | vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] | vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] | vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] | vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] | vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] | vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] | vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] | vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] | vec2.generic[31]);
+	return vec1;
+}
+# define VUINT8x32_OR_DEFINED
+#endif
+#if !defined(VUINT8x32_XOR_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_xor(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] ^ vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] ^ vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] ^ vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] ^ vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] ^ vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] ^ vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] ^ vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] ^ vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] ^ vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] ^ vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] ^ vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] ^ vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] ^ vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] ^ vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] ^ vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] ^ vec2.generic[31]);
+	return vec1;
+}
+# define VUINT8x32_XOR_DEFINED
+#endif
+#if !defined(VUINT8x32_NOT_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_not(vuint8x32 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	vec.generic[8] = ~vec.generic[8];
+	vec.generic[9] = ~vec.generic[9];
+	vec.generic[10] = ~vec.generic[10];
+	vec.generic[11] = ~vec.generic[11];
+	vec.generic[12] = ~vec.generic[12];
+	vec.generic[13] = ~vec.generic[13];
+	vec.generic[14] = ~vec.generic[14];
+	vec.generic[15] = ~vec.generic[15];
+	vec.generic[16] = ~vec.generic[16];
+	vec.generic[17] = ~vec.generic[17];
+	vec.generic[18] = ~vec.generic[18];
+	vec.generic[19] = ~vec.generic[19];
+	vec.generic[20] = ~vec.generic[20];
+	vec.generic[21] = ~vec.generic[21];
+	vec.generic[22] = ~vec.generic[22];
+	vec.generic[23] = ~vec.generic[23];
+	vec.generic[24] = ~vec.generic[24];
+	vec.generic[25] = ~vec.generic[25];
+	vec.generic[26] = ~vec.generic[26];
+	vec.generic[27] = ~vec.generic[27];
+	vec.generic[28] = ~vec.generic[28];
+	vec.generic[29] = ~vec.generic[29];
+	vec.generic[30] = ~vec.generic[30];
+	vec.generic[31] = ~vec.generic[31];
+	return vec;
+}
+# define VUINT8x32_NOT_DEFINED
+#endif
+#if !defined(VUINT8x32_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmplt(vuint8x32 vec1, vuint8x32 vec2)
+{
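+	/* comparison results are byte masks: memset writes 0xFF into the lane when
+	 * the predicate holds and 0 otherwise */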
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] < vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] < vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] < vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] < vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] < vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] < vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] < vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] < vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] < vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] < vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] < vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] < vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] < vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] < vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] < vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] < vec2.generic[31]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x32_CMPLT_DEFINED
+#endif
+#if !defined(VUINT8x32_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpeq(vuint8x32 vec1, vuint8x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] == vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] == vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] == vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] == vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] == vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] == vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] == vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] == vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] == vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] == vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] == vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] == vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] == vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] == vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] == vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] == vec2.generic[31]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x32_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT8x32_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpgt(vuint8x32 vec1, vuint8x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] > vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] > vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] > vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] > vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] > vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] > vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] > vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] > vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] > vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] > vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] > vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] > vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] > vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] > vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] > vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] > vec2.generic[31]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x32_CMPGT_DEFINED
+#endif
+#if !defined(VUINT8x32_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmple(vuint8x32 vec1, vuint8x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] <= vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] <= vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] <= vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] <= vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] <= vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] <= vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] <= vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] <= vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] <= vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] <= vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] <= vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] <= vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] <= vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] <= vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] <= vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] <= vec2.generic[31]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x32_CMPLE_DEFINED
+#endif
+#if !defined(VUINT8x32_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpge(vuint8x32 vec1, vuint8x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] >= vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] >= vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] >= vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] >= vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] >= vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] >= vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] >= vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] >= vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] >= vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] >= vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] >= vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] >= vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] >= vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] >= vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] >= vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] >= vec2.generic[31]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x32_CMPGE_DEFINED
+#endif
+#if !defined(VUINT8x32_MIN_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_min(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] < vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] < vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] < vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] < vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] < vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] < vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] < vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] < vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] < vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] < vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] < vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] < vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] < vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] < vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] < vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] < vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+	return vec1;
+}
+# define VUINT8x32_MIN_DEFINED
+#endif
+#if !defined(VUINT8x32_MAX_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_max(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] > vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] > vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] > vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] > vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] > vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] > vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] > vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] > vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] > vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] > vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] > vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] > vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] > vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] > vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] > vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] > vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+	return vec1;
+}
+# define VUINT8x32_MAX_DEFINED
+#endif
+#if !defined(VUINT8x32_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_rshift(vuint8x32 vec1, vuint8x32 vec2)
+{
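+	/* each lane of vec1 is shifted by the corresponding lane of vec2 */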
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	vec1.generic[8] >>= vec2.generic[8];
+	vec1.generic[9] >>= vec2.generic[9];
+	vec1.generic[10] >>= vec2.generic[10];
+	vec1.generic[11] >>= vec2.generic[11];
+	vec1.generic[12] >>= vec2.generic[12];
+	vec1.generic[13] >>= vec2.generic[13];
+	vec1.generic[14] >>= vec2.generic[14];
+	vec1.generic[15] >>= vec2.generic[15];
+	vec1.generic[16] >>= vec2.generic[16];
+	vec1.generic[17] >>= vec2.generic[17];
+	vec1.generic[18] >>= vec2.generic[18];
+	vec1.generic[19] >>= vec2.generic[19];
+	vec1.generic[20] >>= vec2.generic[20];
+	vec1.generic[21] >>= vec2.generic[21];
+	vec1.generic[22] >>= vec2.generic[22];
+	vec1.generic[23] >>= vec2.generic[23];
+	vec1.generic[24] >>= vec2.generic[24];
+	vec1.generic[25] >>= vec2.generic[25];
+	vec1.generic[26] >>= vec2.generic[26];
+	vec1.generic[27] >>= vec2.generic[27];
+	vec1.generic[28] >>= vec2.generic[28];
+	vec1.generic[29] >>= vec2.generic[29];
+	vec1.generic[30] >>= vec2.generic[30];
+	vec1.generic[31] >>= vec2.generic[31];
+	return vec1;
+}
+# define VUINT8x32_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x32_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_lrshift(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	vec1.generic[8] >>= vec2.generic[8];
+	vec1.generic[9] >>= vec2.generic[9];
+	vec1.generic[10] >>= vec2.generic[10];
+	vec1.generic[11] >>= vec2.generic[11];
+	vec1.generic[12] >>= vec2.generic[12];
+	vec1.generic[13] >>= vec2.generic[13];
+	vec1.generic[14] >>= vec2.generic[14];
+	vec1.generic[15] >>= vec2.generic[15];
+	vec1.generic[16] >>= vec2.generic[16];
+	vec1.generic[17] >>= vec2.generic[17];
+	vec1.generic[18] >>= vec2.generic[18];
+	vec1.generic[19] >>= vec2.generic[19];
+	vec1.generic[20] >>= vec2.generic[20];
+	vec1.generic[21] >>= vec2.generic[21];
+	vec1.generic[22] >>= vec2.generic[22];
+	vec1.generic[23] >>= vec2.generic[23];
+	vec1.generic[24] >>= vec2.generic[24];
+	vec1.generic[25] >>= vec2.generic[25];
+	vec1.generic[26] >>= vec2.generic[26];
+	vec1.generic[27] >>= vec2.generic[27];
+	vec1.generic[28] >>= vec2.generic[28];
+	vec1.generic[29] >>= vec2.generic[29];
+	vec1.generic[30] >>= vec2.generic[30];
+	vec1.generic[31] >>= vec2.generic[31];
+	return vec1;
+}
+# define VUINT8x32_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x32_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x32 vuint8x32_lshift(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	vec1.generic[4] <<= vec2.generic[4];
+	vec1.generic[5] <<= vec2.generic[5];
+	vec1.generic[6] <<= vec2.generic[6];
+	vec1.generic[7] <<= vec2.generic[7];
+	vec1.generic[8] <<= vec2.generic[8];
+	vec1.generic[9] <<= vec2.generic[9];
+	vec1.generic[10] <<= vec2.generic[10];
+	vec1.generic[11] <<= vec2.generic[11];
+	vec1.generic[12] <<= vec2.generic[12];
+	vec1.generic[13] <<= vec2.generic[13];
+	vec1.generic[14] <<= vec2.generic[14];
+	vec1.generic[15] <<= vec2.generic[15];
+	vec1.generic[16] <<= vec2.generic[16];
+	vec1.generic[17] <<= vec2.generic[17];
+	vec1.generic[18] <<= vec2.generic[18];
+	vec1.generic[19] <<= vec2.generic[19];
+	vec1.generic[20] <<= vec2.generic[20];
+	vec1.generic[21] <<= vec2.generic[21];
+	vec1.generic[22] <<= vec2.generic[22];
+	vec1.generic[23] <<= vec2.generic[23];
+	vec1.generic[24] <<= vec2.generic[24];
+	vec1.generic[25] <<= vec2.generic[25];
+	vec1.generic[26] <<= vec2.generic[26];
+	vec1.generic[27] <<= vec2.generic[27];
+	vec1.generic[28] <<= vec2.generic[28];
+	vec1.generic[29] <<= vec2.generic[29];
+	vec1.generic[30] <<= vec2.generic[30];
+	vec1.generic[31] <<= vec2.generic[31];
+	return vec1;
+}
+# define VUINT8x32_LSHIFT_DEFINED
+#endif
+#if !defined(VINT8x64_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_splat(vec_int8 x)
+{
+	vint8x64 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	vec.generic[16] = x;
+	vec.generic[17] = x;
+	vec.generic[18] = x;
+	vec.generic[19] = x;
+	vec.generic[20] = x;
+	vec.generic[21] = x;
+	vec.generic[22] = x;
+	vec.generic[23] = x;
+	vec.generic[24] = x;
+	vec.generic[25] = x;
+	vec.generic[26] = x;
+	vec.generic[27] = x;
+	vec.generic[28] = x;
+	vec.generic[29] = x;
+	vec.generic[30] = x;
+	vec.generic[31] = x;
+	vec.generic[32] = x;
+	vec.generic[33] = x;
+	vec.generic[34] = x;
+	vec.generic[35] = x;
+	vec.generic[36] = x;
+	vec.generic[37] = x;
+	vec.generic[38] = x;
+	vec.generic[39] = x;
+	vec.generic[40] = x;
+	vec.generic[41] = x;
+	vec.generic[42] = x;
+	vec.generic[43] = x;
+	vec.generic[44] = x;
+	vec.generic[45] = x;
+	vec.generic[46] = x;
+	vec.generic[47] = x;
+	vec.generic[48] = x;
+	vec.generic[49] = x;
+	vec.generic[50] = x;
+	vec.generic[51] = x;
+	vec.generic[52] = x;
+	vec.generic[53] = x;
+	vec.generic[54] = x;
+	vec.generic[55] = x;
+	vec.generic[56] = x;
+	vec.generic[57] = x;
+	vec.generic[58] = x;
+	vec.generic[59] = x;
+	vec.generic[60] = x;
+	vec.generic[61] = x;
+	vec.generic[62] = x;
+	vec.generic[63] = x;
+	return vec;
+}
+# define VINT8x64_SPLAT_DEFINED
+#endif
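+/* The generic loads and stores are plain memcpy, so the aligned and
+ * unaligned variants behave identically here. */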
+#if !defined(VINT8x64_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_load_aligned(const vec_int8 x[64])
+{
+	vint8x64 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VINT8x64_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x64_LOAD_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_load(const vec_int8 x[64])
+{
+	vint8x64 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VINT8x64_LOAD_DEFINED
+#endif
+#if !defined(VINT8x64_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x64_store_aligned(vint8x64 vec, vec_int8 x[64])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VINT8x64_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x64_STORE_DEFINED)
+VEC_FUNC_IMPL void vint8x64_store(vint8x64 vec, vec_int8 x[64])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VINT8x64_STORE_DEFINED
+#endif
+#if !defined(VINT8x64_ADD_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_add(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] + vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] + vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] + vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] + vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] + vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] + vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] + vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] + vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] + vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] + vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] + vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] + vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] + vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] + vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] + vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] + vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] + vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] + vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] + vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] + vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] + vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] + vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] + vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] + vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] + vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] + vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] + vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] + vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] + vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] + vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] + vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] + vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] + vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] + vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] + vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] + vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] + vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] + vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] + vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] + vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] + vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] + vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] + vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] + vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] + vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] + vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] + vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] + vec2.generic[63]);
+	return vec1;
+}
+# define VINT8x64_ADD_DEFINED
+#endif
+#if !defined(VINT8x64_SUB_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_sub(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] - vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] - vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] - vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] - vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] - vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] - vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] - vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] - vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] - vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] - vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] - vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] - vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] - vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] - vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] - vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] - vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] - vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] - vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] - vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] - vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] - vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] - vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] - vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] - vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] - vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] - vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] - vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] - vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] - vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] - vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] - vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] - vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] - vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] - vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] - vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] - vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] - vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] - vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] - vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] - vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] - vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] - vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] - vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] - vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] - vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] - vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] - vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] - vec2.generic[63]);
+	return vec1;
+}
+# define VINT8x64_SUB_DEFINED
+#endif
+#if !defined(VINT8x64_MUL_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_mul(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] * vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] * vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] * vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] * vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] * vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] * vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] * vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] * vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] * vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] * vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] * vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] * vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] * vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] * vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] * vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] * vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] * vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] * vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] * vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] * vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] * vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] * vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] * vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] * vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] * vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] * vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] * vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] * vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] * vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] * vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] * vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] * vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] * vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] * vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] * vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] * vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] * vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] * vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] * vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] * vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] * vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] * vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] * vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] * vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] * vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] * vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] * vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] * vec2.generic[63]);
+	return vec1;
+}
+# define VINT8x64_MUL_DEFINED
+#endif
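+/* Division is guarded per lane: a zero divisor yields 0 in that lane
+ * rather than invoking undefined behaviour. */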
+#if !defined(VINT8x64_DIV_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_div(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] / vec2.generic[16]) : 0);
+	vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] / vec2.generic[17]) : 0);
+	vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] / vec2.generic[18]) : 0);
+	vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] / vec2.generic[19]) : 0);
+	vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] / vec2.generic[20]) : 0);
+	vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] / vec2.generic[21]) : 0);
+	vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] / vec2.generic[22]) : 0);
+	vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] / vec2.generic[23]) : 0);
+	vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] / vec2.generic[24]) : 0);
+	vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] / vec2.generic[25]) : 0);
+	vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] / vec2.generic[26]) : 0);
+	vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] / vec2.generic[27]) : 0);
+	vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] / vec2.generic[28]) : 0);
+	vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] / vec2.generic[29]) : 0);
+	vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] / vec2.generic[30]) : 0);
+	vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] / vec2.generic[31]) : 0);
+	vec1.generic[32] = (vec2.generic[32] ? (vec1.generic[32] / vec2.generic[32]) : 0);
+	vec1.generic[33] = (vec2.generic[33] ? (vec1.generic[33] / vec2.generic[33]) : 0);
+	vec1.generic[34] = (vec2.generic[34] ? (vec1.generic[34] / vec2.generic[34]) : 0);
+	vec1.generic[35] = (vec2.generic[35] ? (vec1.generic[35] / vec2.generic[35]) : 0);
+	vec1.generic[36] = (vec2.generic[36] ? (vec1.generic[36] / vec2.generic[36]) : 0);
+	vec1.generic[37] = (vec2.generic[37] ? (vec1.generic[37] / vec2.generic[37]) : 0);
+	vec1.generic[38] = (vec2.generic[38] ? (vec1.generic[38] / vec2.generic[38]) : 0);
+	vec1.generic[39] = (vec2.generic[39] ? (vec1.generic[39] / vec2.generic[39]) : 0);
+	vec1.generic[40] = (vec2.generic[40] ? (vec1.generic[40] / vec2.generic[40]) : 0);
+	vec1.generic[41] = (vec2.generic[41] ? (vec1.generic[41] / vec2.generic[41]) : 0);
+	vec1.generic[42] = (vec2.generic[42] ? (vec1.generic[42] / vec2.generic[42]) : 0);
+	vec1.generic[43] = (vec2.generic[43] ? (vec1.generic[43] / vec2.generic[43]) : 0);
+	vec1.generic[44] = (vec2.generic[44] ? (vec1.generic[44] / vec2.generic[44]) : 0);
+	vec1.generic[45] = (vec2.generic[45] ? (vec1.generic[45] / vec2.generic[45]) : 0);
+	vec1.generic[46] = (vec2.generic[46] ? (vec1.generic[46] / vec2.generic[46]) : 0);
+	vec1.generic[47] = (vec2.generic[47] ? (vec1.generic[47] / vec2.generic[47]) : 0);
+	vec1.generic[48] = (vec2.generic[48] ? (vec1.generic[48] / vec2.generic[48]) : 0);
+	vec1.generic[49] = (vec2.generic[49] ? (vec1.generic[49] / vec2.generic[49]) : 0);
+	vec1.generic[50] = (vec2.generic[50] ? (vec1.generic[50] / vec2.generic[50]) : 0);
+	vec1.generic[51] = (vec2.generic[51] ? (vec1.generic[51] / vec2.generic[51]) : 0);
+	vec1.generic[52] = (vec2.generic[52] ? (vec1.generic[52] / vec2.generic[52]) : 0);
+	vec1.generic[53] = (vec2.generic[53] ? (vec1.generic[53] / vec2.generic[53]) : 0);
+	vec1.generic[54] = (vec2.generic[54] ? (vec1.generic[54] / vec2.generic[54]) : 0);
+	vec1.generic[55] = (vec2.generic[55] ? (vec1.generic[55] / vec2.generic[55]) : 0);
+	vec1.generic[56] = (vec2.generic[56] ? (vec1.generic[56] / vec2.generic[56]) : 0);
+	vec1.generic[57] = (vec2.generic[57] ? (vec1.generic[57] / vec2.generic[57]) : 0);
+	vec1.generic[58] = (vec2.generic[58] ? (vec1.generic[58] / vec2.generic[58]) : 0);
+	vec1.generic[59] = (vec2.generic[59] ? (vec1.generic[59] / vec2.generic[59]) : 0);
+	vec1.generic[60] = (vec2.generic[60] ? (vec1.generic[60] / vec2.generic[60]) : 0);
+	vec1.generic[61] = (vec2.generic[61] ? (vec1.generic[61] / vec2.generic[61]) : 0);
+	vec1.generic[62] = (vec2.generic[62] ? (vec1.generic[62] / vec2.generic[62]) : 0);
+	vec1.generic[63] = (vec2.generic[63] ? (vec1.generic[63] / vec2.generic[63]) : 0);
+	return vec1;
+}
+# define VINT8x64_DIV_DEFINED
+#endif
+#if !defined(VINT8x64_MOD_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_mod(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0);
+	vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] % vec2.generic[16]) : 0);
+	vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] % vec2.generic[17]) : 0);
+	vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] % vec2.generic[18]) : 0);
+	vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] % vec2.generic[19]) : 0);
+	vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] % vec2.generic[20]) : 0);
+	vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] % vec2.generic[21]) : 0);
+	vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] % vec2.generic[22]) : 0);
+	vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] % vec2.generic[23]) : 0);
+	vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] % vec2.generic[24]) : 0);
+	vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] % vec2.generic[25]) : 0);
+	vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] % vec2.generic[26]) : 0);
+	vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] % vec2.generic[27]) : 0);
+	vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] % vec2.generic[28]) : 0);
+	vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] % vec2.generic[29]) : 0);
+	vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] % vec2.generic[30]) : 0);
+	vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] % vec2.generic[31]) : 0);
+	vec1.generic[32] = (vec2.generic[32] ? (vec1.generic[32] % vec2.generic[32]) : 0);
+	vec1.generic[33] = (vec2.generic[33] ? (vec1.generic[33] % vec2.generic[33]) : 0);
+	vec1.generic[34] = (vec2.generic[34] ? (vec1.generic[34] % vec2.generic[34]) : 0);
+	vec1.generic[35] = (vec2.generic[35] ? (vec1.generic[35] % vec2.generic[35]) : 0);
+	vec1.generic[36] = (vec2.generic[36] ? (vec1.generic[36] % vec2.generic[36]) : 0);
+	vec1.generic[37] = (vec2.generic[37] ? (vec1.generic[37] % vec2.generic[37]) : 0);
+	vec1.generic[38] = (vec2.generic[38] ? (vec1.generic[38] % vec2.generic[38]) : 0);
+	vec1.generic[39] = (vec2.generic[39] ? (vec1.generic[39] % vec2.generic[39]) : 0);
+	vec1.generic[40] = (vec2.generic[40] ? (vec1.generic[40] % vec2.generic[40]) : 0);
+	vec1.generic[41] = (vec2.generic[41] ? (vec1.generic[41] % vec2.generic[41]) : 0);
+	vec1.generic[42] = (vec2.generic[42] ? (vec1.generic[42] % vec2.generic[42]) : 0);
+	vec1.generic[43] = (vec2.generic[43] ? (vec1.generic[43] % vec2.generic[43]) : 0);
+	vec1.generic[44] = (vec2.generic[44] ? (vec1.generic[44] % vec2.generic[44]) : 0);
+	vec1.generic[45] = (vec2.generic[45] ? (vec1.generic[45] % vec2.generic[45]) : 0);
+	vec1.generic[46] = (vec2.generic[46] ? (vec1.generic[46] % vec2.generic[46]) : 0);
+	vec1.generic[47] = (vec2.generic[47] ? (vec1.generic[47] % vec2.generic[47]) : 0);
+	vec1.generic[48] = (vec2.generic[48] ? (vec1.generic[48] % vec2.generic[48]) : 0);
+	vec1.generic[49] = (vec2.generic[49] ? (vec1.generic[49] % vec2.generic[49]) : 0);
+	vec1.generic[50] = (vec2.generic[50] ? (vec1.generic[50] % vec2.generic[50]) : 0);
+	vec1.generic[51] = (vec2.generic[51] ? (vec1.generic[51] % vec2.generic[51]) : 0);
+	vec1.generic[52] = (vec2.generic[52] ? (vec1.generic[52] % vec2.generic[52]) : 0);
+	vec1.generic[53] = (vec2.generic[53] ? (vec1.generic[53] % vec2.generic[53]) : 0);
+	vec1.generic[54] = (vec2.generic[54] ? (vec1.generic[54] % vec2.generic[54]) : 0);
+	vec1.generic[55] = (vec2.generic[55] ? (vec1.generic[55] % vec2.generic[55]) : 0);
+	vec1.generic[56] = (vec2.generic[56] ? (vec1.generic[56] % vec2.generic[56]) : 0);
+	vec1.generic[57] = (vec2.generic[57] ? (vec1.generic[57] % vec2.generic[57]) : 0);
+	vec1.generic[58] = (vec2.generic[58] ? (vec1.generic[58] % vec2.generic[58]) : 0);
+	vec1.generic[59] = (vec2.generic[59] ? (vec1.generic[59] % vec2.generic[59]) : 0);
+	vec1.generic[60] = (vec2.generic[60] ? (vec1.generic[60] % vec2.generic[60]) : 0);
+	vec1.generic[61] = (vec2.generic[61] ? (vec1.generic[61] % vec2.generic[61]) : 0);
+	vec1.generic[62] = (vec2.generic[62] ? (vec1.generic[62] % vec2.generic[62]) : 0);
+	vec1.generic[63] = (vec2.generic[63] ? (vec1.generic[63] % vec2.generic[63]) : 0);
+	return vec1;
+}
+# define VINT8x64_MOD_DEFINED
+#endif
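+/* Rounding average without a wider intermediate: halve each operand first,
+ * then add back the dropped remainders so odd sums round toward positive
+ * infinity instead of truncating. */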
+#if !defined(VINT8x64_AVG_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_avg(vint8x64 vec1, vint8x64 vec2)
+{
+	vec_int8 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[4] % 2);
+	y_d_rem = (vec2.generic[4] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[5] % 2);
+	y_d_rem = (vec2.generic[5] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[6] % 2);
+	y_d_rem = (vec2.generic[6] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[7] % 2);
+	y_d_rem = (vec2.generic[7] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[8] % 2);
+	y_d_rem = (vec2.generic[8] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[8] = ((vec1.generic[8] / 2) + (vec2.generic[8] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[9] % 2);
+	y_d_rem = (vec2.generic[9] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[9] = ((vec1.generic[9] / 2) + (vec2.generic[9] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[10] % 2);
+	y_d_rem = (vec2.generic[10] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[10] = ((vec1.generic[10] / 2) + (vec2.generic[10] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[11] % 2);
+	y_d_rem = (vec2.generic[11] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[11] = ((vec1.generic[11] / 2) + (vec2.generic[11] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[12] % 2);
+	y_d_rem = (vec2.generic[12] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[12] = ((vec1.generic[12] / 2) + (vec2.generic[12] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[13] % 2);
+	y_d_rem = (vec2.generic[13] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[13] = ((vec1.generic[13] / 2) + (vec2.generic[13] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[14] % 2);
+	y_d_rem = (vec2.generic[14] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[14] = ((vec1.generic[14] / 2) + (vec2.generic[14] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[15] % 2);
+	y_d_rem = (vec2.generic[15] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[15] = ((vec1.generic[15] / 2) + (vec2.generic[15] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[16] % 2);
+	y_d_rem = (vec2.generic[16] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[16] = ((vec1.generic[16] / 2) + (vec2.generic[16] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[17] % 2);
+	y_d_rem = (vec2.generic[17] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[17] = ((vec1.generic[17] / 2) + (vec2.generic[17] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[18] % 2);
+	y_d_rem = (vec2.generic[18] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[18] = ((vec1.generic[18] / 2) + (vec2.generic[18] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[19] % 2);
+	y_d_rem = (vec2.generic[19] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[19] = ((vec1.generic[19] / 2) + (vec2.generic[19] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[20] % 2);
+	y_d_rem = (vec2.generic[20] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[20] = ((vec1.generic[20] / 2) + (vec2.generic[20] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[21] % 2);
+	y_d_rem = (vec2.generic[21] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[21] = ((vec1.generic[21] / 2) + (vec2.generic[21] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[22] % 2);
+	y_d_rem = (vec2.generic[22] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[22] = ((vec1.generic[22] / 2) + (vec2.generic[22] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[23] % 2);
+	y_d_rem = (vec2.generic[23] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[23] = ((vec1.generic[23] / 2) + (vec2.generic[23] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[24] % 2);
+	y_d_rem = (vec2.generic[24] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[24] = ((vec1.generic[24] / 2) + (vec2.generic[24] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[25] % 2);
+	y_d_rem = (vec2.generic[25] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[25] = ((vec1.generic[25] / 2) + (vec2.generic[25] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[26] % 2);
+	y_d_rem = (vec2.generic[26] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[26] = ((vec1.generic[26] / 2) + (vec2.generic[26] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[27] % 2);
+	y_d_rem = (vec2.generic[27] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[27] = ((vec1.generic[27] / 2) + (vec2.generic[27] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[28] % 2);
+	y_d_rem = (vec2.generic[28] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[28] = ((vec1.generic[28] / 2) + (vec2.generic[28] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[29] % 2);
+	y_d_rem = (vec2.generic[29] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[29] = ((vec1.generic[29] / 2) + (vec2.generic[29] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[30] % 2);
+	y_d_rem = (vec2.generic[30] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[30] = ((vec1.generic[30] / 2) + (vec2.generic[30] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[31] % 2);
+	y_d_rem = (vec2.generic[31] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[31] = ((vec1.generic[31] / 2) + (vec2.generic[31] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[32] % 2);
+	y_d_rem = (vec2.generic[32] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[32] = ((vec1.generic[32] / 2) + (vec2.generic[32] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[33] % 2);
+	y_d_rem = (vec2.generic[33] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[33] = ((vec1.generic[33] / 2) + (vec2.generic[33] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[34] % 2);
+	y_d_rem = (vec2.generic[34] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[34] = ((vec1.generic[34] / 2) + (vec2.generic[34] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[35] % 2);
+	y_d_rem = (vec2.generic[35] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[35] = ((vec1.generic[35] / 2) + (vec2.generic[35] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[36] % 2);
+	y_d_rem = (vec2.generic[36] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[36] = ((vec1.generic[36] / 2) + (vec2.generic[36] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[37] % 2);
+	y_d_rem = (vec2.generic[37] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[37] = ((vec1.generic[37] / 2) + (vec2.generic[37] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[38] % 2);
+	y_d_rem = (vec2.generic[38] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[38] = ((vec1.generic[38] / 2) + (vec2.generic[38] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[39] % 2);
+	y_d_rem = (vec2.generic[39] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[39] = ((vec1.generic[39] / 2) + (vec2.generic[39] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[40] % 2);
+	y_d_rem = (vec2.generic[40] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[40] = ((vec1.generic[40] / 2) + (vec2.generic[40] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[41] % 2);
+	y_d_rem = (vec2.generic[41] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[41] = ((vec1.generic[41] / 2) + (vec2.generic[41] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[42] % 2);
+	y_d_rem = (vec2.generic[42] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[42] = ((vec1.generic[42] / 2) + (vec2.generic[42] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[43] % 2);
+	y_d_rem = (vec2.generic[43] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[43] = ((vec1.generic[43] / 2) + (vec2.generic[43] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[44] % 2);
+	y_d_rem = (vec2.generic[44] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[44] = ((vec1.generic[44] / 2) + (vec2.generic[44] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[45] % 2);
+	y_d_rem = (vec2.generic[45] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[45] = ((vec1.generic[45] / 2) + (vec2.generic[45] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[46] % 2);
+	y_d_rem = (vec2.generic[46] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[46] = ((vec1.generic[46] / 2) + (vec2.generic[46] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[47] % 2);
+	y_d_rem = (vec2.generic[47] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[47] = ((vec1.generic[47] / 2) + (vec2.generic[47] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[48] % 2);
+	y_d_rem = (vec2.generic[48] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[48] = ((vec1.generic[48] / 2) + (vec2.generic[48] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[49] % 2);
+	y_d_rem = (vec2.generic[49] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[49] = ((vec1.generic[49] / 2) + (vec2.generic[49] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[50] % 2);
+	y_d_rem = (vec2.generic[50] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[50] = ((vec1.generic[50] / 2) + (vec2.generic[50] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[51] % 2);
+	y_d_rem = (vec2.generic[51] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[51] = ((vec1.generic[51] / 2) + (vec2.generic[51] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[52] % 2);
+	y_d_rem = (vec2.generic[52] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[52] = ((vec1.generic[52] / 2) + (vec2.generic[52] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[53] % 2);
+	y_d_rem = (vec2.generic[53] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[53] = ((vec1.generic[53] / 2) + (vec2.generic[53] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[54] % 2);
+	y_d_rem = (vec2.generic[54] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[54] = ((vec1.generic[54] / 2) + (vec2.generic[54] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[55] % 2);
+	y_d_rem = (vec2.generic[55] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[55] = ((vec1.generic[55] / 2) + (vec2.generic[55] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[56] % 2);
+	y_d_rem = (vec2.generic[56] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[56] = ((vec1.generic[56] / 2) + (vec2.generic[56] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[57] % 2);
+	y_d_rem = (vec2.generic[57] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[57] = ((vec1.generic[57] / 2) + (vec2.generic[57] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[58] % 2);
+	y_d_rem = (vec2.generic[58] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[58] = ((vec1.generic[58] / 2) + (vec2.generic[58] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[59] % 2);
+	y_d_rem = (vec2.generic[59] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[59] = ((vec1.generic[59] / 2) + (vec2.generic[59] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[60] % 2);
+	y_d_rem = (vec2.generic[60] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[60] = ((vec1.generic[60] / 2) + (vec2.generic[60] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[61] % 2);
+	y_d_rem = (vec2.generic[61] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[61] = ((vec1.generic[61] / 2) + (vec2.generic[61] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[62] % 2);
+	y_d_rem = (vec2.generic[62] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[62] = ((vec1.generic[62] / 2) + (vec2.generic[62] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[63] % 2);
+	y_d_rem = (vec2.generic[63] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[63] = ((vec1.generic[63] / 2) + (vec2.generic[63] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT8x64_AVG_DEFINED
+#endif
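+/* Illustrative usage (not part of the generated header; array contents are
+ * placeholders):
+ *
+ *     vec_int8 a[64], b[64], out[64];
+ *     vint8x64 v1, v2;
+ *     // fill a and b, then:
+ *     v1 = vint8x64_load(a);
+ *     v2 = vint8x64_load(b);
+ *     vint8x64_store(vint8x64_avg(v1, v2), out);
+ */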
+#if !defined(VINT8x64_AND_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_and(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] & vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] & vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] & vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] & vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] & vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] & vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] & vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] & vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] & vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] & vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] & vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] & vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] & vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] & vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] & vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] & vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] & vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] & vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] & vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] & vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] & vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] & vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] & vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] & vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] & vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] & vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] & vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] & vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] & vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] & vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] & vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] & vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] & vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] & vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] & vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] & vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] & vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] & vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] & vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] & vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] & vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] & vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] & vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] & vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] & vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] & vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] & vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] & vec2.generic[63]);
+	return vec1;
+}
+# define VINT8x64_AND_DEFINED
+#endif
+#if !defined(VINT8x64_OR_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_or(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] | vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] | vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] | vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] | vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] | vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] | vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] | vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] | vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] | vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] | vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] | vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] | vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] | vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] | vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] | vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] | vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] | vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] | vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] | vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] | vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] | vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] | vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] | vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] | vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] | vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] | vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] | vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] | vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] | vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] | vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] | vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] | vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] | vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] | vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] | vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] | vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] | vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] | vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] | vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] | vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] | vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] | vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] | vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] | vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] | vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] | vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] | vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] | vec2.generic[63]);
+	return vec1;
+}
+# define VINT8x64_OR_DEFINED
+#endif
+#if !defined(VINT8x64_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_xor(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] ^ vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] ^ vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] ^ vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] ^ vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] ^ vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] ^ vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] ^ vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] ^ vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] ^ vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] ^ vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] ^ vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] ^ vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] ^ vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] ^ vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] ^ vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] ^ vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] ^ vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] ^ vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] ^ vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] ^ vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] ^ vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] ^ vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] ^ vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] ^ vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] ^ vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] ^ vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] ^ vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] ^ vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] ^ vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] ^ vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] ^ vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] ^ vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] ^ vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] ^ vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] ^ vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] ^ vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] ^ vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] ^ vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] ^ vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] ^ vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] ^ vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] ^ vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] ^ vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] ^ vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] ^ vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] ^ vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] ^ vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] ^ vec2.generic[63]);
+	return vec1;
+}
+# define VINT8x64_XOR_DEFINED
+#endif
+#if !defined(VINT8x64_NOT_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_not(vint8x64 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	vec.generic[8] = ~vec.generic[8];
+	vec.generic[9] = ~vec.generic[9];
+	vec.generic[10] = ~vec.generic[10];
+	vec.generic[11] = ~vec.generic[11];
+	vec.generic[12] = ~vec.generic[12];
+	vec.generic[13] = ~vec.generic[13];
+	vec.generic[14] = ~vec.generic[14];
+	vec.generic[15] = ~vec.generic[15];
+	vec.generic[16] = ~vec.generic[16];
+	vec.generic[17] = ~vec.generic[17];
+	vec.generic[18] = ~vec.generic[18];
+	vec.generic[19] = ~vec.generic[19];
+	vec.generic[20] = ~vec.generic[20];
+	vec.generic[21] = ~vec.generic[21];
+	vec.generic[22] = ~vec.generic[22];
+	vec.generic[23] = ~vec.generic[23];
+	vec.generic[24] = ~vec.generic[24];
+	vec.generic[25] = ~vec.generic[25];
+	vec.generic[26] = ~vec.generic[26];
+	vec.generic[27] = ~vec.generic[27];
+	vec.generic[28] = ~vec.generic[28];
+	vec.generic[29] = ~vec.generic[29];
+	vec.generic[30] = ~vec.generic[30];
+	vec.generic[31] = ~vec.generic[31];
+	vec.generic[32] = ~vec.generic[32];
+	vec.generic[33] = ~vec.generic[33];
+	vec.generic[34] = ~vec.generic[34];
+	vec.generic[35] = ~vec.generic[35];
+	vec.generic[36] = ~vec.generic[36];
+	vec.generic[37] = ~vec.generic[37];
+	vec.generic[38] = ~vec.generic[38];
+	vec.generic[39] = ~vec.generic[39];
+	vec.generic[40] = ~vec.generic[40];
+	vec.generic[41] = ~vec.generic[41];
+	vec.generic[42] = ~vec.generic[42];
+	vec.generic[43] = ~vec.generic[43];
+	vec.generic[44] = ~vec.generic[44];
+	vec.generic[45] = ~vec.generic[45];
+	vec.generic[46] = ~vec.generic[46];
+	vec.generic[47] = ~vec.generic[47];
+	vec.generic[48] = ~vec.generic[48];
+	vec.generic[49] = ~vec.generic[49];
+	vec.generic[50] = ~vec.generic[50];
+	vec.generic[51] = ~vec.generic[51];
+	vec.generic[52] = ~vec.generic[52];
+	vec.generic[53] = ~vec.generic[53];
+	vec.generic[54] = ~vec.generic[54];
+	vec.generic[55] = ~vec.generic[55];
+	vec.generic[56] = ~vec.generic[56];
+	vec.generic[57] = ~vec.generic[57];
+	vec.generic[58] = ~vec.generic[58];
+	vec.generic[59] = ~vec.generic[59];
+	vec.generic[60] = ~vec.generic[60];
+	vec.generic[61] = ~vec.generic[61];
+	vec.generic[62] = ~vec.generic[62];
+	vec.generic[63] = ~vec.generic[63];
+	return vec;
+}
+# define VINT8x64_NOT_DEFINED
+#endif
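+/* Comparisons build per-lane boolean masks: every bit of a lane is set
+ * (0xFF) when the predicate holds and cleared otherwise. */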
+#if !defined(VINT8x64_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_cmplt(vint8x64 vec1, vint8x64 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] < vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] < vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] < vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] < vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] < vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] < vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] < vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] < vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] < vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] < vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] < vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] < vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] < vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] < vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] < vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] < vec2.generic[31]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[32], (vec1.generic[32] < vec2.generic[32]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[33], (vec1.generic[33] < vec2.generic[33]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[34], (vec1.generic[34] < vec2.generic[34]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[35], (vec1.generic[35] < vec2.generic[35]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[36], (vec1.generic[36] < vec2.generic[36]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[37], (vec1.generic[37] < vec2.generic[37]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[38], (vec1.generic[38] < vec2.generic[38]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[39], (vec1.generic[39] < vec2.generic[39]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[40], (vec1.generic[40] < vec2.generic[40]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[41], (vec1.generic[41] < vec2.generic[41]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[42], (vec1.generic[42] < vec2.generic[42]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[43], (vec1.generic[43] < vec2.generic[43]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[44], (vec1.generic[44] < vec2.generic[44]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[45], (vec1.generic[45] < vec2.generic[45]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[46], (vec1.generic[46] < vec2.generic[46]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[47], (vec1.generic[47] < vec2.generic[47]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[48], (vec1.generic[48] < vec2.generic[48]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[49], (vec1.generic[49] < vec2.generic[49]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[50], (vec1.generic[50] < vec2.generic[50]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[51], (vec1.generic[51] < vec2.generic[51]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[52], (vec1.generic[52] < vec2.generic[52]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[53], (vec1.generic[53] < vec2.generic[53]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[54], (vec1.generic[54] < vec2.generic[54]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[55], (vec1.generic[55] < vec2.generic[55]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[56], (vec1.generic[56] < vec2.generic[56]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[57], (vec1.generic[57] < vec2.generic[57]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[58], (vec1.generic[58] < vec2.generic[58]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[59], (vec1.generic[59] < vec2.generic[59]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[60], (vec1.generic[60] < vec2.generic[60]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[61], (vec1.generic[61] < vec2.generic[61]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[62], (vec1.generic[62] < vec2.generic[62]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[63], (vec1.generic[63] < vec2.generic[63]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x64_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x64_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_cmpeq(vint8x64 vec1, vint8x64 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] == vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] == vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] == vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] == vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] == vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] == vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] == vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] == vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] == vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] == vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] == vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] == vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] == vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] == vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] == vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] == vec2.generic[31]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[32], (vec1.generic[32] == vec2.generic[32]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[33], (vec1.generic[33] == vec2.generic[33]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[34], (vec1.generic[34] == vec2.generic[34]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[35], (vec1.generic[35] == vec2.generic[35]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[36], (vec1.generic[36] == vec2.generic[36]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[37], (vec1.generic[37] == vec2.generic[37]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[38], (vec1.generic[38] == vec2.generic[38]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[39], (vec1.generic[39] == vec2.generic[39]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[40], (vec1.generic[40] == vec2.generic[40]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[41], (vec1.generic[41] == vec2.generic[41]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[42], (vec1.generic[42] == vec2.generic[42]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[43], (vec1.generic[43] == vec2.generic[43]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[44], (vec1.generic[44] == vec2.generic[44]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[45], (vec1.generic[45] == vec2.generic[45]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[46], (vec1.generic[46] == vec2.generic[46]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[47], (vec1.generic[47] == vec2.generic[47]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[48], (vec1.generic[48] == vec2.generic[48]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[49], (vec1.generic[49] == vec2.generic[49]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[50], (vec1.generic[50] == vec2.generic[50]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[51], (vec1.generic[51] == vec2.generic[51]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[52], (vec1.generic[52] == vec2.generic[52]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[53], (vec1.generic[53] == vec2.generic[53]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[54], (vec1.generic[54] == vec2.generic[54]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[55], (vec1.generic[55] == vec2.generic[55]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[56], (vec1.generic[56] == vec2.generic[56]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[57], (vec1.generic[57] == vec2.generic[57]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[58], (vec1.generic[58] == vec2.generic[58]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[59], (vec1.generic[59] == vec2.generic[59]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[60], (vec1.generic[60] == vec2.generic[60]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[61], (vec1.generic[61] == vec2.generic[61]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[62], (vec1.generic[62] == vec2.generic[62]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[63], (vec1.generic[63] == vec2.generic[63]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x64_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x64_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_cmpgt(vint8x64 vec1, vint8x64 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] > vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] > vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] > vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] > vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] > vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] > vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] > vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] > vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] > vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] > vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] > vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] > vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] > vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] > vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] > vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] > vec2.generic[31]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[32], (vec1.generic[32] > vec2.generic[32]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[33], (vec1.generic[33] > vec2.generic[33]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[34], (vec1.generic[34] > vec2.generic[34]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[35], (vec1.generic[35] > vec2.generic[35]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[36], (vec1.generic[36] > vec2.generic[36]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[37], (vec1.generic[37] > vec2.generic[37]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[38], (vec1.generic[38] > vec2.generic[38]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[39], (vec1.generic[39] > vec2.generic[39]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[40], (vec1.generic[40] > vec2.generic[40]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[41], (vec1.generic[41] > vec2.generic[41]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[42], (vec1.generic[42] > vec2.generic[42]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[43], (vec1.generic[43] > vec2.generic[43]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[44], (vec1.generic[44] > vec2.generic[44]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[45], (vec1.generic[45] > vec2.generic[45]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[46], (vec1.generic[46] > vec2.generic[46]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[47], (vec1.generic[47] > vec2.generic[47]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[48], (vec1.generic[48] > vec2.generic[48]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[49], (vec1.generic[49] > vec2.generic[49]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[50], (vec1.generic[50] > vec2.generic[50]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[51], (vec1.generic[51] > vec2.generic[51]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[52], (vec1.generic[52] > vec2.generic[52]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[53], (vec1.generic[53] > vec2.generic[53]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[54], (vec1.generic[54] > vec2.generic[54]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[55], (vec1.generic[55] > vec2.generic[55]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[56], (vec1.generic[56] > vec2.generic[56]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[57], (vec1.generic[57] > vec2.generic[57]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[58], (vec1.generic[58] > vec2.generic[58]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[59], (vec1.generic[59] > vec2.generic[59]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[60], (vec1.generic[60] > vec2.generic[60]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[61], (vec1.generic[61] > vec2.generic[61]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[62], (vec1.generic[62] > vec2.generic[62]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[63], (vec1.generic[63] > vec2.generic[63]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x64_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x64_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_cmple(vint8x64 vec1, vint8x64 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] <= vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] <= vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] <= vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] <= vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] <= vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] <= vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] <= vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] <= vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] <= vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] <= vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] <= vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] <= vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] <= vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] <= vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] <= vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] <= vec2.generic[31]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[32], (vec1.generic[32] <= vec2.generic[32]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[33], (vec1.generic[33] <= vec2.generic[33]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[34], (vec1.generic[34] <= vec2.generic[34]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[35], (vec1.generic[35] <= vec2.generic[35]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[36], (vec1.generic[36] <= vec2.generic[36]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[37], (vec1.generic[37] <= vec2.generic[37]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[38], (vec1.generic[38] <= vec2.generic[38]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[39], (vec1.generic[39] <= vec2.generic[39]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[40], (vec1.generic[40] <= vec2.generic[40]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[41], (vec1.generic[41] <= vec2.generic[41]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[42], (vec1.generic[42] <= vec2.generic[42]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[43], (vec1.generic[43] <= vec2.generic[43]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[44], (vec1.generic[44] <= vec2.generic[44]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[45], (vec1.generic[45] <= vec2.generic[45]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[46], (vec1.generic[46] <= vec2.generic[46]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[47], (vec1.generic[47] <= vec2.generic[47]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[48], (vec1.generic[48] <= vec2.generic[48]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[49], (vec1.generic[49] <= vec2.generic[49]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[50], (vec1.generic[50] <= vec2.generic[50]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[51], (vec1.generic[51] <= vec2.generic[51]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[52], (vec1.generic[52] <= vec2.generic[52]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[53], (vec1.generic[53] <= vec2.generic[53]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[54], (vec1.generic[54] <= vec2.generic[54]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[55], (vec1.generic[55] <= vec2.generic[55]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[56], (vec1.generic[56] <= vec2.generic[56]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[57], (vec1.generic[57] <= vec2.generic[57]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[58], (vec1.generic[58] <= vec2.generic[58]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[59], (vec1.generic[59] <= vec2.generic[59]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[60], (vec1.generic[60] <= vec2.generic[60]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[61], (vec1.generic[61] <= vec2.generic[61]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[62], (vec1.generic[62] <= vec2.generic[62]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[63], (vec1.generic[63] <= vec2.generic[63]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x64_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x64_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_cmpge(vint8x64 vec1, vint8x64 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] >= vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] >= vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] >= vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] >= vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] >= vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] >= vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] >= vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] >= vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] >= vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] >= vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] >= vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] >= vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] >= vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] >= vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] >= vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] >= vec2.generic[31]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[32], (vec1.generic[32] >= vec2.generic[32]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[33], (vec1.generic[33] >= vec2.generic[33]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[34], (vec1.generic[34] >= vec2.generic[34]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[35], (vec1.generic[35] >= vec2.generic[35]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[36], (vec1.generic[36] >= vec2.generic[36]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[37], (vec1.generic[37] >= vec2.generic[37]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[38], (vec1.generic[38] >= vec2.generic[38]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[39], (vec1.generic[39] >= vec2.generic[39]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[40], (vec1.generic[40] >= vec2.generic[40]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[41], (vec1.generic[41] >= vec2.generic[41]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[42], (vec1.generic[42] >= vec2.generic[42]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[43], (vec1.generic[43] >= vec2.generic[43]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[44], (vec1.generic[44] >= vec2.generic[44]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[45], (vec1.generic[45] >= vec2.generic[45]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[46], (vec1.generic[46] >= vec2.generic[46]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[47], (vec1.generic[47] >= vec2.generic[47]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[48], (vec1.generic[48] >= vec2.generic[48]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[49], (vec1.generic[49] >= vec2.generic[49]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[50], (vec1.generic[50] >= vec2.generic[50]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[51], (vec1.generic[51] >= vec2.generic[51]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[52], (vec1.generic[52] >= vec2.generic[52]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[53], (vec1.generic[53] >= vec2.generic[53]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[54], (vec1.generic[54] >= vec2.generic[54]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[55], (vec1.generic[55] >= vec2.generic[55]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[56], (vec1.generic[56] >= vec2.generic[56]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[57], (vec1.generic[57] >= vec2.generic[57]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[58], (vec1.generic[58] >= vec2.generic[58]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[59], (vec1.generic[59] >= vec2.generic[59]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[60], (vec1.generic[60] >= vec2.generic[60]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[61], (vec1.generic[61] >= vec2.generic[61]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[62], (vec1.generic[62] >= vec2.generic[62]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[63], (vec1.generic[63] >= vec2.generic[63]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VINT8x64_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x64_MIN_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_min(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] < vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] < vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] < vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] < vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] < vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] < vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] < vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] < vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] < vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] < vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] < vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] < vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] < vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] < vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] < vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] < vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] < vec2.generic[32]) ? (vec1.generic[32]) : (vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] < vec2.generic[33]) ? (vec1.generic[33]) : (vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] < vec2.generic[34]) ? (vec1.generic[34]) : (vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] < vec2.generic[35]) ? (vec1.generic[35]) : (vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] < vec2.generic[36]) ? (vec1.generic[36]) : (vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] < vec2.generic[37]) ? (vec1.generic[37]) : (vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] < vec2.generic[38]) ? (vec1.generic[38]) : (vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] < vec2.generic[39]) ? (vec1.generic[39]) : (vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] < vec2.generic[40]) ? (vec1.generic[40]) : (vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] < vec2.generic[41]) ? (vec1.generic[41]) : (vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] < vec2.generic[42]) ? (vec1.generic[42]) : (vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] < vec2.generic[43]) ? (vec1.generic[43]) : (vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] < vec2.generic[44]) ? (vec1.generic[44]) : (vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] < vec2.generic[45]) ? (vec1.generic[45]) : (vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] < vec2.generic[46]) ? (vec1.generic[46]) : (vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] < vec2.generic[47]) ? (vec1.generic[47]) : (vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] < vec2.generic[48]) ? (vec1.generic[48]) : (vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] < vec2.generic[49]) ? (vec1.generic[49]) : (vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] < vec2.generic[50]) ? (vec1.generic[50]) : (vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] < vec2.generic[51]) ? (vec1.generic[51]) : (vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] < vec2.generic[52]) ? (vec1.generic[52]) : (vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] < vec2.generic[53]) ? (vec1.generic[53]) : (vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] < vec2.generic[54]) ? (vec1.generic[54]) : (vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] < vec2.generic[55]) ? (vec1.generic[55]) : (vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] < vec2.generic[56]) ? (vec1.generic[56]) : (vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] < vec2.generic[57]) ? (vec1.generic[57]) : (vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] < vec2.generic[58]) ? (vec1.generic[58]) : (vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] < vec2.generic[59]) ? (vec1.generic[59]) : (vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] < vec2.generic[60]) ? (vec1.generic[60]) : (vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] < vec2.generic[61]) ? (vec1.generic[61]) : (vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] < vec2.generic[62]) ? (vec1.generic[62]) : (vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] < vec2.generic[63]) ? (vec1.generic[63]) : (vec2.generic[63]);
+	return vec1;
+}
+# define VINT8x64_MIN_DEFINED
+#endif
+#if !defined(VINT8x64_MAX_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_max(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] > vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] > vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] > vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] > vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] > vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] > vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] > vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] > vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] > vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] > vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] > vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] > vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] > vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] > vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] > vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] > vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] > vec2.generic[32]) ? (vec1.generic[32]) : (vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] > vec2.generic[33]) ? (vec1.generic[33]) : (vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] > vec2.generic[34]) ? (vec1.generic[34]) : (vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] > vec2.generic[35]) ? (vec1.generic[35]) : (vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] > vec2.generic[36]) ? (vec1.generic[36]) : (vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] > vec2.generic[37]) ? (vec1.generic[37]) : (vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] > vec2.generic[38]) ? (vec1.generic[38]) : (vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] > vec2.generic[39]) ? (vec1.generic[39]) : (vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] > vec2.generic[40]) ? (vec1.generic[40]) : (vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] > vec2.generic[41]) ? (vec1.generic[41]) : (vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] > vec2.generic[42]) ? (vec1.generic[42]) : (vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] > vec2.generic[43]) ? (vec1.generic[43]) : (vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] > vec2.generic[44]) ? (vec1.generic[44]) : (vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] > vec2.generic[45]) ? (vec1.generic[45]) : (vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] > vec2.generic[46]) ? (vec1.generic[46]) : (vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] > vec2.generic[47]) ? (vec1.generic[47]) : (vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] > vec2.generic[48]) ? (vec1.generic[48]) : (vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] > vec2.generic[49]) ? (vec1.generic[49]) : (vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] > vec2.generic[50]) ? (vec1.generic[50]) : (vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] > vec2.generic[51]) ? (vec1.generic[51]) : (vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] > vec2.generic[52]) ? (vec1.generic[52]) : (vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] > vec2.generic[53]) ? (vec1.generic[53]) : (vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] > vec2.generic[54]) ? (vec1.generic[54]) : (vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] > vec2.generic[55]) ? (vec1.generic[55]) : (vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] > vec2.generic[56]) ? (vec1.generic[56]) : (vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] > vec2.generic[57]) ? (vec1.generic[57]) : (vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] > vec2.generic[58]) ? (vec1.generic[58]) : (vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] > vec2.generic[59]) ? (vec1.generic[59]) : (vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] > vec2.generic[60]) ? (vec1.generic[60]) : (vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] > vec2.generic[61]) ? (vec1.generic[61]) : (vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] > vec2.generic[62]) ? (vec1.generic[62]) : (vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] > vec2.generic[63]) ? (vec1.generic[63]) : (vec2.generic[63]);
+	return vec1;
+}
+# define VINT8x64_MAX_DEFINED
+#endif
+#if !defined(VINT8x64_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_rshift(vint8x64 vec1, vuint8x64 vec2)
+{
+	/* arithmetic right shift, done portably: negative lanes use ~(~x >> n) so the sign bit
+	 * is replicated even where >> on the underlying type would be a logical shift */
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~(~vec1.generic[0] >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~(~vec1.generic[1] >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~(~vec1.generic[2] >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~(~vec1.generic[3] >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < 0) ? ~(~vec1.generic[4] >> vec2.generic[4]) : (vec1.generic[4] >> vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < 0) ? ~(~vec1.generic[5] >> vec2.generic[5]) : (vec1.generic[5] >> vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < 0) ? ~(~vec1.generic[6] >> vec2.generic[6]) : (vec1.generic[6] >> vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < 0) ? ~(~vec1.generic[7] >> vec2.generic[7]) : (vec1.generic[7] >> vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < 0) ? ~(~vec1.generic[8] >> vec2.generic[8]) : (vec1.generic[8] >> vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < 0) ? ~(~vec1.generic[9] >> vec2.generic[9]) : (vec1.generic[9] >> vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < 0) ? ~(~vec1.generic[10] >> vec2.generic[10]) : (vec1.generic[10] >> vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < 0) ? ~(~vec1.generic[11] >> vec2.generic[11]) : (vec1.generic[11] >> vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < 0) ? ~(~vec1.generic[12] >> vec2.generic[12]) : (vec1.generic[12] >> vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < 0) ? ~(~vec1.generic[13] >> vec2.generic[13]) : (vec1.generic[13] >> vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < 0) ? ~(~vec1.generic[14] >> vec2.generic[14]) : (vec1.generic[14] >> vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < 0) ? ~(~vec1.generic[15] >> vec2.generic[15]) : (vec1.generic[15] >> vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] < 0) ? ~(~vec1.generic[16] >> vec2.generic[16]) : (vec1.generic[16] >> vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] < 0) ? ~(~vec1.generic[17] >> vec2.generic[17]) : (vec1.generic[17] >> vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] < 0) ? ~(~vec1.generic[18] >> vec2.generic[18]) : (vec1.generic[18] >> vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] < 0) ? ~(~vec1.generic[19] >> vec2.generic[19]) : (vec1.generic[19] >> vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] < 0) ? ~(~vec1.generic[20] >> vec2.generic[20]) : (vec1.generic[20] >> vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] < 0) ? ~(~vec1.generic[21] >> vec2.generic[21]) : (vec1.generic[21] >> vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] < 0) ? ~(~vec1.generic[22] >> vec2.generic[22]) : (vec1.generic[22] >> vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] < 0) ? ~(~vec1.generic[23] >> vec2.generic[23]) : (vec1.generic[23] >> vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] < 0) ? ~(~vec1.generic[24] >> vec2.generic[24]) : (vec1.generic[24] >> vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] < 0) ? ~(~vec1.generic[25] >> vec2.generic[25]) : (vec1.generic[25] >> vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] < 0) ? ~(~vec1.generic[26] >> vec2.generic[26]) : (vec1.generic[26] >> vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] < 0) ? ~(~vec1.generic[27] >> vec2.generic[27]) : (vec1.generic[27] >> vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] < 0) ? ~(~vec1.generic[28] >> vec2.generic[28]) : (vec1.generic[28] >> vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] < 0) ? ~(~vec1.generic[29] >> vec2.generic[29]) : (vec1.generic[29] >> vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] < 0) ? ~(~vec1.generic[30] >> vec2.generic[30]) : (vec1.generic[30] >> vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] < 0) ? ~(~vec1.generic[31] >> vec2.generic[31]) : (vec1.generic[31] >> vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] < 0) ? ~(~vec1.generic[32] >> vec2.generic[32]) : (vec1.generic[32] >> vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] < 0) ? ~(~vec1.generic[33] >> vec2.generic[33]) : (vec1.generic[33] >> vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] < 0) ? ~(~vec1.generic[34] >> vec2.generic[34]) : (vec1.generic[34] >> vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] < 0) ? ~(~vec1.generic[35] >> vec2.generic[35]) : (vec1.generic[35] >> vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] < 0) ? ~(~vec1.generic[36] >> vec2.generic[36]) : (vec1.generic[36] >> vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] < 0) ? ~(~vec1.generic[37] >> vec2.generic[37]) : (vec1.generic[37] >> vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] < 0) ? ~(~vec1.generic[38] >> vec2.generic[38]) : (vec1.generic[38] >> vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] < 0) ? ~(~vec1.generic[39] >> vec2.generic[39]) : (vec1.generic[39] >> vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] < 0) ? ~(~vec1.generic[40] >> vec2.generic[40]) : (vec1.generic[40] >> vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] < 0) ? ~(~vec1.generic[41] >> vec2.generic[41]) : (vec1.generic[41] >> vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] < 0) ? ~(~vec1.generic[42] >> vec2.generic[42]) : (vec1.generic[42] >> vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] < 0) ? ~(~vec1.generic[43] >> vec2.generic[43]) : (vec1.generic[43] >> vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] < 0) ? ~(~vec1.generic[44] >> vec2.generic[44]) : (vec1.generic[44] >> vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] < 0) ? ~(~vec1.generic[45] >> vec2.generic[45]) : (vec1.generic[45] >> vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] < 0) ? ~(~vec1.generic[46] >> vec2.generic[46]) : (vec1.generic[46] >> vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] < 0) ? ~(~vec1.generic[47] >> vec2.generic[47]) : (vec1.generic[47] >> vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] < 0) ? ~(~vec1.generic[48] >> vec2.generic[48]) : (vec1.generic[48] >> vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] < 0) ? ~(~vec1.generic[49] >> vec2.generic[49]) : (vec1.generic[49] >> vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] < 0) ? ~(~vec1.generic[50] >> vec2.generic[50]) : (vec1.generic[50] >> vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] < 0) ? ~(~vec1.generic[51] >> vec2.generic[51]) : (vec1.generic[51] >> vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] < 0) ? ~(~vec1.generic[52] >> vec2.generic[52]) : (vec1.generic[52] >> vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] < 0) ? ~(~vec1.generic[53] >> vec2.generic[53]) : (vec1.generic[53] >> vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] < 0) ? ~(~vec1.generic[54] >> vec2.generic[54]) : (vec1.generic[54] >> vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] < 0) ? ~(~vec1.generic[55] >> vec2.generic[55]) : (vec1.generic[55] >> vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] < 0) ? ~(~vec1.generic[56] >> vec2.generic[56]) : (vec1.generic[56] >> vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] < 0) ? ~(~vec1.generic[57] >> vec2.generic[57]) : (vec1.generic[57] >> vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] < 0) ? ~(~vec1.generic[58] >> vec2.generic[58]) : (vec1.generic[58] >> vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] < 0) ? ~(~vec1.generic[59] >> vec2.generic[59]) : (vec1.generic[59] >> vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] < 0) ? ~(~vec1.generic[60] >> vec2.generic[60]) : (vec1.generic[60] >> vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] < 0) ? ~(~vec1.generic[61] >> vec2.generic[61]) : (vec1.generic[61] >> vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] < 0) ? ~(~vec1.generic[62] >> vec2.generic[62]) : (vec1.generic[62] >> vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] < 0) ? ~(~vec1.generic[63] >> vec2.generic[63]) : (vec1.generic[63] >> vec2.generic[63]);
+	return vec1;
+}
+# define VINT8x64_RSHIFT_DEFINED
+#endif
+#if !defined(VINT8x64_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_lrshift(vint8x64 vec1, vuint8x64 vec2)
+{
+	union { vec_uint8 u; vec_int8 s; } x;
+
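+	/* shift through the unsigned member of the union so each lane gets a logical
+	 * (zero-filling) right shift regardless of its sign */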
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u >>= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u >>= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u >>= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u >>= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	x.s = vec1.generic[8];
+	x.u >>= vec2.generic[8];
+	vec1.generic[8] = x.s;
+	x.s = vec1.generic[9];
+	x.u >>= vec2.generic[9];
+	vec1.generic[9] = x.s;
+	x.s = vec1.generic[10];
+	x.u >>= vec2.generic[10];
+	vec1.generic[10] = x.s;
+	x.s = vec1.generic[11];
+	x.u >>= vec2.generic[11];
+	vec1.generic[11] = x.s;
+	x.s = vec1.generic[12];
+	x.u >>= vec2.generic[12];
+	vec1.generic[12] = x.s;
+	x.s = vec1.generic[13];
+	x.u >>= vec2.generic[13];
+	vec1.generic[13] = x.s;
+	x.s = vec1.generic[14];
+	x.u >>= vec2.generic[14];
+	vec1.generic[14] = x.s;
+	x.s = vec1.generic[15];
+	x.u >>= vec2.generic[15];
+	vec1.generic[15] = x.s;
+	x.s = vec1.generic[16];
+	x.u >>= vec2.generic[16];
+	vec1.generic[16] = x.s;
+	x.s = vec1.generic[17];
+	x.u >>= vec2.generic[17];
+	vec1.generic[17] = x.s;
+	x.s = vec1.generic[18];
+	x.u >>= vec2.generic[18];
+	vec1.generic[18] = x.s;
+	x.s = vec1.generic[19];
+	x.u >>= vec2.generic[19];
+	vec1.generic[19] = x.s;
+	x.s = vec1.generic[20];
+	x.u >>= vec2.generic[20];
+	vec1.generic[20] = x.s;
+	x.s = vec1.generic[21];
+	x.u >>= vec2.generic[21];
+	vec1.generic[21] = x.s;
+	x.s = vec1.generic[22];
+	x.u >>= vec2.generic[22];
+	vec1.generic[22] = x.s;
+	x.s = vec1.generic[23];
+	x.u >>= vec2.generic[23];
+	vec1.generic[23] = x.s;
+	x.s = vec1.generic[24];
+	x.u >>= vec2.generic[24];
+	vec1.generic[24] = x.s;
+	x.s = vec1.generic[25];
+	x.u >>= vec2.generic[25];
+	vec1.generic[25] = x.s;
+	x.s = vec1.generic[26];
+	x.u >>= vec2.generic[26];
+	vec1.generic[26] = x.s;
+	x.s = vec1.generic[27];
+	x.u >>= vec2.generic[27];
+	vec1.generic[27] = x.s;
+	x.s = vec1.generic[28];
+	x.u >>= vec2.generic[28];
+	vec1.generic[28] = x.s;
+	x.s = vec1.generic[29];
+	x.u >>= vec2.generic[29];
+	vec1.generic[29] = x.s;
+	x.s = vec1.generic[30];
+	x.u >>= vec2.generic[30];
+	vec1.generic[30] = x.s;
+	x.s = vec1.generic[31];
+	x.u >>= vec2.generic[31];
+	vec1.generic[31] = x.s;
+	x.s = vec1.generic[32];
+	x.u >>= vec2.generic[32];
+	vec1.generic[32] = x.s;
+	x.s = vec1.generic[33];
+	x.u >>= vec2.generic[33];
+	vec1.generic[33] = x.s;
+	x.s = vec1.generic[34];
+	x.u >>= vec2.generic[34];
+	vec1.generic[34] = x.s;
+	x.s = vec1.generic[35];
+	x.u >>= vec2.generic[35];
+	vec1.generic[35] = x.s;
+	x.s = vec1.generic[36];
+	x.u >>= vec2.generic[36];
+	vec1.generic[36] = x.s;
+	x.s = vec1.generic[37];
+	x.u >>= vec2.generic[37];
+	vec1.generic[37] = x.s;
+	x.s = vec1.generic[38];
+	x.u >>= vec2.generic[38];
+	vec1.generic[38] = x.s;
+	x.s = vec1.generic[39];
+	x.u >>= vec2.generic[39];
+	vec1.generic[39] = x.s;
+	x.s = vec1.generic[40];
+	x.u >>= vec2.generic[40];
+	vec1.generic[40] = x.s;
+	x.s = vec1.generic[41];
+	x.u >>= vec2.generic[41];
+	vec1.generic[41] = x.s;
+	x.s = vec1.generic[42];
+	x.u >>= vec2.generic[42];
+	vec1.generic[42] = x.s;
+	x.s = vec1.generic[43];
+	x.u >>= vec2.generic[43];
+	vec1.generic[43] = x.s;
+	x.s = vec1.generic[44];
+	x.u >>= vec2.generic[44];
+	vec1.generic[44] = x.s;
+	x.s = vec1.generic[45];
+	x.u >>= vec2.generic[45];
+	vec1.generic[45] = x.s;
+	x.s = vec1.generic[46];
+	x.u >>= vec2.generic[46];
+	vec1.generic[46] = x.s;
+	x.s = vec1.generic[47];
+	x.u >>= vec2.generic[47];
+	vec1.generic[47] = x.s;
+	x.s = vec1.generic[48];
+	x.u >>= vec2.generic[48];
+	vec1.generic[48] = x.s;
+	x.s = vec1.generic[49];
+	x.u >>= vec2.generic[49];
+	vec1.generic[49] = x.s;
+	x.s = vec1.generic[50];
+	x.u >>= vec2.generic[50];
+	vec1.generic[50] = x.s;
+	x.s = vec1.generic[51];
+	x.u >>= vec2.generic[51];
+	vec1.generic[51] = x.s;
+	x.s = vec1.generic[52];
+	x.u >>= vec2.generic[52];
+	vec1.generic[52] = x.s;
+	x.s = vec1.generic[53];
+	x.u >>= vec2.generic[53];
+	vec1.generic[53] = x.s;
+	x.s = vec1.generic[54];
+	x.u >>= vec2.generic[54];
+	vec1.generic[54] = x.s;
+	x.s = vec1.generic[55];
+	x.u >>= vec2.generic[55];
+	vec1.generic[55] = x.s;
+	x.s = vec1.generic[56];
+	x.u >>= vec2.generic[56];
+	vec1.generic[56] = x.s;
+	x.s = vec1.generic[57];
+	x.u >>= vec2.generic[57];
+	vec1.generic[57] = x.s;
+	x.s = vec1.generic[58];
+	x.u >>= vec2.generic[58];
+	vec1.generic[58] = x.s;
+	x.s = vec1.generic[59];
+	x.u >>= vec2.generic[59];
+	vec1.generic[59] = x.s;
+	x.s = vec1.generic[60];
+	x.u >>= vec2.generic[60];
+	vec1.generic[60] = x.s;
+	x.s = vec1.generic[61];
+	x.u >>= vec2.generic[61];
+	vec1.generic[61] = x.s;
+	x.s = vec1.generic[62];
+	x.u >>= vec2.generic[62];
+	vec1.generic[62] = x.s;
+	x.s = vec1.generic[63];
+	x.u >>= vec2.generic[63];
+	vec1.generic[63] = x.s;
+	return vec1;
+}
+# define VINT8x64_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT8x64_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x64 vint8x64_lshift(vint8x64 vec1, vuint8x64 vec2)
+{
+	union { vec_uint8 u; vec_int8 s; } x;
+
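+	/* shift through the unsigned member so bits shifted past the top simply wrap
+	 * instead of invoking signed-overflow undefined behaviour */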
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u <<= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u <<= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u <<= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u <<= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	x.s = vec1.generic[8];
+	x.u <<= vec2.generic[8];
+	vec1.generic[8] = x.s;
+	x.s = vec1.generic[9];
+	x.u <<= vec2.generic[9];
+	vec1.generic[9] = x.s;
+	x.s = vec1.generic[10];
+	x.u <<= vec2.generic[10];
+	vec1.generic[10] = x.s;
+	x.s = vec1.generic[11];
+	x.u <<= vec2.generic[11];
+	vec1.generic[11] = x.s;
+	x.s = vec1.generic[12];
+	x.u <<= vec2.generic[12];
+	vec1.generic[12] = x.s;
+	x.s = vec1.generic[13];
+	x.u <<= vec2.generic[13];
+	vec1.generic[13] = x.s;
+	x.s = vec1.generic[14];
+	x.u <<= vec2.generic[14];
+	vec1.generic[14] = x.s;
+	x.s = vec1.generic[15];
+	x.u <<= vec2.generic[15];
+	vec1.generic[15] = x.s;
+	x.s = vec1.generic[16];
+	x.u <<= vec2.generic[16];
+	vec1.generic[16] = x.s;
+	x.s = vec1.generic[17];
+	x.u <<= vec2.generic[17];
+	vec1.generic[17] = x.s;
+	x.s = vec1.generic[18];
+	x.u <<= vec2.generic[18];
+	vec1.generic[18] = x.s;
+	x.s = vec1.generic[19];
+	x.u <<= vec2.generic[19];
+	vec1.generic[19] = x.s;
+	x.s = vec1.generic[20];
+	x.u <<= vec2.generic[20];
+	vec1.generic[20] = x.s;
+	x.s = vec1.generic[21];
+	x.u <<= vec2.generic[21];
+	vec1.generic[21] = x.s;
+	x.s = vec1.generic[22];
+	x.u <<= vec2.generic[22];
+	vec1.generic[22] = x.s;
+	x.s = vec1.generic[23];
+	x.u <<= vec2.generic[23];
+	vec1.generic[23] = x.s;
+	x.s = vec1.generic[24];
+	x.u <<= vec2.generic[24];
+	vec1.generic[24] = x.s;
+	x.s = vec1.generic[25];
+	x.u <<= vec2.generic[25];
+	vec1.generic[25] = x.s;
+	x.s = vec1.generic[26];
+	x.u <<= vec2.generic[26];
+	vec1.generic[26] = x.s;
+	x.s = vec1.generic[27];
+	x.u <<= vec2.generic[27];
+	vec1.generic[27] = x.s;
+	x.s = vec1.generic[28];
+	x.u <<= vec2.generic[28];
+	vec1.generic[28] = x.s;
+	x.s = vec1.generic[29];
+	x.u <<= vec2.generic[29];
+	vec1.generic[29] = x.s;
+	x.s = vec1.generic[30];
+	x.u <<= vec2.generic[30];
+	vec1.generic[30] = x.s;
+	x.s = vec1.generic[31];
+	x.u <<= vec2.generic[31];
+	vec1.generic[31] = x.s;
+	x.s = vec1.generic[32];
+	x.u <<= vec2.generic[32];
+	vec1.generic[32] = x.s;
+	x.s = vec1.generic[33];
+	x.u <<= vec2.generic[33];
+	vec1.generic[33] = x.s;
+	x.s = vec1.generic[34];
+	x.u <<= vec2.generic[34];
+	vec1.generic[34] = x.s;
+	x.s = vec1.generic[35];
+	x.u <<= vec2.generic[35];
+	vec1.generic[35] = x.s;
+	x.s = vec1.generic[36];
+	x.u <<= vec2.generic[36];
+	vec1.generic[36] = x.s;
+	x.s = vec1.generic[37];
+	x.u <<= vec2.generic[37];
+	vec1.generic[37] = x.s;
+	x.s = vec1.generic[38];
+	x.u <<= vec2.generic[38];
+	vec1.generic[38] = x.s;
+	x.s = vec1.generic[39];
+	x.u <<= vec2.generic[39];
+	vec1.generic[39] = x.s;
+	x.s = vec1.generic[40];
+	x.u <<= vec2.generic[40];
+	vec1.generic[40] = x.s;
+	x.s = vec1.generic[41];
+	x.u <<= vec2.generic[41];
+	vec1.generic[41] = x.s;
+	x.s = vec1.generic[42];
+	x.u <<= vec2.generic[42];
+	vec1.generic[42] = x.s;
+	x.s = vec1.generic[43];
+	x.u <<= vec2.generic[43];
+	vec1.generic[43] = x.s;
+	x.s = vec1.generic[44];
+	x.u <<= vec2.generic[44];
+	vec1.generic[44] = x.s;
+	x.s = vec1.generic[45];
+	x.u <<= vec2.generic[45];
+	vec1.generic[45] = x.s;
+	x.s = vec1.generic[46];
+	x.u <<= vec2.generic[46];
+	vec1.generic[46] = x.s;
+	x.s = vec1.generic[47];
+	x.u <<= vec2.generic[47];
+	vec1.generic[47] = x.s;
+	x.s = vec1.generic[48];
+	x.u <<= vec2.generic[48];
+	vec1.generic[48] = x.s;
+	x.s = vec1.generic[49];
+	x.u <<= vec2.generic[49];
+	vec1.generic[49] = x.s;
+	x.s = vec1.generic[50];
+	x.u <<= vec2.generic[50];
+	vec1.generic[50] = x.s;
+	x.s = vec1.generic[51];
+	x.u <<= vec2.generic[51];
+	vec1.generic[51] = x.s;
+	x.s = vec1.generic[52];
+	x.u <<= vec2.generic[52];
+	vec1.generic[52] = x.s;
+	x.s = vec1.generic[53];
+	x.u <<= vec2.generic[53];
+	vec1.generic[53] = x.s;
+	x.s = vec1.generic[54];
+	x.u <<= vec2.generic[54];
+	vec1.generic[54] = x.s;
+	x.s = vec1.generic[55];
+	x.u <<= vec2.generic[55];
+	vec1.generic[55] = x.s;
+	x.s = vec1.generic[56];
+	x.u <<= vec2.generic[56];
+	vec1.generic[56] = x.s;
+	x.s = vec1.generic[57];
+	x.u <<= vec2.generic[57];
+	vec1.generic[57] = x.s;
+	x.s = vec1.generic[58];
+	x.u <<= vec2.generic[58];
+	vec1.generic[58] = x.s;
+	x.s = vec1.generic[59];
+	x.u <<= vec2.generic[59];
+	vec1.generic[59] = x.s;
+	x.s = vec1.generic[60];
+	x.u <<= vec2.generic[60];
+	vec1.generic[60] = x.s;
+	x.s = vec1.generic[61];
+	x.u <<= vec2.generic[61];
+	vec1.generic[61] = x.s;
+	x.s = vec1.generic[62];
+	x.u <<= vec2.generic[62];
+	vec1.generic[62] = x.s;
+	x.s = vec1.generic[63];
+	x.u <<= vec2.generic[63];
+	vec1.generic[63] = x.s;
+	return vec1;
+}
+# define VINT8x64_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x64_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_splat(vec_uint8 x)
+{
+	vuint8x64 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	vec.generic[16] = x;
+	vec.generic[17] = x;
+	vec.generic[18] = x;
+	vec.generic[19] = x;
+	vec.generic[20] = x;
+	vec.generic[21] = x;
+	vec.generic[22] = x;
+	vec.generic[23] = x;
+	vec.generic[24] = x;
+	vec.generic[25] = x;
+	vec.generic[26] = x;
+	vec.generic[27] = x;
+	vec.generic[28] = x;
+	vec.generic[29] = x;
+	vec.generic[30] = x;
+	vec.generic[31] = x;
+	vec.generic[32] = x;
+	vec.generic[33] = x;
+	vec.generic[34] = x;
+	vec.generic[35] = x;
+	vec.generic[36] = x;
+	vec.generic[37] = x;
+	vec.generic[38] = x;
+	vec.generic[39] = x;
+	vec.generic[40] = x;
+	vec.generic[41] = x;
+	vec.generic[42] = x;
+	vec.generic[43] = x;
+	vec.generic[44] = x;
+	vec.generic[45] = x;
+	vec.generic[46] = x;
+	vec.generic[47] = x;
+	vec.generic[48] = x;
+	vec.generic[49] = x;
+	vec.generic[50] = x;
+	vec.generic[51] = x;
+	vec.generic[52] = x;
+	vec.generic[53] = x;
+	vec.generic[54] = x;
+	vec.generic[55] = x;
+	vec.generic[56] = x;
+	vec.generic[57] = x;
+	vec.generic[58] = x;
+	vec.generic[59] = x;
+	vec.generic[60] = x;
+	vec.generic[61] = x;
+	vec.generic[62] = x;
+	vec.generic[63] = x;
+	return vec;
+}
+# define VUINT8x64_SPLAT_DEFINED
+#endif
+#if !defined(VUINT8x64_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_load_aligned(const vec_uint8 x[64])
+{
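+	/* the generic fallback makes no aligned/unaligned distinction; memcpy covers both */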
+	vuint8x64 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VUINT8x64_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x64_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_load(const vec_uint8 x[64])
+{
+	vuint8x64 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VUINT8x64_LOAD_DEFINED
+#endif
+#if !defined(VUINT8x64_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x64_store_aligned(vuint8x64 vec, vec_uint8 x[64])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VUINT8x64_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x64_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint8x64_store(vuint8x64 vec, vec_uint8 x[64])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VUINT8x64_STORE_DEFINED
+#endif
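+/* lane arithmetic happens in promoted integer math; any overflow wraps when the result is narrowed back to the unsigned lane type */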
+#if !defined(VUINT8x64_ADD_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_add(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] + vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] + vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] + vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] + vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] + vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] + vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] + vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] + vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] + vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] + vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] + vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] + vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] + vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] + vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] + vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] + vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] + vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] + vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] + vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] + vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] + vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] + vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] + vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] + vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] + vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] + vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] + vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] + vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] + vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] + vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] + vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] + vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] + vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] + vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] + vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] + vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] + vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] + vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] + vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] + vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] + vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] + vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] + vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] + vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] + vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] + vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] + vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] + vec2.generic[63]);
+	return vec1;
+}
+# define VUINT8x64_ADD_DEFINED
+#endif
+#if !defined(VUINT8x64_SUB_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_sub(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] - vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] - vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] - vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] - vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] - vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] - vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] - vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] - vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] - vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] - vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] - vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] - vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] - vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] - vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] - vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] - vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] - vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] - vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] - vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] - vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] - vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] - vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] - vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] - vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] - vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] - vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] - vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] - vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] - vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] - vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] - vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] - vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] - vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] - vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] - vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] - vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] - vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] - vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] - vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] - vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] - vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] - vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] - vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] - vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] - vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] - vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] - vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] - vec2.generic[63]);
+	return vec1;
+}
+# define VUINT8x64_SUB_DEFINED
+#endif
+#if !defined(VUINT8x64_MUL_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_mul(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] * vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] * vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] * vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] * vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] * vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] * vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] * vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] * vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] * vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] * vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] * vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] * vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] * vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] * vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] * vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] * vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] * vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] * vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] * vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] * vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] * vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] * vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] * vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] * vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] * vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] * vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] * vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] * vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] * vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] * vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] * vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] * vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] * vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] * vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] * vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] * vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] * vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] * vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] * vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] * vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] * vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] * vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] * vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] * vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] * vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] * vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] * vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] * vec2.generic[63]);
+	return vec1;
+}
+# define VUINT8x64_MUL_DEFINED
+#endif
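+/* division guards against a zero divisor: lanes where vec2 is zero yield 0 rather than trapping */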
+#if !defined(VUINT8x64_DIV_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_div(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] / vec2.generic[16]) : 0);
+	vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] / vec2.generic[17]) : 0);
+	vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] / vec2.generic[18]) : 0);
+	vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] / vec2.generic[19]) : 0);
+	vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] / vec2.generic[20]) : 0);
+	vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] / vec2.generic[21]) : 0);
+	vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] / vec2.generic[22]) : 0);
+	vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] / vec2.generic[23]) : 0);
+	vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] / vec2.generic[24]) : 0);
+	vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] / vec2.generic[25]) : 0);
+	vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] / vec2.generic[26]) : 0);
+	vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] / vec2.generic[27]) : 0);
+	vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] / vec2.generic[28]) : 0);
+	vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] / vec2.generic[29]) : 0);
+	vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] / vec2.generic[30]) : 0);
+	vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] / vec2.generic[31]) : 0);
+	vec1.generic[32] = (vec2.generic[32] ? (vec1.generic[32] / vec2.generic[32]) : 0);
+	vec1.generic[33] = (vec2.generic[33] ? (vec1.generic[33] / vec2.generic[33]) : 0);
+	vec1.generic[34] = (vec2.generic[34] ? (vec1.generic[34] / vec2.generic[34]) : 0);
+	vec1.generic[35] = (vec2.generic[35] ? (vec1.generic[35] / vec2.generic[35]) : 0);
+	vec1.generic[36] = (vec2.generic[36] ? (vec1.generic[36] / vec2.generic[36]) : 0);
+	vec1.generic[37] = (vec2.generic[37] ? (vec1.generic[37] / vec2.generic[37]) : 0);
+	vec1.generic[38] = (vec2.generic[38] ? (vec1.generic[38] / vec2.generic[38]) : 0);
+	vec1.generic[39] = (vec2.generic[39] ? (vec1.generic[39] / vec2.generic[39]) : 0);
+	vec1.generic[40] = (vec2.generic[40] ? (vec1.generic[40] / vec2.generic[40]) : 0);
+	vec1.generic[41] = (vec2.generic[41] ? (vec1.generic[41] / vec2.generic[41]) : 0);
+	vec1.generic[42] = (vec2.generic[42] ? (vec1.generic[42] / vec2.generic[42]) : 0);
+	vec1.generic[43] = (vec2.generic[43] ? (vec1.generic[43] / vec2.generic[43]) : 0);
+	vec1.generic[44] = (vec2.generic[44] ? (vec1.generic[44] / vec2.generic[44]) : 0);
+	vec1.generic[45] = (vec2.generic[45] ? (vec1.generic[45] / vec2.generic[45]) : 0);
+	vec1.generic[46] = (vec2.generic[46] ? (vec1.generic[46] / vec2.generic[46]) : 0);
+	vec1.generic[47] = (vec2.generic[47] ? (vec1.generic[47] / vec2.generic[47]) : 0);
+	vec1.generic[48] = (vec2.generic[48] ? (vec1.generic[48] / vec2.generic[48]) : 0);
+	vec1.generic[49] = (vec2.generic[49] ? (vec1.generic[49] / vec2.generic[49]) : 0);
+	vec1.generic[50] = (vec2.generic[50] ? (vec1.generic[50] / vec2.generic[50]) : 0);
+	vec1.generic[51] = (vec2.generic[51] ? (vec1.generic[51] / vec2.generic[51]) : 0);
+	vec1.generic[52] = (vec2.generic[52] ? (vec1.generic[52] / vec2.generic[52]) : 0);
+	vec1.generic[53] = (vec2.generic[53] ? (vec1.generic[53] / vec2.generic[53]) : 0);
+	vec1.generic[54] = (vec2.generic[54] ? (vec1.generic[54] / vec2.generic[54]) : 0);
+	vec1.generic[55] = (vec2.generic[55] ? (vec1.generic[55] / vec2.generic[55]) : 0);
+	vec1.generic[56] = (vec2.generic[56] ? (vec1.generic[56] / vec2.generic[56]) : 0);
+	vec1.generic[57] = (vec2.generic[57] ? (vec1.generic[57] / vec2.generic[57]) : 0);
+	vec1.generic[58] = (vec2.generic[58] ? (vec1.generic[58] / vec2.generic[58]) : 0);
+	vec1.generic[59] = (vec2.generic[59] ? (vec1.generic[59] / vec2.generic[59]) : 0);
+	vec1.generic[60] = (vec2.generic[60] ? (vec1.generic[60] / vec2.generic[60]) : 0);
+	vec1.generic[61] = (vec2.generic[61] ? (vec1.generic[61] / vec2.generic[61]) : 0);
+	vec1.generic[62] = (vec2.generic[62] ? (vec1.generic[62] / vec2.generic[62]) : 0);
+	vec1.generic[63] = (vec2.generic[63] ? (vec1.generic[63] / vec2.generic[63]) : 0);
+	return vec1;
+}
+# define VUINT8x64_DIV_DEFINED
+#endif
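+/* modulo follows the same zero-divisor convention as vuint8x64_div above */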
+#if !defined(VUINT8x64_MOD_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_mod(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0);
+	vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] % vec2.generic[16]) : 0);
+	vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] % vec2.generic[17]) : 0);
+	vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] % vec2.generic[18]) : 0);
+	vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] % vec2.generic[19]) : 0);
+	vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] % vec2.generic[20]) : 0);
+	vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] % vec2.generic[21]) : 0);
+	vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] % vec2.generic[22]) : 0);
+	vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] % vec2.generic[23]) : 0);
+	vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] % vec2.generic[24]) : 0);
+	vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] % vec2.generic[25]) : 0);
+	vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] % vec2.generic[26]) : 0);
+	vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] % vec2.generic[27]) : 0);
+	vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] % vec2.generic[28]) : 0);
+	vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] % vec2.generic[29]) : 0);
+	vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] % vec2.generic[30]) : 0);
+	vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] % vec2.generic[31]) : 0);
+	vec1.generic[32] = (vec2.generic[32] ? (vec1.generic[32] % vec2.generic[32]) : 0);
+	vec1.generic[33] = (vec2.generic[33] ? (vec1.generic[33] % vec2.generic[33]) : 0);
+	vec1.generic[34] = (vec2.generic[34] ? (vec1.generic[34] % vec2.generic[34]) : 0);
+	vec1.generic[35] = (vec2.generic[35] ? (vec1.generic[35] % vec2.generic[35]) : 0);
+	vec1.generic[36] = (vec2.generic[36] ? (vec1.generic[36] % vec2.generic[36]) : 0);
+	vec1.generic[37] = (vec2.generic[37] ? (vec1.generic[37] % vec2.generic[37]) : 0);
+	vec1.generic[38] = (vec2.generic[38] ? (vec1.generic[38] % vec2.generic[38]) : 0);
+	vec1.generic[39] = (vec2.generic[39] ? (vec1.generic[39] % vec2.generic[39]) : 0);
+	vec1.generic[40] = (vec2.generic[40] ? (vec1.generic[40] % vec2.generic[40]) : 0);
+	vec1.generic[41] = (vec2.generic[41] ? (vec1.generic[41] % vec2.generic[41]) : 0);
+	vec1.generic[42] = (vec2.generic[42] ? (vec1.generic[42] % vec2.generic[42]) : 0);
+	vec1.generic[43] = (vec2.generic[43] ? (vec1.generic[43] % vec2.generic[43]) : 0);
+	vec1.generic[44] = (vec2.generic[44] ? (vec1.generic[44] % vec2.generic[44]) : 0);
+	vec1.generic[45] = (vec2.generic[45] ? (vec1.generic[45] % vec2.generic[45]) : 0);
+	vec1.generic[46] = (vec2.generic[46] ? (vec1.generic[46] % vec2.generic[46]) : 0);
+	vec1.generic[47] = (vec2.generic[47] ? (vec1.generic[47] % vec2.generic[47]) : 0);
+	vec1.generic[48] = (vec2.generic[48] ? (vec1.generic[48] % vec2.generic[48]) : 0);
+	vec1.generic[49] = (vec2.generic[49] ? (vec1.generic[49] % vec2.generic[49]) : 0);
+	vec1.generic[50] = (vec2.generic[50] ? (vec1.generic[50] % vec2.generic[50]) : 0);
+	vec1.generic[51] = (vec2.generic[51] ? (vec1.generic[51] % vec2.generic[51]) : 0);
+	vec1.generic[52] = (vec2.generic[52] ? (vec1.generic[52] % vec2.generic[52]) : 0);
+	vec1.generic[53] = (vec2.generic[53] ? (vec1.generic[53] % vec2.generic[53]) : 0);
+	vec1.generic[54] = (vec2.generic[54] ? (vec1.generic[54] % vec2.generic[54]) : 0);
+	vec1.generic[55] = (vec2.generic[55] ? (vec1.generic[55] % vec2.generic[55]) : 0);
+	vec1.generic[56] = (vec2.generic[56] ? (vec1.generic[56] % vec2.generic[56]) : 0);
+	vec1.generic[57] = (vec2.generic[57] ? (vec1.generic[57] % vec2.generic[57]) : 0);
+	vec1.generic[58] = (vec2.generic[58] ? (vec1.generic[58] % vec2.generic[58]) : 0);
+	vec1.generic[59] = (vec2.generic[59] ? (vec1.generic[59] % vec2.generic[59]) : 0);
+	vec1.generic[60] = (vec2.generic[60] ? (vec1.generic[60] % vec2.generic[60]) : 0);
+	vec1.generic[61] = (vec2.generic[61] ? (vec1.generic[61] % vec2.generic[61]) : 0);
+	vec1.generic[62] = (vec2.generic[62] ? (vec1.generic[62] % vec2.generic[62]) : 0);
+	vec1.generic[63] = (vec2.generic[63] ? (vec1.generic[63] % vec2.generic[63]) : 0);
+	return vec1;
+}
+# define VUINT8x64_MOD_DEFINED
+#endif
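+/* (a >> 1) + (b >> 1) + ((a | b) & 1) computes ceil((a + b) / 2) without overflowing the lane */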
+#if !defined(VUINT8x64_AVG_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_avg(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1);
+	vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1);
+	vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1);
+	vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1);
+	vec1.generic[8] = (vec1.generic[8] >> 1) + (vec2.generic[8] >> 1) + ((vec1.generic[8] | vec2.generic[8]) & 1);
+	vec1.generic[9] = (vec1.generic[9] >> 1) + (vec2.generic[9] >> 1) + ((vec1.generic[9] | vec2.generic[9]) & 1);
+	vec1.generic[10] = (vec1.generic[10] >> 1) + (vec2.generic[10] >> 1) + ((vec1.generic[10] | vec2.generic[10]) & 1);
+	vec1.generic[11] = (vec1.generic[11] >> 1) + (vec2.generic[11] >> 1) + ((vec1.generic[11] | vec2.generic[11]) & 1);
+	vec1.generic[12] = (vec1.generic[12] >> 1) + (vec2.generic[12] >> 1) + ((vec1.generic[12] | vec2.generic[12]) & 1);
+	vec1.generic[13] = (vec1.generic[13] >> 1) + (vec2.generic[13] >> 1) + ((vec1.generic[13] | vec2.generic[13]) & 1);
+	vec1.generic[14] = (vec1.generic[14] >> 1) + (vec2.generic[14] >> 1) + ((vec1.generic[14] | vec2.generic[14]) & 1);
+	vec1.generic[15] = (vec1.generic[15] >> 1) + (vec2.generic[15] >> 1) + ((vec1.generic[15] | vec2.generic[15]) & 1);
+	vec1.generic[16] = (vec1.generic[16] >> 1) + (vec2.generic[16] >> 1) + ((vec1.generic[16] | vec2.generic[16]) & 1);
+	vec1.generic[17] = (vec1.generic[17] >> 1) + (vec2.generic[17] >> 1) + ((vec1.generic[17] | vec2.generic[17]) & 1);
+	vec1.generic[18] = (vec1.generic[18] >> 1) + (vec2.generic[18] >> 1) + ((vec1.generic[18] | vec2.generic[18]) & 1);
+	vec1.generic[19] = (vec1.generic[19] >> 1) + (vec2.generic[19] >> 1) + ((vec1.generic[19] | vec2.generic[19]) & 1);
+	vec1.generic[20] = (vec1.generic[20] >> 1) + (vec2.generic[20] >> 1) + ((vec1.generic[20] | vec2.generic[20]) & 1);
+	vec1.generic[21] = (vec1.generic[21] >> 1) + (vec2.generic[21] >> 1) + ((vec1.generic[21] | vec2.generic[21]) & 1);
+	vec1.generic[22] = (vec1.generic[22] >> 1) + (vec2.generic[22] >> 1) + ((vec1.generic[22] | vec2.generic[22]) & 1);
+	vec1.generic[23] = (vec1.generic[23] >> 1) + (vec2.generic[23] >> 1) + ((vec1.generic[23] | vec2.generic[23]) & 1);
+	vec1.generic[24] = (vec1.generic[24] >> 1) + (vec2.generic[24] >> 1) + ((vec1.generic[24] | vec2.generic[24]) & 1);
+	vec1.generic[25] = (vec1.generic[25] >> 1) + (vec2.generic[25] >> 1) + ((vec1.generic[25] | vec2.generic[25]) & 1);
+	vec1.generic[26] = (vec1.generic[26] >> 1) + (vec2.generic[26] >> 1) + ((vec1.generic[26] | vec2.generic[26]) & 1);
+	vec1.generic[27] = (vec1.generic[27] >> 1) + (vec2.generic[27] >> 1) + ((vec1.generic[27] | vec2.generic[27]) & 1);
+	vec1.generic[28] = (vec1.generic[28] >> 1) + (vec2.generic[28] >> 1) + ((vec1.generic[28] | vec2.generic[28]) & 1);
+	vec1.generic[29] = (vec1.generic[29] >> 1) + (vec2.generic[29] >> 1) + ((vec1.generic[29] | vec2.generic[29]) & 1);
+	vec1.generic[30] = (vec1.generic[30] >> 1) + (vec2.generic[30] >> 1) + ((vec1.generic[30] | vec2.generic[30]) & 1);
+	vec1.generic[31] = (vec1.generic[31] >> 1) + (vec2.generic[31] >> 1) + ((vec1.generic[31] | vec2.generic[31]) & 1);
+	vec1.generic[32] = (vec1.generic[32] >> 1) + (vec2.generic[32] >> 1) + ((vec1.generic[32] | vec2.generic[32]) & 1);
+	vec1.generic[33] = (vec1.generic[33] >> 1) + (vec2.generic[33] >> 1) + ((vec1.generic[33] | vec2.generic[33]) & 1);
+	vec1.generic[34] = (vec1.generic[34] >> 1) + (vec2.generic[34] >> 1) + ((vec1.generic[34] | vec2.generic[34]) & 1);
+	vec1.generic[35] = (vec1.generic[35] >> 1) + (vec2.generic[35] >> 1) + ((vec1.generic[35] | vec2.generic[35]) & 1);
+	vec1.generic[36] = (vec1.generic[36] >> 1) + (vec2.generic[36] >> 1) + ((vec1.generic[36] | vec2.generic[36]) & 1);
+	vec1.generic[37] = (vec1.generic[37] >> 1) + (vec2.generic[37] >> 1) + ((vec1.generic[37] | vec2.generic[37]) & 1);
+	vec1.generic[38] = (vec1.generic[38] >> 1) + (vec2.generic[38] >> 1) + ((vec1.generic[38] | vec2.generic[38]) & 1);
+	vec1.generic[39] = (vec1.generic[39] >> 1) + (vec2.generic[39] >> 1) + ((vec1.generic[39] | vec2.generic[39]) & 1);
+	vec1.generic[40] = (vec1.generic[40] >> 1) + (vec2.generic[40] >> 1) + ((vec1.generic[40] | vec2.generic[40]) & 1);
+	vec1.generic[41] = (vec1.generic[41] >> 1) + (vec2.generic[41] >> 1) + ((vec1.generic[41] | vec2.generic[41]) & 1);
+	vec1.generic[42] = (vec1.generic[42] >> 1) + (vec2.generic[42] >> 1) + ((vec1.generic[42] | vec2.generic[42]) & 1);
+	vec1.generic[43] = (vec1.generic[43] >> 1) + (vec2.generic[43] >> 1) + ((vec1.generic[43] | vec2.generic[43]) & 1);
+	vec1.generic[44] = (vec1.generic[44] >> 1) + (vec2.generic[44] >> 1) + ((vec1.generic[44] | vec2.generic[44]) & 1);
+	vec1.generic[45] = (vec1.generic[45] >> 1) + (vec2.generic[45] >> 1) + ((vec1.generic[45] | vec2.generic[45]) & 1);
+	vec1.generic[46] = (vec1.generic[46] >> 1) + (vec2.generic[46] >> 1) + ((vec1.generic[46] | vec2.generic[46]) & 1);
+	vec1.generic[47] = (vec1.generic[47] >> 1) + (vec2.generic[47] >> 1) + ((vec1.generic[47] | vec2.generic[47]) & 1);
+	vec1.generic[48] = (vec1.generic[48] >> 1) + (vec2.generic[48] >> 1) + ((vec1.generic[48] | vec2.generic[48]) & 1);
+	vec1.generic[49] = (vec1.generic[49] >> 1) + (vec2.generic[49] >> 1) + ((vec1.generic[49] | vec2.generic[49]) & 1);
+	vec1.generic[50] = (vec1.generic[50] >> 1) + (vec2.generic[50] >> 1) + ((vec1.generic[50] | vec2.generic[50]) & 1);
+	vec1.generic[51] = (vec1.generic[51] >> 1) + (vec2.generic[51] >> 1) + ((vec1.generic[51] | vec2.generic[51]) & 1);
+	vec1.generic[52] = (vec1.generic[52] >> 1) + (vec2.generic[52] >> 1) + ((vec1.generic[52] | vec2.generic[52]) & 1);
+	vec1.generic[53] = (vec1.generic[53] >> 1) + (vec2.generic[53] >> 1) + ((vec1.generic[53] | vec2.generic[53]) & 1);
+	vec1.generic[54] = (vec1.generic[54] >> 1) + (vec2.generic[54] >> 1) + ((vec1.generic[54] | vec2.generic[54]) & 1);
+	vec1.generic[55] = (vec1.generic[55] >> 1) + (vec2.generic[55] >> 1) + ((vec1.generic[55] | vec2.generic[55]) & 1);
+	vec1.generic[56] = (vec1.generic[56] >> 1) + (vec2.generic[56] >> 1) + ((vec1.generic[56] | vec2.generic[56]) & 1);
+	vec1.generic[57] = (vec1.generic[57] >> 1) + (vec2.generic[57] >> 1) + ((vec1.generic[57] | vec2.generic[57]) & 1);
+	vec1.generic[58] = (vec1.generic[58] >> 1) + (vec2.generic[58] >> 1) + ((vec1.generic[58] | vec2.generic[58]) & 1);
+	vec1.generic[59] = (vec1.generic[59] >> 1) + (vec2.generic[59] >> 1) + ((vec1.generic[59] | vec2.generic[59]) & 1);
+	vec1.generic[60] = (vec1.generic[60] >> 1) + (vec2.generic[60] >> 1) + ((vec1.generic[60] | vec2.generic[60]) & 1);
+	vec1.generic[61] = (vec1.generic[61] >> 1) + (vec2.generic[61] >> 1) + ((vec1.generic[61] | vec2.generic[61]) & 1);
+	vec1.generic[62] = (vec1.generic[62] >> 1) + (vec2.generic[62] >> 1) + ((vec1.generic[62] | vec2.generic[62]) & 1);
+	vec1.generic[63] = (vec1.generic[63] >> 1) + (vec2.generic[63] >> 1) + ((vec1.generic[63] | vec2.generic[63]) & 1);
+	return vec1;
+}
+# define VUINT8x64_AVG_DEFINED
+#endif
+#if !defined(VUINT8x64_AND_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_and(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] & vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] & vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] & vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] & vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] & vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] & vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] & vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] & vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] & vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] & vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] & vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] & vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] & vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] & vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] & vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] & vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] & vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] & vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] & vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] & vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] & vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] & vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] & vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] & vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] & vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] & vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] & vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] & vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] & vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] & vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] & vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] & vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] & vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] & vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] & vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] & vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] & vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] & vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] & vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] & vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] & vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] & vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] & vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] & vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] & vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] & vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] & vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] & vec2.generic[63]);
+	return vec1;
+}
+# define VUINT8x64_AND_DEFINED
+#endif
+#if !defined(VUINT8x64_OR_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_or(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] | vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] | vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] | vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] | vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] | vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] | vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] | vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] | vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] | vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] | vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] | vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] | vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] | vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] | vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] | vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] | vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] | vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] | vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] | vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] | vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] | vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] | vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] | vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] | vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] | vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] | vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] | vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] | vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] | vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] | vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] | vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] | vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] | vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] | vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] | vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] | vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] | vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] | vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] | vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] | vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] | vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] | vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] | vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] | vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] | vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] | vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] | vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] | vec2.generic[63]);
+	return vec1;
+}
+# define VUINT8x64_OR_DEFINED
+#endif
+#if !defined(VUINT8x64_XOR_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_xor(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] ^ vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] ^ vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] ^ vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] ^ vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] ^ vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] ^ vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] ^ vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] ^ vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] ^ vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] ^ vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] ^ vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] ^ vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] ^ vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] ^ vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] ^ vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] ^ vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] ^ vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] ^ vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] ^ vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] ^ vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] ^ vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] ^ vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] ^ vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] ^ vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] ^ vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] ^ vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] ^ vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] ^ vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] ^ vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] ^ vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] ^ vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] ^ vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] ^ vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] ^ vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] ^ vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] ^ vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] ^ vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] ^ vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] ^ vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] ^ vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] ^ vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] ^ vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] ^ vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] ^ vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] ^ vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] ^ vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] ^ vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] ^ vec2.generic[63]);
+	return vec1;
+}
+# define VUINT8x64_XOR_DEFINED
+#endif
+#if !defined(VUINT8x64_NOT_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_not(vuint8x64 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	vec.generic[8] = ~vec.generic[8];
+	vec.generic[9] = ~vec.generic[9];
+	vec.generic[10] = ~vec.generic[10];
+	vec.generic[11] = ~vec.generic[11];
+	vec.generic[12] = ~vec.generic[12];
+	vec.generic[13] = ~vec.generic[13];
+	vec.generic[14] = ~vec.generic[14];
+	vec.generic[15] = ~vec.generic[15];
+	vec.generic[16] = ~vec.generic[16];
+	vec.generic[17] = ~vec.generic[17];
+	vec.generic[18] = ~vec.generic[18];
+	vec.generic[19] = ~vec.generic[19];
+	vec.generic[20] = ~vec.generic[20];
+	vec.generic[21] = ~vec.generic[21];
+	vec.generic[22] = ~vec.generic[22];
+	vec.generic[23] = ~vec.generic[23];
+	vec.generic[24] = ~vec.generic[24];
+	vec.generic[25] = ~vec.generic[25];
+	vec.generic[26] = ~vec.generic[26];
+	vec.generic[27] = ~vec.generic[27];
+	vec.generic[28] = ~vec.generic[28];
+	vec.generic[29] = ~vec.generic[29];
+	vec.generic[30] = ~vec.generic[30];
+	vec.generic[31] = ~vec.generic[31];
+	vec.generic[32] = ~vec.generic[32];
+	vec.generic[33] = ~vec.generic[33];
+	vec.generic[34] = ~vec.generic[34];
+	vec.generic[35] = ~vec.generic[35];
+	vec.generic[36] = ~vec.generic[36];
+	vec.generic[37] = ~vec.generic[37];
+	vec.generic[38] = ~vec.generic[38];
+	vec.generic[39] = ~vec.generic[39];
+	vec.generic[40] = ~vec.generic[40];
+	vec.generic[41] = ~vec.generic[41];
+	vec.generic[42] = ~vec.generic[42];
+	vec.generic[43] = ~vec.generic[43];
+	vec.generic[44] = ~vec.generic[44];
+	vec.generic[45] = ~vec.generic[45];
+	vec.generic[46] = ~vec.generic[46];
+	vec.generic[47] = ~vec.generic[47];
+	vec.generic[48] = ~vec.generic[48];
+	vec.generic[49] = ~vec.generic[49];
+	vec.generic[50] = ~vec.generic[50];
+	vec.generic[51] = ~vec.generic[51];
+	vec.generic[52] = ~vec.generic[52];
+	vec.generic[53] = ~vec.generic[53];
+	vec.generic[54] = ~vec.generic[54];
+	vec.generic[55] = ~vec.generic[55];
+	vec.generic[56] = ~vec.generic[56];
+	vec.generic[57] = ~vec.generic[57];
+	vec.generic[58] = ~vec.generic[58];
+	vec.generic[59] = ~vec.generic[59];
+	vec.generic[60] = ~vec.generic[60];
+	vec.generic[61] = ~vec.generic[61];
+	vec.generic[62] = ~vec.generic[62];
+	vec.generic[63] = ~vec.generic[63];
+	return vec;
+}
+# define VUINT8x64_NOT_DEFINED
+#endif
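+/* comparisons produce a per-lane mask: each result byte is written as all ones (0xFF) when the predicate holds, zero otherwise */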
+#if !defined(VUINT8x64_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmplt(vuint8x64 vec1, vuint8x64 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] < vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] < vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] < vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] < vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] < vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] < vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] < vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] < vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] < vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] < vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] < vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] < vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] < vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] < vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] < vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] < vec2.generic[31]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[32], (vec1.generic[32] < vec2.generic[32]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[33], (vec1.generic[33] < vec2.generic[33]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[34], (vec1.generic[34] < vec2.generic[34]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[35], (vec1.generic[35] < vec2.generic[35]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[36], (vec1.generic[36] < vec2.generic[36]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[37], (vec1.generic[37] < vec2.generic[37]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[38], (vec1.generic[38] < vec2.generic[38]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[39], (vec1.generic[39] < vec2.generic[39]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[40], (vec1.generic[40] < vec2.generic[40]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[41], (vec1.generic[41] < vec2.generic[41]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[42], (vec1.generic[42] < vec2.generic[42]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[43], (vec1.generic[43] < vec2.generic[43]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[44], (vec1.generic[44] < vec2.generic[44]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[45], (vec1.generic[45] < vec2.generic[45]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[46], (vec1.generic[46] < vec2.generic[46]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[47], (vec1.generic[47] < vec2.generic[47]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[48], (vec1.generic[48] < vec2.generic[48]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[49], (vec1.generic[49] < vec2.generic[49]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[50], (vec1.generic[50] < vec2.generic[50]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[51], (vec1.generic[51] < vec2.generic[51]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[52], (vec1.generic[52] < vec2.generic[52]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[53], (vec1.generic[53] < vec2.generic[53]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[54], (vec1.generic[54] < vec2.generic[54]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[55], (vec1.generic[55] < vec2.generic[55]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[56], (vec1.generic[56] < vec2.generic[56]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[57], (vec1.generic[57] < vec2.generic[57]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[58], (vec1.generic[58] < vec2.generic[58]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[59], (vec1.generic[59] < vec2.generic[59]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[60], (vec1.generic[60] < vec2.generic[60]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[61], (vec1.generic[61] < vec2.generic[61]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[62], (vec1.generic[62] < vec2.generic[62]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[63], (vec1.generic[63] < vec2.generic[63]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x64_CMPLT_DEFINED
+#endif
+#if !defined(VUINT8x64_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpeq(vuint8x64 vec1, vuint8x64 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] == vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] == vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] == vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] == vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] == vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] == vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] == vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] == vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] == vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] == vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] == vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] == vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] == vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] == vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] == vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] == vec2.generic[31]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[32], (vec1.generic[32] == vec2.generic[32]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[33], (vec1.generic[33] == vec2.generic[33]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[34], (vec1.generic[34] == vec2.generic[34]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[35], (vec1.generic[35] == vec2.generic[35]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[36], (vec1.generic[36] == vec2.generic[36]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[37], (vec1.generic[37] == vec2.generic[37]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[38], (vec1.generic[38] == vec2.generic[38]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[39], (vec1.generic[39] == vec2.generic[39]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[40], (vec1.generic[40] == vec2.generic[40]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[41], (vec1.generic[41] == vec2.generic[41]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[42], (vec1.generic[42] == vec2.generic[42]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[43], (vec1.generic[43] == vec2.generic[43]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[44], (vec1.generic[44] == vec2.generic[44]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[45], (vec1.generic[45] == vec2.generic[45]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[46], (vec1.generic[46] == vec2.generic[46]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[47], (vec1.generic[47] == vec2.generic[47]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[48], (vec1.generic[48] == vec2.generic[48]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[49], (vec1.generic[49] == vec2.generic[49]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[50], (vec1.generic[50] == vec2.generic[50]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[51], (vec1.generic[51] == vec2.generic[51]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[52], (vec1.generic[52] == vec2.generic[52]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[53], (vec1.generic[53] == vec2.generic[53]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[54], (vec1.generic[54] == vec2.generic[54]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[55], (vec1.generic[55] == vec2.generic[55]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[56], (vec1.generic[56] == vec2.generic[56]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[57], (vec1.generic[57] == vec2.generic[57]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[58], (vec1.generic[58] == vec2.generic[58]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[59], (vec1.generic[59] == vec2.generic[59]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[60], (vec1.generic[60] == vec2.generic[60]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[61], (vec1.generic[61] == vec2.generic[61]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[62], (vec1.generic[62] == vec2.generic[62]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[63], (vec1.generic[63] == vec2.generic[63]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x64_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT8x64_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpgt(vuint8x64 vec1, vuint8x64 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] > vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] > vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] > vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] > vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] > vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] > vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] > vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] > vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] > vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] > vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] > vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] > vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] > vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] > vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] > vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] > vec2.generic[31]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[32], (vec1.generic[32] > vec2.generic[32]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[33], (vec1.generic[33] > vec2.generic[33]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[34], (vec1.generic[34] > vec2.generic[34]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[35], (vec1.generic[35] > vec2.generic[35]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[36], (vec1.generic[36] > vec2.generic[36]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[37], (vec1.generic[37] > vec2.generic[37]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[38], (vec1.generic[38] > vec2.generic[38]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[39], (vec1.generic[39] > vec2.generic[39]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[40], (vec1.generic[40] > vec2.generic[40]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[41], (vec1.generic[41] > vec2.generic[41]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[42], (vec1.generic[42] > vec2.generic[42]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[43], (vec1.generic[43] > vec2.generic[43]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[44], (vec1.generic[44] > vec2.generic[44]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[45], (vec1.generic[45] > vec2.generic[45]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[46], (vec1.generic[46] > vec2.generic[46]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[47], (vec1.generic[47] > vec2.generic[47]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[48], (vec1.generic[48] > vec2.generic[48]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[49], (vec1.generic[49] > vec2.generic[49]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[50], (vec1.generic[50] > vec2.generic[50]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[51], (vec1.generic[51] > vec2.generic[51]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[52], (vec1.generic[52] > vec2.generic[52]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[53], (vec1.generic[53] > vec2.generic[53]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[54], (vec1.generic[54] > vec2.generic[54]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[55], (vec1.generic[55] > vec2.generic[55]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[56], (vec1.generic[56] > vec2.generic[56]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[57], (vec1.generic[57] > vec2.generic[57]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[58], (vec1.generic[58] > vec2.generic[58]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[59], (vec1.generic[59] > vec2.generic[59]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[60], (vec1.generic[60] > vec2.generic[60]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[61], (vec1.generic[61] > vec2.generic[61]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[62], (vec1.generic[62] > vec2.generic[62]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[63], (vec1.generic[63] > vec2.generic[63]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x64_CMPGT_DEFINED
+#endif
+#if !defined(VUINT8x64_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmple(vuint8x64 vec1, vuint8x64 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] <= vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] <= vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] <= vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] <= vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] <= vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] <= vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] <= vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] <= vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] <= vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] <= vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] <= vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] <= vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] <= vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] <= vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] <= vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] <= vec2.generic[31]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[32], (vec1.generic[32] <= vec2.generic[32]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[33], (vec1.generic[33] <= vec2.generic[33]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[34], (vec1.generic[34] <= vec2.generic[34]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[35], (vec1.generic[35] <= vec2.generic[35]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[36], (vec1.generic[36] <= vec2.generic[36]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[37], (vec1.generic[37] <= vec2.generic[37]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[38], (vec1.generic[38] <= vec2.generic[38]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[39], (vec1.generic[39] <= vec2.generic[39]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[40], (vec1.generic[40] <= vec2.generic[40]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[41], (vec1.generic[41] <= vec2.generic[41]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[42], (vec1.generic[42] <= vec2.generic[42]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[43], (vec1.generic[43] <= vec2.generic[43]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[44], (vec1.generic[44] <= vec2.generic[44]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[45], (vec1.generic[45] <= vec2.generic[45]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[46], (vec1.generic[46] <= vec2.generic[46]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[47], (vec1.generic[47] <= vec2.generic[47]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[48], (vec1.generic[48] <= vec2.generic[48]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[49], (vec1.generic[49] <= vec2.generic[49]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[50], (vec1.generic[50] <= vec2.generic[50]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[51], (vec1.generic[51] <= vec2.generic[51]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[52], (vec1.generic[52] <= vec2.generic[52]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[53], (vec1.generic[53] <= vec2.generic[53]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[54], (vec1.generic[54] <= vec2.generic[54]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[55], (vec1.generic[55] <= vec2.generic[55]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[56], (vec1.generic[56] <= vec2.generic[56]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[57], (vec1.generic[57] <= vec2.generic[57]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[58], (vec1.generic[58] <= vec2.generic[58]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[59], (vec1.generic[59] <= vec2.generic[59]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[60], (vec1.generic[60] <= vec2.generic[60]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[61], (vec1.generic[61] <= vec2.generic[61]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[62], (vec1.generic[62] <= vec2.generic[62]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[63], (vec1.generic[63] <= vec2.generic[63]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x64_CMPLE_DEFINED
+#endif
+#if !defined(VUINT8x64_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpge(vuint8x64 vec1, vuint8x64 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[16], (vec1.generic[16] >= vec2.generic[16]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[17], (vec1.generic[17] >= vec2.generic[17]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[18], (vec1.generic[18] >= vec2.generic[18]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[19], (vec1.generic[19] >= vec2.generic[19]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[20], (vec1.generic[20] >= vec2.generic[20]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[21], (vec1.generic[21] >= vec2.generic[21]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[22], (vec1.generic[22] >= vec2.generic[22]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[23], (vec1.generic[23] >= vec2.generic[23]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[24], (vec1.generic[24] >= vec2.generic[24]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[25], (vec1.generic[25] >= vec2.generic[25]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[26], (vec1.generic[26] >= vec2.generic[26]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[27], (vec1.generic[27] >= vec2.generic[27]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[28], (vec1.generic[28] >= vec2.generic[28]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[29], (vec1.generic[29] >= vec2.generic[29]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[30], (vec1.generic[30] >= vec2.generic[30]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[31], (vec1.generic[31] >= vec2.generic[31]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[32], (vec1.generic[32] >= vec2.generic[32]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[33], (vec1.generic[33] >= vec2.generic[33]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[34], (vec1.generic[34] >= vec2.generic[34]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[35], (vec1.generic[35] >= vec2.generic[35]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[36], (vec1.generic[36] >= vec2.generic[36]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[37], (vec1.generic[37] >= vec2.generic[37]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[38], (vec1.generic[38] >= vec2.generic[38]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[39], (vec1.generic[39] >= vec2.generic[39]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[40], (vec1.generic[40] >= vec2.generic[40]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[41], (vec1.generic[41] >= vec2.generic[41]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[42], (vec1.generic[42] >= vec2.generic[42]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[43], (vec1.generic[43] >= vec2.generic[43]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[44], (vec1.generic[44] >= vec2.generic[44]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[45], (vec1.generic[45] >= vec2.generic[45]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[46], (vec1.generic[46] >= vec2.generic[46]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[47], (vec1.generic[47] >= vec2.generic[47]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[48], (vec1.generic[48] >= vec2.generic[48]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[49], (vec1.generic[49] >= vec2.generic[49]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[50], (vec1.generic[50] >= vec2.generic[50]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[51], (vec1.generic[51] >= vec2.generic[51]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[52], (vec1.generic[52] >= vec2.generic[52]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[53], (vec1.generic[53] >= vec2.generic[53]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[54], (vec1.generic[54] >= vec2.generic[54]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[55], (vec1.generic[55] >= vec2.generic[55]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[56], (vec1.generic[56] >= vec2.generic[56]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[57], (vec1.generic[57] >= vec2.generic[57]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[58], (vec1.generic[58] >= vec2.generic[58]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[59], (vec1.generic[59] >= vec2.generic[59]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[60], (vec1.generic[60] >= vec2.generic[60]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[61], (vec1.generic[61] >= vec2.generic[61]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[62], (vec1.generic[62] >= vec2.generic[62]) ? 0xFF : 0, 1);
+	memset(&vec1.generic[63], (vec1.generic[63] >= vec2.generic[63]) ? 0xFF : 0, 1);
+	return vec1;
+}
+# define VUINT8x64_CMPGE_DEFINED
+#endif
+#if !defined(VUINT8x64_MIN_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_min(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] < vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] < vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] < vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] < vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] < vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] < vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] < vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] < vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] < vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] < vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] < vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] < vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] < vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] < vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] < vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] < vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] < vec2.generic[32]) ? (vec1.generic[32]) : (vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] < vec2.generic[33]) ? (vec1.generic[33]) : (vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] < vec2.generic[34]) ? (vec1.generic[34]) : (vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] < vec2.generic[35]) ? (vec1.generic[35]) : (vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] < vec2.generic[36]) ? (vec1.generic[36]) : (vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] < vec2.generic[37]) ? (vec1.generic[37]) : (vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] < vec2.generic[38]) ? (vec1.generic[38]) : (vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] < vec2.generic[39]) ? (vec1.generic[39]) : (vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] < vec2.generic[40]) ? (vec1.generic[40]) : (vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] < vec2.generic[41]) ? (vec1.generic[41]) : (vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] < vec2.generic[42]) ? (vec1.generic[42]) : (vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] < vec2.generic[43]) ? (vec1.generic[43]) : (vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] < vec2.generic[44]) ? (vec1.generic[44]) : (vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] < vec2.generic[45]) ? (vec1.generic[45]) : (vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] < vec2.generic[46]) ? (vec1.generic[46]) : (vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] < vec2.generic[47]) ? (vec1.generic[47]) : (vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] < vec2.generic[48]) ? (vec1.generic[48]) : (vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] < vec2.generic[49]) ? (vec1.generic[49]) : (vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] < vec2.generic[50]) ? (vec1.generic[50]) : (vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] < vec2.generic[51]) ? (vec1.generic[51]) : (vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] < vec2.generic[52]) ? (vec1.generic[52]) : (vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] < vec2.generic[53]) ? (vec1.generic[53]) : (vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] < vec2.generic[54]) ? (vec1.generic[54]) : (vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] < vec2.generic[55]) ? (vec1.generic[55]) : (vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] < vec2.generic[56]) ? (vec1.generic[56]) : (vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] < vec2.generic[57]) ? (vec1.generic[57]) : (vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] < vec2.generic[58]) ? (vec1.generic[58]) : (vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] < vec2.generic[59]) ? (vec1.generic[59]) : (vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] < vec2.generic[60]) ? (vec1.generic[60]) : (vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] < vec2.generic[61]) ? (vec1.generic[61]) : (vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] < vec2.generic[62]) ? (vec1.generic[62]) : (vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] < vec2.generic[63]) ? (vec1.generic[63]) : (vec2.generic[63]);
+	return vec1;
+}
+# define VUINT8x64_MIN_DEFINED
+#endif
+#if !defined(VUINT8x64_MAX_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_max(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] > vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] > vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] > vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] > vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] > vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] > vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] > vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] > vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] > vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] > vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] > vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] > vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] > vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] > vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] > vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] > vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+	vec1.generic[32] = (vec1.generic[32] > vec2.generic[32]) ? (vec1.generic[32]) : (vec2.generic[32]);
+	vec1.generic[33] = (vec1.generic[33] > vec2.generic[33]) ? (vec1.generic[33]) : (vec2.generic[33]);
+	vec1.generic[34] = (vec1.generic[34] > vec2.generic[34]) ? (vec1.generic[34]) : (vec2.generic[34]);
+	vec1.generic[35] = (vec1.generic[35] > vec2.generic[35]) ? (vec1.generic[35]) : (vec2.generic[35]);
+	vec1.generic[36] = (vec1.generic[36] > vec2.generic[36]) ? (vec1.generic[36]) : (vec2.generic[36]);
+	vec1.generic[37] = (vec1.generic[37] > vec2.generic[37]) ? (vec1.generic[37]) : (vec2.generic[37]);
+	vec1.generic[38] = (vec1.generic[38] > vec2.generic[38]) ? (vec1.generic[38]) : (vec2.generic[38]);
+	vec1.generic[39] = (vec1.generic[39] > vec2.generic[39]) ? (vec1.generic[39]) : (vec2.generic[39]);
+	vec1.generic[40] = (vec1.generic[40] > vec2.generic[40]) ? (vec1.generic[40]) : (vec2.generic[40]);
+	vec1.generic[41] = (vec1.generic[41] > vec2.generic[41]) ? (vec1.generic[41]) : (vec2.generic[41]);
+	vec1.generic[42] = (vec1.generic[42] > vec2.generic[42]) ? (vec1.generic[42]) : (vec2.generic[42]);
+	vec1.generic[43] = (vec1.generic[43] > vec2.generic[43]) ? (vec1.generic[43]) : (vec2.generic[43]);
+	vec1.generic[44] = (vec1.generic[44] > vec2.generic[44]) ? (vec1.generic[44]) : (vec2.generic[44]);
+	vec1.generic[45] = (vec1.generic[45] > vec2.generic[45]) ? (vec1.generic[45]) : (vec2.generic[45]);
+	vec1.generic[46] = (vec1.generic[46] > vec2.generic[46]) ? (vec1.generic[46]) : (vec2.generic[46]);
+	vec1.generic[47] = (vec1.generic[47] > vec2.generic[47]) ? (vec1.generic[47]) : (vec2.generic[47]);
+	vec1.generic[48] = (vec1.generic[48] > vec2.generic[48]) ? (vec1.generic[48]) : (vec2.generic[48]);
+	vec1.generic[49] = (vec1.generic[49] > vec2.generic[49]) ? (vec1.generic[49]) : (vec2.generic[49]);
+	vec1.generic[50] = (vec1.generic[50] > vec2.generic[50]) ? (vec1.generic[50]) : (vec2.generic[50]);
+	vec1.generic[51] = (vec1.generic[51] > vec2.generic[51]) ? (vec1.generic[51]) : (vec2.generic[51]);
+	vec1.generic[52] = (vec1.generic[52] > vec2.generic[52]) ? (vec1.generic[52]) : (vec2.generic[52]);
+	vec1.generic[53] = (vec1.generic[53] > vec2.generic[53]) ? (vec1.generic[53]) : (vec2.generic[53]);
+	vec1.generic[54] = (vec1.generic[54] > vec2.generic[54]) ? (vec1.generic[54]) : (vec2.generic[54]);
+	vec1.generic[55] = (vec1.generic[55] > vec2.generic[55]) ? (vec1.generic[55]) : (vec2.generic[55]);
+	vec1.generic[56] = (vec1.generic[56] > vec2.generic[56]) ? (vec1.generic[56]) : (vec2.generic[56]);
+	vec1.generic[57] = (vec1.generic[57] > vec2.generic[57]) ? (vec1.generic[57]) : (vec2.generic[57]);
+	vec1.generic[58] = (vec1.generic[58] > vec2.generic[58]) ? (vec1.generic[58]) : (vec2.generic[58]);
+	vec1.generic[59] = (vec1.generic[59] > vec2.generic[59]) ? (vec1.generic[59]) : (vec2.generic[59]);
+	vec1.generic[60] = (vec1.generic[60] > vec2.generic[60]) ? (vec1.generic[60]) : (vec2.generic[60]);
+	vec1.generic[61] = (vec1.generic[61] > vec2.generic[61]) ? (vec1.generic[61]) : (vec2.generic[61]);
+	vec1.generic[62] = (vec1.generic[62] > vec2.generic[62]) ? (vec1.generic[62]) : (vec2.generic[62]);
+	vec1.generic[63] = (vec1.generic[63] > vec2.generic[63]) ? (vec1.generic[63]) : (vec2.generic[63]);
+	return vec1;
+}
+# define VUINT8x64_MAX_DEFINED
+#endif
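+/* Note: these generic shifts take the shift count for every lane from
+ * lane 0 of vec2; for unsigned lanes a logical and an arithmetic right
+ * shift are identical, which is why rshift and lrshift share a body. */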
+#if !defined(VUINT8x64_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_rshift(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[0];
+	vec1.generic[2] >>= vec2.generic[0];
+	vec1.generic[3] >>= vec2.generic[0];
+	vec1.generic[4] >>= vec2.generic[0];
+	vec1.generic[5] >>= vec2.generic[0];
+	vec1.generic[6] >>= vec2.generic[0];
+	vec1.generic[7] >>= vec2.generic[0];
+	vec1.generic[8] >>= vec2.generic[0];
+	vec1.generic[9] >>= vec2.generic[0];
+	vec1.generic[10] >>= vec2.generic[0];
+	vec1.generic[11] >>= vec2.generic[0];
+	vec1.generic[12] >>= vec2.generic[0];
+	vec1.generic[13] >>= vec2.generic[0];
+	vec1.generic[14] >>= vec2.generic[0];
+	vec1.generic[15] >>= vec2.generic[0];
+	vec1.generic[16] >>= vec2.generic[0];
+	vec1.generic[17] >>= vec2.generic[0];
+	vec1.generic[18] >>= vec2.generic[0];
+	vec1.generic[19] >>= vec2.generic[0];
+	vec1.generic[20] >>= vec2.generic[0];
+	vec1.generic[21] >>= vec2.generic[0];
+	vec1.generic[22] >>= vec2.generic[0];
+	vec1.generic[23] >>= vec2.generic[0];
+	vec1.generic[24] >>= vec2.generic[0];
+	vec1.generic[25] >>= vec2.generic[0];
+	vec1.generic[26] >>= vec2.generic[0];
+	vec1.generic[27] >>= vec2.generic[0];
+	vec1.generic[28] >>= vec2.generic[0];
+	vec1.generic[29] >>= vec2.generic[0];
+	vec1.generic[30] >>= vec2.generic[0];
+	vec1.generic[31] >>= vec2.generic[0];
+	vec1.generic[32] >>= vec2.generic[0];
+	vec1.generic[33] >>= vec2.generic[0];
+	vec1.generic[34] >>= vec2.generic[0];
+	vec1.generic[35] >>= vec2.generic[0];
+	vec1.generic[36] >>= vec2.generic[0];
+	vec1.generic[37] >>= vec2.generic[0];
+	vec1.generic[38] >>= vec2.generic[0];
+	vec1.generic[39] >>= vec2.generic[0];
+	vec1.generic[40] >>= vec2.generic[0];
+	vec1.generic[41] >>= vec2.generic[0];
+	vec1.generic[42] >>= vec2.generic[0];
+	vec1.generic[43] >>= vec2.generic[0];
+	vec1.generic[44] >>= vec2.generic[0];
+	vec1.generic[45] >>= vec2.generic[0];
+	vec1.generic[46] >>= vec2.generic[0];
+	vec1.generic[47] >>= vec2.generic[0];
+	vec1.generic[48] >>= vec2.generic[0];
+	vec1.generic[49] >>= vec2.generic[0];
+	vec1.generic[50] >>= vec2.generic[0];
+	vec1.generic[51] >>= vec2.generic[0];
+	vec1.generic[52] >>= vec2.generic[0];
+	vec1.generic[53] >>= vec2.generic[0];
+	vec1.generic[54] >>= vec2.generic[0];
+	vec1.generic[55] >>= vec2.generic[0];
+	vec1.generic[56] >>= vec2.generic[0];
+	vec1.generic[57] >>= vec2.generic[0];
+	vec1.generic[58] >>= vec2.generic[0];
+	vec1.generic[59] >>= vec2.generic[0];
+	vec1.generic[60] >>= vec2.generic[0];
+	vec1.generic[61] >>= vec2.generic[0];
+	vec1.generic[62] >>= vec2.generic[0];
+	vec1.generic[63] >>= vec2.generic[0];
+	return vec1;
+}
+# define VUINT8x64_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x64_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_lrshift(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[0];
+	vec1.generic[2] >>= vec2.generic[0];
+	vec1.generic[3] >>= vec2.generic[0];
+	vec1.generic[4] >>= vec2.generic[0];
+	vec1.generic[5] >>= vec2.generic[0];
+	vec1.generic[6] >>= vec2.generic[0];
+	vec1.generic[7] >>= vec2.generic[0];
+	vec1.generic[8] >>= vec2.generic[0];
+	vec1.generic[9] >>= vec2.generic[0];
+	vec1.generic[10] >>= vec2.generic[0];
+	vec1.generic[11] >>= vec2.generic[0];
+	vec1.generic[12] >>= vec2.generic[0];
+	vec1.generic[13] >>= vec2.generic[0];
+	vec1.generic[14] >>= vec2.generic[0];
+	vec1.generic[15] >>= vec2.generic[0];
+	vec1.generic[16] >>= vec2.generic[0];
+	vec1.generic[17] >>= vec2.generic[0];
+	vec1.generic[18] >>= vec2.generic[0];
+	vec1.generic[19] >>= vec2.generic[0];
+	vec1.generic[20] >>= vec2.generic[0];
+	vec1.generic[21] >>= vec2.generic[0];
+	vec1.generic[22] >>= vec2.generic[0];
+	vec1.generic[23] >>= vec2.generic[0];
+	vec1.generic[24] >>= vec2.generic[0];
+	vec1.generic[25] >>= vec2.generic[0];
+	vec1.generic[26] >>= vec2.generic[0];
+	vec1.generic[27] >>= vec2.generic[0];
+	vec1.generic[28] >>= vec2.generic[0];
+	vec1.generic[29] >>= vec2.generic[0];
+	vec1.generic[30] >>= vec2.generic[0];
+	vec1.generic[31] >>= vec2.generic[0];
+	vec1.generic[32] >>= vec2.generic[0];
+	vec1.generic[33] >>= vec2.generic[0];
+	vec1.generic[34] >>= vec2.generic[0];
+	vec1.generic[35] >>= vec2.generic[0];
+	vec1.generic[36] >>= vec2.generic[0];
+	vec1.generic[37] >>= vec2.generic[0];
+	vec1.generic[38] >>= vec2.generic[0];
+	vec1.generic[39] >>= vec2.generic[0];
+	vec1.generic[40] >>= vec2.generic[0];
+	vec1.generic[41] >>= vec2.generic[0];
+	vec1.generic[42] >>= vec2.generic[0];
+	vec1.generic[43] >>= vec2.generic[0];
+	vec1.generic[44] >>= vec2.generic[0];
+	vec1.generic[45] >>= vec2.generic[0];
+	vec1.generic[46] >>= vec2.generic[0];
+	vec1.generic[47] >>= vec2.generic[0];
+	vec1.generic[48] >>= vec2.generic[0];
+	vec1.generic[49] >>= vec2.generic[0];
+	vec1.generic[50] >>= vec2.generic[0];
+	vec1.generic[51] >>= vec2.generic[0];
+	vec1.generic[52] >>= vec2.generic[0];
+	vec1.generic[53] >>= vec2.generic[0];
+	vec1.generic[54] >>= vec2.generic[0];
+	vec1.generic[55] >>= vec2.generic[0];
+	vec1.generic[56] >>= vec2.generic[0];
+	vec1.generic[57] >>= vec2.generic[0];
+	vec1.generic[58] >>= vec2.generic[0];
+	vec1.generic[59] >>= vec2.generic[0];
+	vec1.generic[60] >>= vec2.generic[0];
+	vec1.generic[61] >>= vec2.generic[0];
+	vec1.generic[62] >>= vec2.generic[0];
+	vec1.generic[63] >>= vec2.generic[0];
+	return vec1;
+}
+# define VUINT8x64_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x64_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x64 vuint8x64_lshift(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[0];
+	vec1.generic[2] <<= vec2.generic[0];
+	vec1.generic[3] <<= vec2.generic[0];
+	vec1.generic[4] <<= vec2.generic[0];
+	vec1.generic[5] <<= vec2.generic[0];
+	vec1.generic[6] <<= vec2.generic[0];
+	vec1.generic[7] <<= vec2.generic[0];
+	vec1.generic[8] <<= vec2.generic[0];
+	vec1.generic[9] <<= vec2.generic[0];
+	vec1.generic[10] <<= vec2.generic[0];
+	vec1.generic[11] <<= vec2.generic[0];
+	vec1.generic[12] <<= vec2.generic[0];
+	vec1.generic[13] <<= vec2.generic[0];
+	vec1.generic[14] <<= vec2.generic[0];
+	vec1.generic[15] <<= vec2.generic[0];
+	vec1.generic[16] <<= vec2.generic[0];
+	vec1.generic[17] <<= vec2.generic[0];
+	vec1.generic[18] <<= vec2.generic[0];
+	vec1.generic[19] <<= vec2.generic[0];
+	vec1.generic[20] <<= vec2.generic[0];
+	vec1.generic[21] <<= vec2.generic[0];
+	vec1.generic[22] <<= vec2.generic[0];
+	vec1.generic[23] <<= vec2.generic[0];
+	vec1.generic[24] <<= vec2.generic[0];
+	vec1.generic[25] <<= vec2.generic[0];
+	vec1.generic[26] <<= vec2.generic[0];
+	vec1.generic[27] <<= vec2.generic[0];
+	vec1.generic[28] <<= vec2.generic[0];
+	vec1.generic[29] <<= vec2.generic[0];
+	vec1.generic[30] <<= vec2.generic[0];
+	vec1.generic[31] <<= vec2.generic[0];
+	vec1.generic[32] <<= vec2.generic[0];
+	vec1.generic[33] <<= vec2.generic[0];
+	vec1.generic[34] <<= vec2.generic[0];
+	vec1.generic[35] <<= vec2.generic[0];
+	vec1.generic[36] <<= vec2.generic[0];
+	vec1.generic[37] <<= vec2.generic[0];
+	vec1.generic[38] <<= vec2.generic[0];
+	vec1.generic[39] <<= vec2.generic[0];
+	vec1.generic[40] <<= vec2.generic[0];
+	vec1.generic[41] <<= vec2.generic[0];
+	vec1.generic[42] <<= vec2.generic[0];
+	vec1.generic[43] <<= vec2.generic[0];
+	vec1.generic[44] <<= vec2.generic[0];
+	vec1.generic[45] <<= vec2.generic[0];
+	vec1.generic[46] <<= vec2.generic[0];
+	vec1.generic[47] <<= vec2.generic[0];
+	vec1.generic[48] <<= vec2.generic[0];
+	vec1.generic[49] <<= vec2.generic[0];
+	vec1.generic[50] <<= vec2.generic[0];
+	vec1.generic[51] <<= vec2.generic[0];
+	vec1.generic[52] <<= vec2.generic[0];
+	vec1.generic[53] <<= vec2.generic[0];
+	vec1.generic[54] <<= vec2.generic[0];
+	vec1.generic[55] <<= vec2.generic[0];
+	vec1.generic[56] <<= vec2.generic[0];
+	vec1.generic[57] <<= vec2.generic[0];
+	vec1.generic[58] <<= vec2.generic[0];
+	vec1.generic[59] <<= vec2.generic[0];
+	vec1.generic[60] <<= vec2.generic[0];
+	vec1.generic[61] <<= vec2.generic[0];
+	vec1.generic[62] <<= vec2.generic[0];
+	vec1.generic[63] <<= vec2.generic[0];
+	return vec1;
+}
+# define VUINT8x64_LSHIFT_DEFINED
+#endif
+#if !defined(VINT16x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_splat(vec_int16 x)
+{
+	vint16x2 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	return vec;
+}
 # define VINT16x2_SPLAT_DEFINED
 #endif
-#ifndef VINT16x2_LOAD_ALIGNED_DEFINED
-VEC_GENERIC_LOAD_ALIGNED(/* nothing */, 16, 2)
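+/* Loads and stores copy the whole register with memcpy; the byte count
+ * is lanes * sizeof(lane) (2 * 2 = 4 here). */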
+#if !defined(VINT16x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_load_aligned(const vec_int16 x[2])
+{
+	vint16x2 vec;
+	memcpy(vec.generic, x, 4);
+	return vec;
+}
 # define VINT16x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x2_LOAD_DEFINED
-VEC_GENERIC_LOAD(/* nothing */, 16, 2)
+#if !defined(VINT16x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_load(const vec_int16 x[2])
+{
+	vint16x2 vec;
+	memcpy(vec.generic, x, 4);
+	return vec;
+}
 # define VINT16x2_LOAD_DEFINED
 #endif
-#ifndef VINT16x2_STORE_ALIGNED_DEFINED
-VEC_GENERIC_STORE_ALIGNED(/* nothing */, 16, 2)
+#if !defined(VINT16x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint16x2_store_aligned(vint16x2 vec, vec_int16 x[2])
+{
+	memcpy(x, vec.generic, 4);
+}
 # define VINT16x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x2_STORE_DEFINED
-VEC_GENERIC_STORE(/* nothing */, 16, 2)
+#if !defined(VINT16x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vint16x2_store(vint16x2 vec, vec_int16 x[2])
+{
+	memcpy(x, vec.generic, 4);
+}
 # define VINT16x2_STORE_DEFINED
 #endif
-#ifndef VINT16x2_ADD_DEFINED
-VEC_GENERIC_ADD(/* nothing */, 16, 2)
+#if !defined(VINT16x2_ADD_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_add(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	return vec1;
+}
 # define VINT16x2_ADD_DEFINED
 #endif
-#ifndef VINT16x2_SUB_DEFINED
-VEC_GENERIC_SUB(/* nothing */, 16, 2)
+#if !defined(VINT16x2_SUB_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_sub(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	return vec1;
+}
 # define VINT16x2_SUB_DEFINED
 #endif
-#ifndef VINT16x2_MUL_DEFINED
-VEC_GENERIC_MUL(/* nothing */, 16, 2)
+#if !defined(VINT16x2_MUL_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_mul(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	return vec1;
+}
 # define VINT16x2_MUL_DEFINED
 #endif
-#ifndef VINT16x2_DIV_DEFINED
-VEC_GENERIC_DIV(/* nothing */, 16, 2)
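+/* div and mod guard against a zero divisor: a lane whose divisor is 0
+ * yields 0 instead of invoking undefined behaviour. */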
+#if !defined(VINT16x2_DIV_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_div(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VINT16x2_DIV_DEFINED
 #endif
-#ifndef VINT16x2_MOD_DEFINED
-VEC_GENERIC_MOD(/* nothing */, 16, 2)
+#if !defined(VINT16x2_MOD_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_mod(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VINT16x2_MOD_DEFINED
 #endif
-#ifndef VINT16x2_AVG_DEFINED
-VEC_GENERIC_AVG(/* nothing */, 16, 2)
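+/* The signed average halves each operand separately and then recombines
+ * the two remainders, so the intermediate sum can never overflow the
+ * lane type. */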
+#if !defined(VINT16x2_AVG_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_avg(vint16x2 vec1, vint16x2 vec2)
+{
+	vec_int16 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
 # define VINT16x2_AVG_DEFINED
 #endif
-#ifndef VINT16x2_AND_DEFINED
-VEC_GENERIC_AND(/* nothing */, 16, 2)
+#if !defined(VINT16x2_AND_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_and(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	return vec1;
+}
 # define VINT16x2_AND_DEFINED
 #endif
-#ifndef VINT16x2_OR_DEFINED
-VEC_GENERIC_OR(/* nothing */, 16, 2)
+#if !defined(VINT16x2_OR_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_or(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	return vec1;
+}
 # define VINT16x2_OR_DEFINED
 #endif
-#ifndef VINT16x2_XOR_DEFINED
-VEC_GENERIC_XOR(/* nothing */, 16, 2)
+#if !defined(VINT16x2_XOR_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_xor(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	return vec1;
+}
 # define VINT16x2_XOR_DEFINED
 #endif
-#ifndef VINT16x2_NOT_DEFINED
-VEC_GENERIC_NOT(/* nothing */, 16, 2)
+#if !defined(VINT16x2_NOT_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_not(vint16x2 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	return vec;
+}
 # define VINT16x2_NOT_DEFINED
 #endif
-#ifndef VINT16x2_CMPLT_DEFINED
-VEC_GENERIC_CMPLT(/* nothing */, 16, 2)
+#if !defined(VINT16x2_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_cmplt(vint16x2 vec1, vint16x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2);
+	return vec1;
+}
 # define VINT16x2_CMPLT_DEFINED
 #endif
-#ifndef VINT16x2_CMPEQ_DEFINED
-VEC_GENERIC_CMPEQ(/* nothing */, 16, 2)
+#if !defined(VINT16x2_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_cmpeq(vint16x2 vec1, vint16x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2);
+	return vec1;
+}
 # define VINT16x2_CMPEQ_DEFINED
 #endif
-#ifndef VINT16x2_CMPGT_DEFINED
-VEC_GENERIC_CMPGT(/* nothing */, 16, 2)
+#if !defined(VINT16x2_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_cmpgt(vint16x2 vec1, vint16x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2);
+	return vec1;
+}
 # define VINT16x2_CMPGT_DEFINED
 #endif
-#ifndef VINT16x2_CMPLE_DEFINED
-VEC_GENERIC_CMPLE(/* nothing */, 16, 2)
+#if !defined(VINT16x2_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_cmple(vint16x2 vec1, vint16x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2);
+	return vec1;
+}
 # define VINT16x2_CMPLE_DEFINED
 #endif
-#ifndef VINT16x2_CMPGE_DEFINED
-VEC_GENERIC_CMPGE(/* nothing */, 16, 2)
+#if !defined(VINT16x2_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_cmpge(vint16x2 vec1, vint16x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2);
+	return vec1;
+}
 # define VINT16x2_CMPGE_DEFINED
 #endif
-#ifndef VINT16x2_MIN_DEFINED
-VEC_GENERIC_MIN(/* nothing */, 16, 2)
+#if !defined(VINT16x2_MIN_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_min(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VINT16x2_MIN_DEFINED
 #endif
-#ifndef VINT16x2_MAX_DEFINED
-VEC_GENERIC_MAX(/* nothing */, 16, 2)
+#if !defined(VINT16x2_MAX_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_max(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VINT16x2_MAX_DEFINED
 #endif
-#ifndef VINT16x2_RSHIFT_DEFINED
-VEC_GENERIC_RSHIFT(/* nothing */, 16, 2)
+#if !defined(VINT16x2_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_rshift(vint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] = ((~vec1.generic[0]) >> vec2.generic[0]);
+	vec1.generic[1] = ((~vec1.generic[1]) >> vec2.generic[1]);
+	return vec1;
+}
 # define VINT16x2_RSHIFT_DEFINED
 #endif
-#ifndef VINT16x2_LRSHIFT_DEFINED
-VEC_GENERIC_LRSHIFT(/* nothing */, 16, 2)
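+/* lrshift (and lshift below) reinterpret each signed lane as its
+ * unsigned counterpart through a union, shift the raw bits, and store
+ * the result back into the lane. */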
+#if !defined(VINT16x2_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_lrshift(vint16x2 vec1, vuint16x2 vec2)
+{
+	union { vec_uint16 u; vec_int16 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	return vec1;
+}
 # define VINT16x2_LRSHIFT_DEFINED
 #endif
-#ifndef VINT16x2_LSHIFT_DEFINED
-VEC_GENERIC_LSHIFT(/* nothing */, 16, 2)
+#if !defined(VINT16x2_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x2 vint16x2_lshift(vint16x2 vec1, vuint16x2 vec2)
+{
+	union { vec_uint16 u; vec_int16 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	return vec1;
+}
 # define VINT16x2_LSHIFT_DEFINED
 #endif
-
-
-/* vint16x2 */
-
-#ifndef VUINT16x2_SPLAT_DEFINED
-VEC_GENERIC_SPLAT(u, 16, 2)
+#if !defined(VUINT16x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_splat(vec_uint16 x)
+{
+	vuint16x2 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	return vec;
+}
 # define VUINT16x2_SPLAT_DEFINED
 #endif
-#ifndef VUINT16x2_LOAD_ALIGNED_DEFINED
-VEC_GENERIC_LOAD_ALIGNED(u, 16, 2)
+#if !defined(VUINT16x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_load_aligned(const vec_uint16 x[2])
+{
+	vuint16x2 vec;
+	memcpy(vec.generic, x, 4);
+	return vec;
+}
 # define VUINT16x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x2_LOAD_DEFINED
-VEC_GENERIC_LOAD(u, 16, 2)
+#if !defined(VUINT16x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_load(const vec_uint16 x[2])
+{
+	vuint16x2 vec;
+	memcpy(vec.generic, x, 4);
+	return vec;
+}
 # define VUINT16x2_LOAD_DEFINED
 #endif
-#ifndef VUINT16x2_STORE_ALIGNED_DEFINED
-VEC_GENERIC_STORE_ALIGNED(u, 16, 2)
+#if !defined(VUINT16x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint16x2_store_aligned(vuint16x2 vec, vec_uint16 x[2])
+{
+	memcpy(x, vec.generic, 4);
+}
 # define VUINT16x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x2_STORE_DEFINED
-VEC_GENERIC_STORE(u, 16, 2)
+#if !defined(VUINT16x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint16x2_store(vuint16x2 vec, vec_uint16 x[2])
+{
+	memcpy(x, vec.generic, 4);
+}
 # define VUINT16x2_STORE_DEFINED
 #endif
-#ifndef VUINT16x2_ADD_DEFINED
-VEC_GENERIC_ADD(u, 16, 2)
+#if !defined(VUINT16x2_ADD_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_add(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	return vec1;
+}
 # define VUINT16x2_ADD_DEFINED
 #endif
-#ifndef VUINT16x2_SUB_DEFINED
-VEC_GENERIC_SUB(u, 16, 2)
+#if !defined(VUINT16x2_SUB_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_sub(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	return vec1;
+}
 # define VUINT16x2_SUB_DEFINED
 #endif
-#ifndef VUINT16x2_MUL_DEFINED
-VEC_GENERIC_MUL(u, 16, 2)
+#if !defined(VUINT16x2_MUL_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_mul(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	return vec1;
+}
 # define VUINT16x2_MUL_DEFINED
 #endif
-#ifndef VUINT16x2_DIV_DEFINED
-VEC_GENERIC_DIV(u, 16, 2)
+#if !defined(VUINT16x2_DIV_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_div(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VUINT16x2_DIV_DEFINED
 #endif
-#ifndef VUINT16x2_MOD_DEFINED
-VEC_GENERIC_MOD(u, 16, 2)
+#if !defined(VUINT16x2_MOD_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_mod(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VUINT16x2_MOD_DEFINED
 #endif
-#ifndef VUINT16x2_AVG_DEFINED
-VEC_GENERIC_AVG(u, 16, 2)
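+/* Unsigned average: the sum of the halves plus a carry whenever either
+ * low bit is set, equivalent to (a + b + 1) / 2 computed without
+ * overflowing the lane type. */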
+#if !defined(VUINT16x2_AVG_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_avg(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	return vec1;
+}
 # define VUINT16x2_AVG_DEFINED
 #endif
-#ifndef VUINT16x2_AND_DEFINED
-VEC_GENERIC_AND(u, 16, 2)
+#if !defined(VUINT16x2_AND_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_and(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	return vec1;
+}
 # define VUINT16x2_AND_DEFINED
 #endif
-#ifndef VUINT16x2_OR_DEFINED
-VEC_GENERIC_OR(u, 16, 2)
+#if !defined(VUINT16x2_OR_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_or(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	return vec1;
+}
 # define VUINT16x2_OR_DEFINED
 #endif
-#ifndef VUINT16x2_XOR_DEFINED
-VEC_GENERIC_XOR(u, 16, 2)
+#if !defined(VUINT16x2_XOR_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_xor(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	return vec1;
+}
 # define VUINT16x2_XOR_DEFINED
 #endif
-#ifndef VUINT16x2_NOT_DEFINED
-VEC_GENERIC_NOT(u, 16, 2)
+#if !defined(VUINT16x2_NOT_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_not(vuint16x2 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	return vec;
+}
 # define VUINT16x2_NOT_DEFINED
 #endif
-#ifndef VUINT16x2_CMPLT_DEFINED
-VEC_GENERIC_CMPLT(u, 16, 2)
+#if !defined(VUINT16x2_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmplt(vuint16x2 vec1, vuint16x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2);
+	return vec1;
+}
 # define VUINT16x2_CMPLT_DEFINED
 #endif
-#ifndef VUINT16x2_CMPEQ_DEFINED
-VEC_GENERIC_CMPEQ(u, 16, 2)
+#if !defined(VUINT16x2_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpeq(vuint16x2 vec1, vuint16x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2);
+	return vec1;
+}
 # define VUINT16x2_CMPEQ_DEFINED
 #endif
-#ifndef VUINT16x2_CMPGT_DEFINED
-VEC_GENERIC_CMPGT(u, 16, 2)
+#if !defined(VUINT16x2_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpgt(vuint16x2 vec1, vuint16x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2);
+	return vec1;
+}
 # define VUINT16x2_CMPGT_DEFINED
 #endif
-#ifndef VUINT16x2_CMPLE_DEFINED
-VEC_GENERIC_CMPLE(u, 16, 2)
+#if !defined(VUINT16x2_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmple(vuint16x2 vec1, vuint16x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2);
+	return vec1;
+}
 # define VUINT16x2_CMPLE_DEFINED
 #endif
-#ifndef VUINT16x2_CMPGE_DEFINED
-VEC_GENERIC_CMPGE(u, 16, 2)
+#if !defined(VUINT16x2_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpge(vuint16x2 vec1, vuint16x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2);
+	return vec1;
+}
 # define VUINT16x2_CMPGE_DEFINED
 #endif
-#ifndef VUINT16x2_MIN_DEFINED
-VEC_GENERIC_MIN(u, 16, 2)
+#if !defined(VUINT16x2_MIN_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_min(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VUINT16x2_MIN_DEFINED
 #endif
-#ifndef VUINT16x2_MAX_DEFINED
-VEC_GENERIC_MAX(u, 16, 2)
+#if !defined(VUINT16x2_MAX_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_max(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VUINT16x2_MAX_DEFINED
 #endif
-#ifndef VUINT16x2_RSHIFT_DEFINED
-VEC_GENERIC_RSHIFT(u, 16, 2)
+#if !defined(VUINT16x2_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_rshift(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[0];
+	return vec1;
+}
 # define VUINT16x2_RSHIFT_DEFINED
 #endif
-#ifndef VUINT16x2_LRSHIFT_DEFINED
-VEC_GENERIC_LRSHIFT(u, 16, 2)
+#if !defined(VUINT16x2_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_lrshift(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[0];
+	return vec1;
+}
 # define VUINT16x2_LRSHIFT_DEFINED
 #endif
-#ifndef VUINT16x2_LSHIFT_DEFINED
-VEC_GENERIC_LSHIFT(u, 16, 2)
+#if !defined(VUINT16x2_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x2 vuint16x2_lshift(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[0];
+	return vec1;
+}
 # define VUINT16x2_LSHIFT_DEFINED
 #endif
-
-
-/* vuint32x2 */
-
-#ifndef VINT32x2_SPLAT_DEFINED
-VEC_GENERIC_SPLAT(/* nothing */, 32, 2)
+#if !defined(VINT16x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_splat(vec_int16 x)
+{
+	vint16x4 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	return vec;
+}
+# define VINT16x4_SPLAT_DEFINED
+#endif
+#if !defined(VINT16x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_load_aligned(const vec_int16 x[4])
+{
+	vint16x4 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
+# define VINT16x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT16x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_load(const vec_int16 x[4])
+{
+	vint16x4 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
+# define VINT16x4_LOAD_DEFINED
+#endif
+#if !defined(VINT16x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint16x4_store_aligned(vint16x4 vec, vec_int16 x[4])
+{
+	memcpy(x, vec.generic, 8);
+}
+# define VINT16x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT16x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vint16x4_store(vint16x4 vec, vec_int16 x[4])
+{
+	memcpy(x, vec.generic, 8);
+}
+# define VINT16x4_STORE_DEFINED
+#endif
+#if !defined(VINT16x4_ADD_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_add(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	return vec1;
+}
+# define VINT16x4_ADD_DEFINED
+#endif
+#if !defined(VINT16x4_SUB_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_sub(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	return vec1;
+}
+# define VINT16x4_SUB_DEFINED
+#endif
+#if !defined(VINT16x4_MUL_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_mul(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	return vec1;
+}
+# define VINT16x4_MUL_DEFINED
+#endif
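+/* div and mod guard against zero divisors: any lane of vec2 that is zero
+ * produces 0 in the corresponding result lane instead of invoking
+ * undefined behaviour. */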
+#if !defined(VINT16x4_DIV_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_div(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VINT16x4_DIV_DEFINED
+#endif
+#if !defined(VINT16x4_MOD_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_mod(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VINT16x4_MOD_DEFINED
+#endif
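+/* avg splits each operand into a half and a remainder so the intermediate
+ * sum can never overflow a lane; the remainders are recombined so that the
+ * result equals ceil((vec1 + vec2) / 2) per lane. */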
+#if !defined(VINT16x4_AVG_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_avg(vint16x4 vec1, vint16x4 vec2)
+{
+	vec_int16 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT16x4_AVG_DEFINED
+#endif
+#if !defined(VINT16x4_AND_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_and(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	return vec1;
+}
+# define VINT16x4_AND_DEFINED
+#endif
+#if !defined(VINT16x4_OR_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_or(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	return vec1;
+}
+# define VINT16x4_OR_DEFINED
+#endif
+#if !defined(VINT16x4_XOR_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_xor(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	return vec1;
+}
+# define VINT16x4_XOR_DEFINED
+#endif
+#if !defined(VINT16x4_NOT_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_not(vint16x4 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	return vec;
+}
+# define VINT16x4_NOT_DEFINED
+#endif
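+/* Comparisons return lane masks: memset fills each 2-byte lane with all
+ * ones when the predicate holds and with all zeros otherwise. */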
+#if !defined(VINT16x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_cmplt(vint16x4 vec1, vint16x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x4_CMPLT_DEFINED
+#endif
+#if !defined(VINT16x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_cmpeq(vint16x4 vec1, vint16x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x4_CMPEQ_DEFINED
+#endif
+#if !defined(VINT16x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_cmpgt(vint16x4 vec1, vint16x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x4_CMPGT_DEFINED
+#endif
+#if !defined(VINT16x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_cmple(vint16x4 vec1, vint16x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x4_CMPLE_DEFINED
+#endif
+#if !defined(VINT16x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_cmpge(vint16x4 vec1, vint16x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x4_CMPGE_DEFINED
+#endif
+#if !defined(VINT16x4_MIN_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_min(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VINT16x4_MIN_DEFINED
+#endif
+#if !defined(VINT16x4_MAX_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_max(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VINT16x4_MAX_DEFINED
+#endif
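+/* Arithmetic right shift. Right-shifting a negative value is
+ * implementation-defined in C99, so negative lanes go through the
+ * complement identity ~(~x >> n), which only ever shifts a non-negative
+ * value and still yields floor(x / 2^n). */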
+#if !defined(VINT16x4_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_rshift(vint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~((~vec1.generic[0]) >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~((~vec1.generic[1]) >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~((~vec1.generic[2]) >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~((~vec1.generic[3]) >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	return vec1;
+}
+# define VINT16x4_RSHIFT_DEFINED
+#endif
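+/* Logical right shift and left shift on signed lanes reinterpret the bits
+ * as the unsigned counterpart through a union, so the shift itself is
+ * always performed on an unsigned value. */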
+#if !defined(VINT16x4_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_lrshift(vint16x4 vec1, vuint16x4 vec2)
+{
+	union { vec_uint16 u; vec_int16 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	return vec1;
+}
+# define VINT16x4_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT16x4_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x4 vint16x4_lshift(vint16x4 vec1, vuint16x4 vec2)
+{
+	union { vec_uint16 u; vec_int16 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	return vec1;
+}
+# define VINT16x4_LSHIFT_DEFINED
+#endif
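+/* Usage sketch for the block above (a hypothetical caller; the other
+ * widths work the same way):
+ *
+ *	vec_int16 in[4] = {1, 2, 3, 4}, out[4];
+ *	vint16x4 v = vint16x4_load(in);
+ *	v = vint16x4_add(v, vint16x4_splat(10));
+ *	vint16x4_store(v, out);	// out = {11, 12, 13, 14}
+ */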
+#if !defined(VUINT16x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_splat(vec_uint16 x)
+{
+	vuint16x4 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	return vec;
+}
+# define VUINT16x4_SPLAT_DEFINED
+#endif
+#if !defined(VUINT16x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_load_aligned(const vec_uint16 x[4])
+{
+	vuint16x4 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
+# define VUINT16x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT16x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_load(const vec_uint16 x[4])
+{
+	vuint16x4 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
+# define VUINT16x4_LOAD_DEFINED
+#endif
+#if !defined(VUINT16x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint16x4_store_aligned(vuint16x4 vec, vec_uint16 x[4])
+{
+	memcpy(x, vec.generic, 8);
+}
+# define VUINT16x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT16x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint16x4_store(vuint16x4 vec, vec_uint16 x[4])
+{
+	memcpy(x, vec.generic, 8);
+}
+# define VUINT16x4_STORE_DEFINED
+#endif
+#if !defined(VUINT16x4_ADD_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_add(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	return vec1;
+}
+# define VUINT16x4_ADD_DEFINED
+#endif
+#if !defined(VUINT16x4_SUB_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_sub(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	return vec1;
+}
+# define VUINT16x4_SUB_DEFINED
+#endif
+#if !defined(VUINT16x4_MUL_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_mul(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	return vec1;
+}
+# define VUINT16x4_MUL_DEFINED
+#endif
+#if !defined(VUINT16x4_DIV_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_div(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VUINT16x4_DIV_DEFINED
+#endif
+#if !defined(VUINT16x4_MOD_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_mod(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VUINT16x4_MOD_DEFINED
+#endif
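+/* Unsigned avg uses the overflow-free identity
+ * (a >> 1) + (b >> 1) + ((a | b) & 1) == ceil((a + b) / 2). */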
+#if !defined(VUINT16x4_AVG_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_avg(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	return vec1;
+}
+# define VUINT16x4_AVG_DEFINED
+#endif
+#if !defined(VUINT16x4_AND_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_and(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	return vec1;
+}
+# define VUINT16x4_AND_DEFINED
+#endif
+#if !defined(VUINT16x4_OR_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_or(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	return vec1;
+}
+# define VUINT16x4_OR_DEFINED
+#endif
+#if !defined(VUINT16x4_XOR_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_xor(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	return vec1;
+}
+# define VUINT16x4_XOR_DEFINED
+#endif
+#if !defined(VUINT16x4_NOT_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_not(vuint16x4 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	return vec;
+}
+# define VUINT16x4_NOT_DEFINED
+#endif
+#if !defined(VUINT16x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmplt(vuint16x4 vec1, vuint16x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x4_CMPLT_DEFINED
+#endif
+#if !defined(VUINT16x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpeq(vuint16x4 vec1, vuint16x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x4_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT16x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpgt(vuint16x4 vec1, vuint16x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x4_CMPGT_DEFINED
+#endif
+#if !defined(VUINT16x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmple(vuint16x4 vec1, vuint16x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x4_CMPLE_DEFINED
+#endif
+#if !defined(VUINT16x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpge(vuint16x4 vec1, vuint16x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x4_CMPGE_DEFINED
+#endif
+#if !defined(VUINT16x4_MIN_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_min(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VUINT16x4_MIN_DEFINED
+#endif
+#if !defined(VUINT16x4_MAX_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_max(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VUINT16x4_MAX_DEFINED
+#endif
+#if !defined(VUINT16x4_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_rshift(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	return vec1;
+}
+# define VUINT16x4_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x4_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_lrshift(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	return vec1;
+}
+# define VUINT16x4_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x4_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x4 vuint16x4_lshift(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	return vec1;
+}
+# define VUINT16x4_LSHIFT_DEFINED
+#endif
+#if !defined(VINT16x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_splat(vec_int16 x)
+{
+	vint16x8 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	return vec;
+}
+# define VINT16x8_SPLAT_DEFINED
+#endif
+#if !defined(VINT16x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_load_aligned(const vec_int16 x[8])
+{
+	vint16x8 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VINT16x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT16x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_load(const vec_int16 x[8])
+{
+	vint16x8 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VINT16x8_LOAD_DEFINED
+#endif
+#if !defined(VINT16x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 x[8])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VINT16x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT16x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vint16x8_store(vint16x8 vec, vec_int16 x[8])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VINT16x8_STORE_DEFINED
+#endif
+#if !defined(VINT16x8_ADD_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_add(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	return vec1;
+}
+# define VINT16x8_ADD_DEFINED
+#endif
+#if !defined(VINT16x8_SUB_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_sub(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	return vec1;
+}
+# define VINT16x8_SUB_DEFINED
+#endif
+#if !defined(VINT16x8_MUL_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_mul(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	return vec1;
+}
+# define VINT16x8_MUL_DEFINED
+#endif
+#if !defined(VINT16x8_DIV_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_div(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VINT16x8_DIV_DEFINED
+#endif
+#if !defined(VINT16x8_MOD_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_mod(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VINT16x8_MOD_DEFINED
+#endif
+#if !defined(VINT16x8_AVG_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2)
+{
+	vec_int16 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[4] % 2);
+	y_d_rem = (vec2.generic[4] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[5] % 2);
+	y_d_rem = (vec2.generic[5] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[6] % 2);
+	y_d_rem = (vec2.generic[6] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[7] % 2);
+	y_d_rem = (vec2.generic[7] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT16x8_AVG_DEFINED
+#endif
+#if !defined(VINT16x8_AND_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	return vec1;
+}
+# define VINT16x8_AND_DEFINED
+#endif
+#if !defined(VINT16x8_OR_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	return vec1;
+}
+# define VINT16x8_OR_DEFINED
+#endif
+#if !defined(VINT16x8_XOR_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	return vec1;
+}
+# define VINT16x8_XOR_DEFINED
+#endif
+#if !defined(VINT16x8_NOT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_not(vint16x8 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	return vec;
+}
+# define VINT16x8_NOT_DEFINED
+#endif
+#if !defined(VINT16x8_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x8_CMPLT_DEFINED
+#endif
+#if !defined(VINT16x8_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x8_CMPEQ_DEFINED
+#endif
+#if !defined(VINT16x8_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x8_CMPGT_DEFINED
+#endif
+#if !defined(VINT16x8_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_cmple(vint16x8 vec1, vint16x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x8_CMPLE_DEFINED
+#endif
+#if !defined(VINT16x8_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpge(vint16x8 vec1, vint16x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x8_CMPGE_DEFINED
+#endif
+#if !defined(VINT16x8_MIN_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VINT16x8_MIN_DEFINED
+#endif
+#if !defined(VINT16x8_MAX_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VINT16x8_MAX_DEFINED
+#endif
+#if !defined(VINT16x8_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~((~vec1.generic[0]) >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~((~vec1.generic[1]) >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~((~vec1.generic[2]) >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~((~vec1.generic[3]) >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < 0) ? ~((~vec1.generic[4]) >> vec2.generic[4]) : (vec1.generic[4] >> vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < 0) ? ~((~vec1.generic[5]) >> vec2.generic[5]) : (vec1.generic[5] >> vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < 0) ? ~((~vec1.generic[6]) >> vec2.generic[6]) : (vec1.generic[6] >> vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < 0) ? ~((~vec1.generic[7]) >> vec2.generic[7]) : (vec1.generic[7] >> vec2.generic[7]);
+	return vec1;
+}
+# define VINT16x8_RSHIFT_DEFINED
+#endif
+#if !defined(VINT16x8_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_lrshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	union { vec_uint16 u; vec_int16 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u >>= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u >>= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u >>= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u >>= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	return vec1;
+}
+# define VINT16x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT16x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	union { vec_uint16 u; vec_int16 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u <<= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u <<= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u <<= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u <<= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	return vec1;
+}
+# define VINT16x8_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_splat(vec_uint16 x)
+{
+	vuint16x8 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	return vec;
+}
+# define VUINT16x8_SPLAT_DEFINED
+#endif
+#if !defined(VUINT16x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_load_aligned(const vec_uint16 x[8])
+{
+	vuint16x8 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VUINT16x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT16x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_load(const vec_uint16 x[8])
+{
+	vuint16x8 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VUINT16x8_LOAD_DEFINED
+#endif
+#if !defined(VUINT16x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 x[8])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VUINT16x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT16x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint16x8_store(vuint16x8 vec, vec_uint16 x[8])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VUINT16x8_STORE_DEFINED
+#endif
+#if !defined(VUINT16x8_ADD_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_add(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	return vec1;
+}
+# define VUINT16x8_ADD_DEFINED
+#endif
+#if !defined(VUINT16x8_SUB_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_sub(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	return vec1;
+}
+# define VUINT16x8_SUB_DEFINED
+#endif
+#if !defined(VUINT16x8_MUL_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_mul(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	return vec1;
+}
+# define VUINT16x8_MUL_DEFINED
+#endif
+#if !defined(VUINT16x8_DIV_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_div(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VUINT16x8_DIV_DEFINED
+#endif
+#if !defined(VUINT16x8_MOD_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_mod(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VUINT16x8_MOD_DEFINED
+#endif
+#if !defined(VUINT16x8_AVG_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1);
+	vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1);
+	vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1);
+	vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1);
+	return vec1;
+}
+# define VUINT16x8_AVG_DEFINED
+#endif
+#if !defined(VUINT16x8_AND_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_and(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	return vec1;
+}
+# define VUINT16x8_AND_DEFINED
+#endif
+#if !defined(VUINT16x8_OR_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_or(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	return vec1;
+}
+# define VUINT16x8_OR_DEFINED
+#endif
+#if !defined(VUINT16x8_XOR_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_xor(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	return vec1;
+}
+# define VUINT16x8_XOR_DEFINED
+#endif
+#if !defined(VUINT16x8_NOT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_not(vuint16x8 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	return vec;
+}
+# define VUINT16x8_NOT_DEFINED
+#endif
+#if !defined(VUINT16x8_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmplt(vuint16x8 vec1, vuint16x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x8_CMPLT_DEFINED
+#endif
+#if !defined(VUINT16x8_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpeq(vuint16x8 vec1, vuint16x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x8_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT16x8_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpgt(vuint16x8 vec1, vuint16x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x8_CMPGT_DEFINED
+#endif
+#if !defined(VUINT16x8_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmple(vuint16x8 vec1, vuint16x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x8_CMPLE_DEFINED
+#endif
+#if !defined(VUINT16x8_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpge(vuint16x8 vec1, vuint16x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x8_CMPGE_DEFINED
+#endif
+#if !defined(VUINT16x8_MIN_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_min(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VUINT16x8_MIN_DEFINED
+#endif
+#if !defined(VUINT16x8_MAX_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_max(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VUINT16x8_MAX_DEFINED
+#endif
+#if !defined(VUINT16x8_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_rshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	return vec1;
+}
+# define VUINT16x8_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x8_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	return vec1;
+}
+# define VUINT16x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	vec1.generic[4] <<= vec2.generic[4];
+	vec1.generic[5] <<= vec2.generic[5];
+	vec1.generic[6] <<= vec2.generic[6];
+	vec1.generic[7] <<= vec2.generic[7];
+	return vec1;
+}
+# define VUINT16x8_LSHIFT_DEFINED
+#endif
+#if !defined(VINT16x16_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_splat(vec_int16 x)
+{
+	vint16x16 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	return vec;
+}
+# define VINT16x16_SPLAT_DEFINED
+#endif
+#if !defined(VINT16x16_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_load_aligned(const vec_int16 x[16])
+{
+	vint16x16 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VINT16x16_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT16x16_LOAD_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_load(const vec_int16 x[16])
+{
+	vint16x16 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VINT16x16_LOAD_DEFINED
+#endif
+#if !defined(VINT16x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint16x16_store_aligned(vint16x16 vec, vec_int16 x[16])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VINT16x16_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT16x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vint16x16_store(vint16x16 vec, vec_int16 x[16])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VINT16x16_STORE_DEFINED
+#endif
+#if !defined(VINT16x16_ADD_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_add(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	return vec1;
+}
+# define VINT16x16_ADD_DEFINED
+#endif
+#if !defined(VINT16x16_SUB_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_sub(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	return vec1;
+}
+# define VINT16x16_SUB_DEFINED
+#endif
+#if !defined(VINT16x16_MUL_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_mul(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	return vec1;
+}
+# define VINT16x16_MUL_DEFINED
+#endif
+#if !defined(VINT16x16_DIV_DEFINED)
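+/* Lane-wise division: any lane whose divisor is zero yields 0 instead of
+ * invoking undefined behaviour; mod below uses the same guard. */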
+VEC_FUNC_IMPL vint16x16 vint16x16_div(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VINT16x16_DIV_DEFINED
+#endif
+#if !defined(VINT16x16_MOD_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_mod(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VINT16x16_MOD_DEFINED
+#endif
+#if !defined(VINT16x16_AVG_DEFINED)
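+/* Lane-wise signed average: each operand is halved before the addition so
+ * the intermediate sum cannot overflow, and the two discarded remainders
+ * are folded back in so that inexact results round up. */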
+VEC_FUNC_IMPL vint16x16 vint16x16_avg(vint16x16 vec1, vint16x16 vec2)
+{
+	vec_int16 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[4] % 2);
+	y_d_rem = (vec2.generic[4] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[5] % 2);
+	y_d_rem = (vec2.generic[5] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[6] % 2);
+	y_d_rem = (vec2.generic[6] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[7] % 2);
+	y_d_rem = (vec2.generic[7] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[8] % 2);
+	y_d_rem = (vec2.generic[8] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[8] = ((vec1.generic[8] / 2) + (vec2.generic[8] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[9] % 2);
+	y_d_rem = (vec2.generic[9] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[9] = ((vec1.generic[9] / 2) + (vec2.generic[9] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[10] % 2);
+	y_d_rem = (vec2.generic[10] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[10] = ((vec1.generic[10] / 2) + (vec2.generic[10] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[11] % 2);
+	y_d_rem = (vec2.generic[11] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[11] = ((vec1.generic[11] / 2) + (vec2.generic[11] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[12] % 2);
+	y_d_rem = (vec2.generic[12] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[12] = ((vec1.generic[12] / 2) + (vec2.generic[12] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[13] % 2);
+	y_d_rem = (vec2.generic[13] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[13] = ((vec1.generic[13] / 2) + (vec2.generic[13] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[14] % 2);
+	y_d_rem = (vec2.generic[14] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[14] = ((vec1.generic[14] / 2) + (vec2.generic[14] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[15] % 2);
+	y_d_rem = (vec2.generic[15] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[15] = ((vec1.generic[15] / 2) + (vec2.generic[15] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT16x16_AVG_DEFINED
+#endif
+#if !defined(VINT16x16_AND_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_and(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]);
+	return vec1;
+}
+# define VINT16x16_AND_DEFINED
+#endif
+#if !defined(VINT16x16_OR_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_or(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]);
+	return vec1;
+}
+# define VINT16x16_OR_DEFINED
+#endif
+#if !defined(VINT16x16_XOR_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_xor(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]);
+	return vec1;
+}
+# define VINT16x16_XOR_DEFINED
+#endif
+#if !defined(VINT16x16_NOT_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_not(vint16x16 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	vec.generic[8] = ~vec.generic[8];
+	vec.generic[9] = ~vec.generic[9];
+	vec.generic[10] = ~vec.generic[10];
+	vec.generic[11] = ~vec.generic[11];
+	vec.generic[12] = ~vec.generic[12];
+	vec.generic[13] = ~vec.generic[13];
+	vec.generic[14] = ~vec.generic[14];
+	vec.generic[15] = ~vec.generic[15];
+	return vec;
+}
+# define VINT16x16_NOT_DEFINED
+#endif
+#if !defined(VINT16x16_CMPLT_DEFINED)
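+/* The comparisons below return lane masks: every byte of a lane is set to
+ * 0xFF when the predicate holds and to 0 otherwise. */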
+VEC_FUNC_IMPL vint16x16 vint16x16_cmplt(vint16x16 vec1, vint16x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x16_CMPLT_DEFINED
+#endif
+#if !defined(VINT16x16_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_cmpeq(vint16x16 vec1, vint16x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x16_CMPEQ_DEFINED
+#endif
+#if !defined(VINT16x16_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_cmpgt(vint16x16 vec1, vint16x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x16_CMPGT_DEFINED
+#endif
+#if !defined(VINT16x16_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_cmple(vint16x16 vec1, vint16x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x16_CMPLE_DEFINED
+#endif
+#if !defined(VINT16x16_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_cmpge(vint16x16 vec1, vint16x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x16_CMPGE_DEFINED
+#endif
+#if !defined(VINT16x16_MIN_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_min(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VINT16x16_MIN_DEFINED
+#endif
+#if !defined(VINT16x16_MAX_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_max(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VINT16x16_MAX_DEFINED
+#endif
+#if !defined(VINT16x16_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x16 vint16x16_rshift(vint16x16 vec1, vuint16x16 vec2)
+{
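+	/* Arithmetic right shift. Negative lanes use the complement trick
+	 * ~(~x >> n) so the result does not rely on the implementation-defined
+	 * behaviour of shifting a negative value; this assumes the usual two's
+	 * complement representation. */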
+	vec1.generic[0] = ((vec1.generic[0] < 0) ? ~(~vec1.generic[0] >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]));
+	vec1.generic[1] = ((vec1.generic[1] < 0) ? ~(~vec1.generic[1] >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]));
+	vec1.generic[2] = ((vec1.generic[2] < 0) ? ~(~vec1.generic[2] >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]));
+	vec1.generic[3] = ((vec1.generic[3] < 0) ? ~(~vec1.generic[3] >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]));
+	vec1.generic[4] = ((vec1.generic[4] < 0) ? ~(~vec1.generic[4] >> vec2.generic[4]) : (vec1.generic[4] >> vec2.generic[4]));
+	vec1.generic[5] = ((vec1.generic[5] < 0) ? ~(~vec1.generic[5] >> vec2.generic[5]) : (vec1.generic[5] >> vec2.generic[5]));
+	vec1.generic[6] = ((vec1.generic[6] < 0) ? ~(~vec1.generic[6] >> vec2.generic[6]) : (vec1.generic[6] >> vec2.generic[6]));
+	vec1.generic[7] = ((vec1.generic[7] < 0) ? ~(~vec1.generic[7] >> vec2.generic[7]) : (vec1.generic[7] >> vec2.generic[7]));
+	vec1.generic[8] = ((vec1.generic[8] < 0) ? ~(~vec1.generic[8] >> vec2.generic[8]) : (vec1.generic[8] >> vec2.generic[8]));
+	vec1.generic[9] = ((vec1.generic[9] < 0) ? ~(~vec1.generic[9] >> vec2.generic[9]) : (vec1.generic[9] >> vec2.generic[9]));
+	vec1.generic[10] = ((vec1.generic[10] < 0) ? ~(~vec1.generic[10] >> vec2.generic[10]) : (vec1.generic[10] >> vec2.generic[10]));
+	vec1.generic[11] = ((vec1.generic[11] < 0) ? ~(~vec1.generic[11] >> vec2.generic[11]) : (vec1.generic[11] >> vec2.generic[11]));
+	vec1.generic[12] = ((vec1.generic[12] < 0) ? ~(~vec1.generic[12] >> vec2.generic[12]) : (vec1.generic[12] >> vec2.generic[12]));
+	vec1.generic[13] = ((vec1.generic[13] < 0) ? ~(~vec1.generic[13] >> vec2.generic[13]) : (vec1.generic[13] >> vec2.generic[13]));
+	vec1.generic[14] = ((vec1.generic[14] < 0) ? ~(~vec1.generic[14] >> vec2.generic[14]) : (vec1.generic[14] >> vec2.generic[14]));
+	vec1.generic[15] = ((vec1.generic[15] < 0) ? ~(~vec1.generic[15] >> vec2.generic[15]) : (vec1.generic[15] >> vec2.generic[15]));
+	return vec1;
+}
+# define VINT16x16_RSHIFT_DEFINED
+#endif
+#if !defined(VINT16x16_LRSHIFT_DEFINED)
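+/* Logical right shift of signed lanes: each lane is punned to unsigned
+ * through a union so the shift always fills with zero bits. */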
+VEC_FUNC_IMPL vint16x16 vint16x16_lrshift(vint16x16 vec1, vuint16x16 vec2)
+{
+	union { vec_uint16 u; vec_int16 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u >>= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u >>= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u >>= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u >>= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	x.s = vec1.generic[8];
+	x.u >>= vec2.generic[8];
+	vec1.generic[8] = x.s;
+	x.s = vec1.generic[9];
+	x.u >>= vec2.generic[9];
+	vec1.generic[9] = x.s;
+	x.s = vec1.generic[10];
+	x.u >>= vec2.generic[10];
+	vec1.generic[10] = x.s;
+	x.s = vec1.generic[11];
+	x.u >>= vec2.generic[11];
+	vec1.generic[11] = x.s;
+	x.s = vec1.generic[12];
+	x.u >>= vec2.generic[12];
+	vec1.generic[12] = x.s;
+	x.s = vec1.generic[13];
+	x.u >>= vec2.generic[13];
+	vec1.generic[13] = x.s;
+	x.s = vec1.generic[14];
+	x.u >>= vec2.generic[14];
+	vec1.generic[14] = x.s;
+	x.s = vec1.generic[15];
+	x.u >>= vec2.generic[15];
+	vec1.generic[15] = x.s;
+	return vec1;
+}
+# define VINT16x16_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT16x16_LSHIFT_DEFINED)
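+/* Left shift of signed lanes, performed on the unsigned reinterpretation so
+ * that shifting into (or past) the sign bit is not undefined. */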
+VEC_FUNC_IMPL vint16x16 vint16x16_lshift(vint16x16 vec1, vuint16x16 vec2)
+{
+	union { vec_uint16 u; vec_int16 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u <<= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u <<= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u <<= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u <<= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	x.s = vec1.generic[8];
+	x.u <<= vec2.generic[8];
+	vec1.generic[8] = x.s;
+	x.s = vec1.generic[9];
+	x.u <<= vec2.generic[9];
+	vec1.generic[9] = x.s;
+	x.s = vec1.generic[10];
+	x.u <<= vec2.generic[10];
+	vec1.generic[10] = x.s;
+	x.s = vec1.generic[11];
+	x.u <<= vec2.generic[11];
+	vec1.generic[11] = x.s;
+	x.s = vec1.generic[12];
+	x.u <<= vec2.generic[12];
+	vec1.generic[12] = x.s;
+	x.s = vec1.generic[13];
+	x.u <<= vec2.generic[13];
+	vec1.generic[13] = x.s;
+	x.s = vec1.generic[14];
+	x.u <<= vec2.generic[14];
+	vec1.generic[14] = x.s;
+	x.s = vec1.generic[15];
+	x.u <<= vec2.generic[15];
+	vec1.generic[15] = x.s;
+	return vec1;
+}
+# define VINT16x16_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x16_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_splat(vec_uint16 x)
+{
+	vuint16x16 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	return vec;
+}
+# define VUINT16x16_SPLAT_DEFINED
+#endif
+#if !defined(VUINT16x16_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_load_aligned(const vec_uint16 x[16])
+{
+	vuint16x16 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VUINT16x16_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT16x16_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_load(const vec_uint16 x[16])
+{
+	vuint16x16 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VUINT16x16_LOAD_DEFINED
+#endif
+#if !defined(VUINT16x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint16x16_store_aligned(vuint16x16 vec, vec_uint16 x[16])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VUINT16x16_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT16x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint16x16_store(vuint16x16 vec, vec_uint16 x[16])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VUINT16x16_STORE_DEFINED
+#endif
+#if !defined(VUINT16x16_ADD_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_add(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	return vec1;
+}
+# define VUINT16x16_ADD_DEFINED
+#endif
+#if !defined(VUINT16x16_SUB_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_sub(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	return vec1;
+}
+# define VUINT16x16_SUB_DEFINED
+#endif
+#if !defined(VUINT16x16_MUL_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_mul(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	return vec1;
+}
+# define VUINT16x16_MUL_DEFINED
+#endif
+#if !defined(VUINT16x16_DIV_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_div(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VUINT16x16_DIV_DEFINED
+#endif
+#if !defined(VUINT16x16_MOD_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_mod(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VUINT16x16_MOD_DEFINED
+#endif
+#if !defined(VUINT16x16_AVG_DEFINED)
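+/* Unsigned lane-wise average: (a >> 1) + (b >> 1) + ((a | b) & 1) cannot
+ * overflow the intermediate sum and rounds inexact results up. */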
+VEC_FUNC_IMPL vuint16x16 vuint16x16_avg(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1);
+	vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1);
+	vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1);
+	vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1);
+	vec1.generic[8] = (vec1.generic[8] >> 1) + (vec2.generic[8] >> 1) + ((vec1.generic[8] | vec2.generic[8]) & 1);
+	vec1.generic[9] = (vec1.generic[9] >> 1) + (vec2.generic[9] >> 1) + ((vec1.generic[9] | vec2.generic[9]) & 1);
+	vec1.generic[10] = (vec1.generic[10] >> 1) + (vec2.generic[10] >> 1) + ((vec1.generic[10] | vec2.generic[10]) & 1);
+	vec1.generic[11] = (vec1.generic[11] >> 1) + (vec2.generic[11] >> 1) + ((vec1.generic[11] | vec2.generic[11]) & 1);
+	vec1.generic[12] = (vec1.generic[12] >> 1) + (vec2.generic[12] >> 1) + ((vec1.generic[12] | vec2.generic[12]) & 1);
+	vec1.generic[13] = (vec1.generic[13] >> 1) + (vec2.generic[13] >> 1) + ((vec1.generic[13] | vec2.generic[13]) & 1);
+	vec1.generic[14] = (vec1.generic[14] >> 1) + (vec2.generic[14] >> 1) + ((vec1.generic[14] | vec2.generic[14]) & 1);
+	vec1.generic[15] = (vec1.generic[15] >> 1) + (vec2.generic[15] >> 1) + ((vec1.generic[15] | vec2.generic[15]) & 1);
+	return vec1;
+}
+# define VUINT16x16_AVG_DEFINED
+#endif
+#if !defined(VUINT16x16_AND_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_and(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]);
+	return vec1;
+}
+# define VUINT16x16_AND_DEFINED
+#endif
+#if !defined(VUINT16x16_OR_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_or(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]);
+	return vec1;
+}
+# define VUINT16x16_OR_DEFINED
+#endif
+#if !defined(VUINT16x16_XOR_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_xor(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]);
+	return vec1;
+}
+# define VUINT16x16_XOR_DEFINED
+#endif
+#if !defined(VUINT16x16_NOT_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_not(vuint16x16 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	vec.generic[8] = ~vec.generic[8];
+	vec.generic[9] = ~vec.generic[9];
+	vec.generic[10] = ~vec.generic[10];
+	vec.generic[11] = ~vec.generic[11];
+	vec.generic[12] = ~vec.generic[12];
+	vec.generic[13] = ~vec.generic[13];
+	vec.generic[14] = ~vec.generic[14];
+	vec.generic[15] = ~vec.generic[15];
+	return vec;
+}
+# define VUINT16x16_NOT_DEFINED
+#endif
+#if !defined(VUINT16x16_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmplt(vuint16x16 vec1, vuint16x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x16_CMPLT_DEFINED
+#endif
+#if !defined(VUINT16x16_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpeq(vuint16x16 vec1, vuint16x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x16_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT16x16_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpgt(vuint16x16 vec1, vuint16x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x16_CMPGT_DEFINED
+#endif
+#if !defined(VUINT16x16_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmple(vuint16x16 vec1, vuint16x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x16_CMPLE_DEFINED
+#endif
+#if !defined(VUINT16x16_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpge(vuint16x16 vec1, vuint16x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x16_CMPGE_DEFINED
+#endif
+#if !defined(VUINT16x16_MIN_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_min(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VUINT16x16_MIN_DEFINED
+#endif
+#if !defined(VUINT16x16_MAX_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_max(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VUINT16x16_MAX_DEFINED
+#endif
+#if !defined(VUINT16x16_RSHIFT_DEFINED)
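+/* Unsigned right shifts are already logical, so rshift and lrshift below
+ * share the same per-lane implementation. */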
+VEC_FUNC_IMPL vuint16x16 vuint16x16_rshift(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	vec1.generic[8] >>= vec2.generic[8];
+	vec1.generic[9] >>= vec2.generic[9];
+	vec1.generic[10] >>= vec2.generic[10];
+	vec1.generic[11] >>= vec2.generic[11];
+	vec1.generic[12] >>= vec2.generic[12];
+	vec1.generic[13] >>= vec2.generic[13];
+	vec1.generic[14] >>= vec2.generic[14];
+	vec1.generic[15] >>= vec2.generic[15];
+	return vec1;
+}
+# define VUINT16x16_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x16_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_lrshift(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	vec1.generic[8] >>= vec2.generic[8];
+	vec1.generic[9] >>= vec2.generic[9];
+	vec1.generic[10] >>= vec2.generic[10];
+	vec1.generic[11] >>= vec2.generic[11];
+	vec1.generic[12] >>= vec2.generic[12];
+	vec1.generic[13] >>= vec2.generic[13];
+	vec1.generic[14] >>= vec2.generic[14];
+	vec1.generic[15] >>= vec2.generic[15];
+	return vec1;
+}
+# define VUINT16x16_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x16_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x16 vuint16x16_lshift(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	vec1.generic[4] <<= vec2.generic[4];
+	vec1.generic[5] <<= vec2.generic[5];
+	vec1.generic[6] <<= vec2.generic[6];
+	vec1.generic[7] <<= vec2.generic[7];
+	vec1.generic[8] <<= vec2.generic[8];
+	vec1.generic[9] <<= vec2.generic[9];
+	vec1.generic[10] <<= vec2.generic[10];
+	vec1.generic[11] <<= vec2.generic[11];
+	vec1.generic[12] <<= vec2.generic[12];
+	vec1.generic[13] <<= vec2.generic[13];
+	vec1.generic[14] <<= vec2.generic[14];
+	vec1.generic[15] <<= vec2.generic[15];
+	return vec1;
+}
+# define VUINT16x16_LSHIFT_DEFINED
+#endif
+#if !defined(VINT16x32_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_splat(vec_int16 x)
+{
+	vint16x32 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	vec.generic[16] = x;
+	vec.generic[17] = x;
+	vec.generic[18] = x;
+	vec.generic[19] = x;
+	vec.generic[20] = x;
+	vec.generic[21] = x;
+	vec.generic[22] = x;
+	vec.generic[23] = x;
+	vec.generic[24] = x;
+	vec.generic[25] = x;
+	vec.generic[26] = x;
+	vec.generic[27] = x;
+	vec.generic[28] = x;
+	vec.generic[29] = x;
+	vec.generic[30] = x;
+	vec.generic[31] = x;
+	return vec;
+}
+# define VINT16x32_SPLAT_DEFINED
+#endif
+#if !defined(VINT16x32_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_load_aligned(const vec_int16 x[32])
+{
+	vint16x32 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VINT16x32_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT16x32_LOAD_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_load(const vec_int16 x[32])
+{
+	vint16x32 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VINT16x32_LOAD_DEFINED
+#endif
+#if !defined(VINT16x32_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint16x32_store_aligned(vint16x32 vec, vec_int16 x[32])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VINT16x32_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT16x32_STORE_DEFINED)
+VEC_FUNC_IMPL void vint16x32_store(vint16x32 vec, vec_int16 x[32])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VINT16x32_STORE_DEFINED
+#endif
+#if !defined(VINT16x32_ADD_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_add(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] + vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] + vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] + vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] + vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] + vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] + vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] + vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] + vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] + vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] + vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] + vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] + vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] + vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] + vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] + vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] + vec2.generic[31]);
+	return vec1;
+}
+# define VINT16x32_ADD_DEFINED
+#endif
+#if !defined(VINT16x32_SUB_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_sub(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] - vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] - vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] - vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] - vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] - vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] - vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] - vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] - vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] - vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] - vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] - vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] - vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] - vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] - vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] - vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] - vec2.generic[31]);
+	return vec1;
+}
+# define VINT16x32_SUB_DEFINED
+#endif
+#if !defined(VINT16x32_MUL_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_mul(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] * vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] * vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] * vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] * vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] * vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] * vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] * vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] * vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] * vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] * vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] * vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] * vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] * vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] * vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] * vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] * vec2.generic[31]);
+	return vec1;
+}
+# define VINT16x32_MUL_DEFINED
+#endif
+#if !defined(VINT16x32_DIV_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_div(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] / vec2.generic[16]) : 0);
+	vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] / vec2.generic[17]) : 0);
+	vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] / vec2.generic[18]) : 0);
+	vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] / vec2.generic[19]) : 0);
+	vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] / vec2.generic[20]) : 0);
+	vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] / vec2.generic[21]) : 0);
+	vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] / vec2.generic[22]) : 0);
+	vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] / vec2.generic[23]) : 0);
+	vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] / vec2.generic[24]) : 0);
+	vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] / vec2.generic[25]) : 0);
+	vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] / vec2.generic[26]) : 0);
+	vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] / vec2.generic[27]) : 0);
+	vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] / vec2.generic[28]) : 0);
+	vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] / vec2.generic[29]) : 0);
+	vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] / vec2.generic[30]) : 0);
+	vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] / vec2.generic[31]) : 0);
+	return vec1;
+}
+# define VINT16x32_DIV_DEFINED
+#endif
+#if !defined(VINT16x32_MOD_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_mod(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0);
+	vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] % vec2.generic[16]) : 0);
+	vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] % vec2.generic[17]) : 0);
+	vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] % vec2.generic[18]) : 0);
+	vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] % vec2.generic[19]) : 0);
+	vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] % vec2.generic[20]) : 0);
+	vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] % vec2.generic[21]) : 0);
+	vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] % vec2.generic[22]) : 0);
+	vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] % vec2.generic[23]) : 0);
+	vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] % vec2.generic[24]) : 0);
+	vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] % vec2.generic[25]) : 0);
+	vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] % vec2.generic[26]) : 0);
+	vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] % vec2.generic[27]) : 0);
+	vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] % vec2.generic[28]) : 0);
+	vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] % vec2.generic[29]) : 0);
+	vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] % vec2.generic[30]) : 0);
+	vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] % vec2.generic[31]) : 0);
+	return vec1;
+}
+# define VINT16x32_MOD_DEFINED
+#endif
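+/* Note: the div and mod implementations above guard every lane with a
+ * (divisor ? ... : 0) check, so lanes divided by zero quietly produce 0
+ * instead of invoking undefined behaviour. */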
+#if !defined(VINT16x32_AVG_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_avg(vint16x32 vec1, vint16x32 vec2)
+{
+	vec_int16 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[4] % 2);
+	y_d_rem = (vec2.generic[4] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[5] % 2);
+	y_d_rem = (vec2.generic[5] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[6] % 2);
+	y_d_rem = (vec2.generic[6] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[7] % 2);
+	y_d_rem = (vec2.generic[7] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[8] % 2);
+	y_d_rem = (vec2.generic[8] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[8] = ((vec1.generic[8] / 2) + (vec2.generic[8] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[9] % 2);
+	y_d_rem = (vec2.generic[9] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[9] = ((vec1.generic[9] / 2) + (vec2.generic[9] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[10] % 2);
+	y_d_rem = (vec2.generic[10] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[10] = ((vec1.generic[10] / 2) + (vec2.generic[10] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[11] % 2);
+	y_d_rem = (vec2.generic[11] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[11] = ((vec1.generic[11] / 2) + (vec2.generic[11] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[12] % 2);
+	y_d_rem = (vec2.generic[12] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[12] = ((vec1.generic[12] / 2) + (vec2.generic[12] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[13] % 2);
+	y_d_rem = (vec2.generic[13] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[13] = ((vec1.generic[13] / 2) + (vec2.generic[13] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[14] % 2);
+	y_d_rem = (vec2.generic[14] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[14] = ((vec1.generic[14] / 2) + (vec2.generic[14] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[15] % 2);
+	y_d_rem = (vec2.generic[15] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[15] = ((vec1.generic[15] / 2) + (vec2.generic[15] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[16] % 2);
+	y_d_rem = (vec2.generic[16] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[16] = ((vec1.generic[16] / 2) + (vec2.generic[16] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[17] % 2);
+	y_d_rem = (vec2.generic[17] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[17] = ((vec1.generic[17] / 2) + (vec2.generic[17] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[18] % 2);
+	y_d_rem = (vec2.generic[18] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[18] = ((vec1.generic[18] / 2) + (vec2.generic[18] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[19] % 2);
+	y_d_rem = (vec2.generic[19] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[19] = ((vec1.generic[19] / 2) + (vec2.generic[19] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[20] % 2);
+	y_d_rem = (vec2.generic[20] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[20] = ((vec1.generic[20] / 2) + (vec2.generic[20] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[21] % 2);
+	y_d_rem = (vec2.generic[21] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[21] = ((vec1.generic[21] / 2) + (vec2.generic[21] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[22] % 2);
+	y_d_rem = (vec2.generic[22] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[22] = ((vec1.generic[22] / 2) + (vec2.generic[22] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[23] % 2);
+	y_d_rem = (vec2.generic[23] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[23] = ((vec1.generic[23] / 2) + (vec2.generic[23] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[24] % 2);
+	y_d_rem = (vec2.generic[24] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[24] = ((vec1.generic[24] / 2) + (vec2.generic[24] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[25] % 2);
+	y_d_rem = (vec2.generic[25] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[25] = ((vec1.generic[25] / 2) + (vec2.generic[25] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[26] % 2);
+	y_d_rem = (vec2.generic[26] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[26] = ((vec1.generic[26] / 2) + (vec2.generic[26] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[27] % 2);
+	y_d_rem = (vec2.generic[27] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[27] = ((vec1.generic[27] / 2) + (vec2.generic[27] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[28] % 2);
+	y_d_rem = (vec2.generic[28] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[28] = ((vec1.generic[28] / 2) + (vec2.generic[28] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[29] % 2);
+	y_d_rem = (vec2.generic[29] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[29] = ((vec1.generic[29] / 2) + (vec2.generic[29] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[30] % 2);
+	y_d_rem = (vec2.generic[30] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[30] = ((vec1.generic[30] / 2) + (vec2.generic[30] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[31] % 2);
+	y_d_rem = (vec2.generic[31] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[31] = ((vec1.generic[31] / 2) + (vec2.generic[31] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT16x32_AVG_DEFINED
+#endif
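+/* Note: the average above avoids computing (a + b) directly, which could
+ * overflow a 16-bit lane; it sums the halved operands and then folds the
+ * two leftover remainders back in, rounding any half upward. For example,
+ * lanes holding 7 and 4 give (3 + 2) + 0 + 1 = 6, the rounded-up mean. */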
+#if !defined(VINT16x32_AND_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_and(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] & vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] & vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] & vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] & vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] & vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] & vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] & vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] & vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] & vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] & vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] & vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] & vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] & vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] & vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] & vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] & vec2.generic[31]);
+	return vec1;
+}
+# define VINT16x32_AND_DEFINED
+#endif
+#if !defined(VINT16x32_OR_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_or(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] | vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] | vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] | vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] | vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] | vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] | vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] | vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] | vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] | vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] | vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] | vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] | vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] | vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] | vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] | vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] | vec2.generic[31]);
+	return vec1;
+}
+# define VINT16x32_OR_DEFINED
+#endif
+#if !defined(VINT16x32_XOR_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_xor(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] ^ vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] ^ vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] ^ vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] ^ vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] ^ vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] ^ vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] ^ vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] ^ vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] ^ vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] ^ vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] ^ vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] ^ vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] ^ vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] ^ vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] ^ vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] ^ vec2.generic[31]);
+	return vec1;
+}
+# define VINT16x32_XOR_DEFINED
+#endif
+#if !defined(VINT16x32_NOT_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_not(vint16x32 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	vec.generic[8] = ~vec.generic[8];
+	vec.generic[9] = ~vec.generic[9];
+	vec.generic[10] = ~vec.generic[10];
+	vec.generic[11] = ~vec.generic[11];
+	vec.generic[12] = ~vec.generic[12];
+	vec.generic[13] = ~vec.generic[13];
+	vec.generic[14] = ~vec.generic[14];
+	vec.generic[15] = ~vec.generic[15];
+	vec.generic[16] = ~vec.generic[16];
+	vec.generic[17] = ~vec.generic[17];
+	vec.generic[18] = ~vec.generic[18];
+	vec.generic[19] = ~vec.generic[19];
+	vec.generic[20] = ~vec.generic[20];
+	vec.generic[21] = ~vec.generic[21];
+	vec.generic[22] = ~vec.generic[22];
+	vec.generic[23] = ~vec.generic[23];
+	vec.generic[24] = ~vec.generic[24];
+	vec.generic[25] = ~vec.generic[25];
+	vec.generic[26] = ~vec.generic[26];
+	vec.generic[27] = ~vec.generic[27];
+	vec.generic[28] = ~vec.generic[28];
+	vec.generic[29] = ~vec.generic[29];
+	vec.generic[30] = ~vec.generic[30];
+	vec.generic[31] = ~vec.generic[31];
+	return vec;
+}
+# define VINT16x32_NOT_DEFINED
+#endif
+#if !defined(VINT16x32_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_cmplt(vint16x32 vec1, vint16x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[16], (vec1.generic[16] < vec2.generic[16]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[17], (vec1.generic[17] < vec2.generic[17]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[18], (vec1.generic[18] < vec2.generic[18]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[19], (vec1.generic[19] < vec2.generic[19]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[20], (vec1.generic[20] < vec2.generic[20]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[21], (vec1.generic[21] < vec2.generic[21]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[22], (vec1.generic[22] < vec2.generic[22]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[23], (vec1.generic[23] < vec2.generic[23]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[24], (vec1.generic[24] < vec2.generic[24]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[25], (vec1.generic[25] < vec2.generic[25]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[26], (vec1.generic[26] < vec2.generic[26]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[27], (vec1.generic[27] < vec2.generic[27]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[28], (vec1.generic[28] < vec2.generic[28]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[29], (vec1.generic[29] < vec2.generic[29]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[30], (vec1.generic[30] < vec2.generic[30]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[31], (vec1.generic[31] < vec2.generic[31]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x32_CMPLT_DEFINED
+#endif
+#if !defined(VINT16x32_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_cmpeq(vint16x32 vec1, vint16x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[16], (vec1.generic[16] == vec2.generic[16]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[17], (vec1.generic[17] == vec2.generic[17]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[18], (vec1.generic[18] == vec2.generic[18]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[19], (vec1.generic[19] == vec2.generic[19]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[20], (vec1.generic[20] == vec2.generic[20]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[21], (vec1.generic[21] == vec2.generic[21]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[22], (vec1.generic[22] == vec2.generic[22]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[23], (vec1.generic[23] == vec2.generic[23]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[24], (vec1.generic[24] == vec2.generic[24]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[25], (vec1.generic[25] == vec2.generic[25]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[26], (vec1.generic[26] == vec2.generic[26]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[27], (vec1.generic[27] == vec2.generic[27]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[28], (vec1.generic[28] == vec2.generic[28]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[29], (vec1.generic[29] == vec2.generic[29]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[30], (vec1.generic[30] == vec2.generic[30]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[31], (vec1.generic[31] == vec2.generic[31]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x32_CMPEQ_DEFINED
+#endif
+#if !defined(VINT16x32_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_cmpgt(vint16x32 vec1, vint16x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[16], (vec1.generic[16] > vec2.generic[16]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[17], (vec1.generic[17] > vec2.generic[17]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[18], (vec1.generic[18] > vec2.generic[18]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[19], (vec1.generic[19] > vec2.generic[19]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[20], (vec1.generic[20] > vec2.generic[20]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[21], (vec1.generic[21] > vec2.generic[21]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[22], (vec1.generic[22] > vec2.generic[22]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[23], (vec1.generic[23] > vec2.generic[23]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[24], (vec1.generic[24] > vec2.generic[24]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[25], (vec1.generic[25] > vec2.generic[25]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[26], (vec1.generic[26] > vec2.generic[26]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[27], (vec1.generic[27] > vec2.generic[27]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[28], (vec1.generic[28] > vec2.generic[28]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[29], (vec1.generic[29] > vec2.generic[29]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[30], (vec1.generic[30] > vec2.generic[30]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[31], (vec1.generic[31] > vec2.generic[31]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x32_CMPGT_DEFINED
+#endif
+#if !defined(VINT16x32_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_cmple(vint16x32 vec1, vint16x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[16], (vec1.generic[16] <= vec2.generic[16]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[17], (vec1.generic[17] <= vec2.generic[17]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[18], (vec1.generic[18] <= vec2.generic[18]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[19], (vec1.generic[19] <= vec2.generic[19]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[20], (vec1.generic[20] <= vec2.generic[20]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[21], (vec1.generic[21] <= vec2.generic[21]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[22], (vec1.generic[22] <= vec2.generic[22]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[23], (vec1.generic[23] <= vec2.generic[23]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[24], (vec1.generic[24] <= vec2.generic[24]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[25], (vec1.generic[25] <= vec2.generic[25]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[26], (vec1.generic[26] <= vec2.generic[26]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[27], (vec1.generic[27] <= vec2.generic[27]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[28], (vec1.generic[28] <= vec2.generic[28]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[29], (vec1.generic[29] <= vec2.generic[29]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[30], (vec1.generic[30] <= vec2.generic[30]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[31], (vec1.generic[31] <= vec2.generic[31]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x32_CMPLE_DEFINED
+#endif
+#if !defined(VINT16x32_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_cmpge(vint16x32 vec1, vint16x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[16], (vec1.generic[16] >= vec2.generic[16]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[17], (vec1.generic[17] >= vec2.generic[17]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[18], (vec1.generic[18] >= vec2.generic[18]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[19], (vec1.generic[19] >= vec2.generic[19]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[20], (vec1.generic[20] >= vec2.generic[20]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[21], (vec1.generic[21] >= vec2.generic[21]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[22], (vec1.generic[22] >= vec2.generic[22]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[23], (vec1.generic[23] >= vec2.generic[23]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[24], (vec1.generic[24] >= vec2.generic[24]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[25], (vec1.generic[25] >= vec2.generic[25]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[26], (vec1.generic[26] >= vec2.generic[26]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[27], (vec1.generic[27] >= vec2.generic[27]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[28], (vec1.generic[28] >= vec2.generic[28]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[29], (vec1.generic[29] >= vec2.generic[29]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[30], (vec1.generic[30] >= vec2.generic[30]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[31], (vec1.generic[31] >= vec2.generic[31]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VINT16x32_CMPGE_DEFINED
+#endif
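+/* Note: the comparison family above (cmplt, cmpeq, cmpgt, cmple, cmpge)
+ * follows the usual SIMD convention of returning a per-lane mask rather
+ * than a boolean: each 2-byte lane is memset to all ones (0xFFFF) when
+ * the predicate holds and to all zeros otherwise. */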
+#if !defined(VINT16x32_MIN_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_min(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] < vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] < vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] < vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] < vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] < vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] < vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] < vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] < vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] < vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] < vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] < vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] < vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] < vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] < vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] < vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] < vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+	return vec1;
+}
+# define VINT16x32_MIN_DEFINED
+#endif
+#if !defined(VINT16x32_MAX_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_max(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] > vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] > vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] > vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] > vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] > vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] > vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] > vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] > vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] > vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] > vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] > vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] > vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] > vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] > vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] > vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] > vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+	return vec1;
+}
+# define VINT16x32_MAX_DEFINED
+#endif
+#if !defined(VINT16x32_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_rshift(vint16x32 vec1, vuint16x32 vec2)
+{
+	/* Arithmetic right shift: non-negative lanes shift directly; negative
+	 * lanes use the ~(~x >> n) identity so the value actually shifted is
+	 * never negative (assumes a two's-complement vec_int16). */
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~(~vec1.generic[0] >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~(~vec1.generic[1] >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~(~vec1.generic[2] >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~(~vec1.generic[3] >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < 0) ? ~(~vec1.generic[4] >> vec2.generic[4]) : (vec1.generic[4] >> vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < 0) ? ~(~vec1.generic[5] >> vec2.generic[5]) : (vec1.generic[5] >> vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < 0) ? ~(~vec1.generic[6] >> vec2.generic[6]) : (vec1.generic[6] >> vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < 0) ? ~(~vec1.generic[7] >> vec2.generic[7]) : (vec1.generic[7] >> vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < 0) ? ~(~vec1.generic[8] >> vec2.generic[8]) : (vec1.generic[8] >> vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < 0) ? ~(~vec1.generic[9] >> vec2.generic[9]) : (vec1.generic[9] >> vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < 0) ? ~(~vec1.generic[10] >> vec2.generic[10]) : (vec1.generic[10] >> vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < 0) ? ~(~vec1.generic[11] >> vec2.generic[11]) : (vec1.generic[11] >> vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < 0) ? ~(~vec1.generic[12] >> vec2.generic[12]) : (vec1.generic[12] >> vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < 0) ? ~(~vec1.generic[13] >> vec2.generic[13]) : (vec1.generic[13] >> vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < 0) ? ~(~vec1.generic[14] >> vec2.generic[14]) : (vec1.generic[14] >> vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < 0) ? ~(~vec1.generic[15] >> vec2.generic[15]) : (vec1.generic[15] >> vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] < 0) ? ~(~vec1.generic[16] >> vec2.generic[16]) : (vec1.generic[16] >> vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] < 0) ? ~(~vec1.generic[17] >> vec2.generic[17]) : (vec1.generic[17] >> vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] < 0) ? ~(~vec1.generic[18] >> vec2.generic[18]) : (vec1.generic[18] >> vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] < 0) ? ~(~vec1.generic[19] >> vec2.generic[19]) : (vec1.generic[19] >> vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] < 0) ? ~(~vec1.generic[20] >> vec2.generic[20]) : (vec1.generic[20] >> vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] < 0) ? ~(~vec1.generic[21] >> vec2.generic[21]) : (vec1.generic[21] >> vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] < 0) ? ~(~vec1.generic[22] >> vec2.generic[22]) : (vec1.generic[22] >> vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] < 0) ? ~(~vec1.generic[23] >> vec2.generic[23]) : (vec1.generic[23] >> vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] < 0) ? ~(~vec1.generic[24] >> vec2.generic[24]) : (vec1.generic[24] >> vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] < 0) ? ~(~vec1.generic[25] >> vec2.generic[25]) : (vec1.generic[25] >> vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] < 0) ? ~(~vec1.generic[26] >> vec2.generic[26]) : (vec1.generic[26] >> vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] < 0) ? ~(~vec1.generic[27] >> vec2.generic[27]) : (vec1.generic[27] >> vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] < 0) ? ~(~vec1.generic[28] >> vec2.generic[28]) : (vec1.generic[28] >> vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] < 0) ? ~(~vec1.generic[29] >> vec2.generic[29]) : (vec1.generic[29] >> vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] < 0) ? ~(~vec1.generic[30] >> vec2.generic[30]) : (vec1.generic[30] >> vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] < 0) ? ~(~vec1.generic[31] >> vec2.generic[31]) : (vec1.generic[31] >> vec2.generic[31]);
+	return vec1;
+}
+# define VINT16x32_RSHIFT_DEFINED
+#endif
+#if !defined(VINT16x32_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_lrshift(vint16x32 vec1, vuint16x32 vec2)
+{
+	union { vec_uint16 u; vec_int16 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u >>= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u >>= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u >>= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u >>= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	x.s = vec1.generic[8];
+	x.u >>= vec2.generic[8];
+	vec1.generic[8] = x.s;
+	x.s = vec1.generic[9];
+	x.u >>= vec2.generic[9];
+	vec1.generic[9] = x.s;
+	x.s = vec1.generic[10];
+	x.u >>= vec2.generic[10];
+	vec1.generic[10] = x.s;
+	x.s = vec1.generic[11];
+	x.u >>= vec2.generic[11];
+	vec1.generic[11] = x.s;
+	x.s = vec1.generic[12];
+	x.u >>= vec2.generic[12];
+	vec1.generic[12] = x.s;
+	x.s = vec1.generic[13];
+	x.u >>= vec2.generic[13];
+	vec1.generic[13] = x.s;
+	x.s = vec1.generic[14];
+	x.u >>= vec2.generic[14];
+	vec1.generic[14] = x.s;
+	x.s = vec1.generic[15];
+	x.u >>= vec2.generic[15];
+	vec1.generic[15] = x.s;
+	x.s = vec1.generic[16];
+	x.u >>= vec2.generic[16];
+	vec1.generic[16] = x.s;
+	x.s = vec1.generic[17];
+	x.u >>= vec2.generic[17];
+	vec1.generic[17] = x.s;
+	x.s = vec1.generic[18];
+	x.u >>= vec2.generic[18];
+	vec1.generic[18] = x.s;
+	x.s = vec1.generic[19];
+	x.u >>= vec2.generic[19];
+	vec1.generic[19] = x.s;
+	x.s = vec1.generic[20];
+	x.u >>= vec2.generic[20];
+	vec1.generic[20] = x.s;
+	x.s = vec1.generic[21];
+	x.u >>= vec2.generic[21];
+	vec1.generic[21] = x.s;
+	x.s = vec1.generic[22];
+	x.u >>= vec2.generic[22];
+	vec1.generic[22] = x.s;
+	x.s = vec1.generic[23];
+	x.u >>= vec2.generic[23];
+	vec1.generic[23] = x.s;
+	x.s = vec1.generic[24];
+	x.u >>= vec2.generic[24];
+	vec1.generic[24] = x.s;
+	x.s = vec1.generic[25];
+	x.u >>= vec2.generic[25];
+	vec1.generic[25] = x.s;
+	x.s = vec1.generic[26];
+	x.u >>= vec2.generic[26];
+	vec1.generic[26] = x.s;
+	x.s = vec1.generic[27];
+	x.u >>= vec2.generic[27];
+	vec1.generic[27] = x.s;
+	x.s = vec1.generic[28];
+	x.u >>= vec2.generic[28];
+	vec1.generic[28] = x.s;
+	x.s = vec1.generic[29];
+	x.u >>= vec2.generic[29];
+	vec1.generic[29] = x.s;
+	x.s = vec1.generic[30];
+	x.u >>= vec2.generic[30];
+	vec1.generic[30] = x.s;
+	x.s = vec1.generic[31];
+	x.u >>= vec2.generic[31];
+	vec1.generic[31] = x.s;
+	return vec1;
+}
+# define VINT16x32_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT16x32_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x32 vint16x32_lshift(vint16x32 vec1, vuint16x32 vec2)
+{
+	union { vec_uint16 u; vec_int16 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u <<= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u <<= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u <<= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u <<= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	x.s = vec1.generic[8];
+	x.u <<= vec2.generic[8];
+	vec1.generic[8] = x.s;
+	x.s = vec1.generic[9];
+	x.u <<= vec2.generic[9];
+	vec1.generic[9] = x.s;
+	x.s = vec1.generic[10];
+	x.u <<= vec2.generic[10];
+	vec1.generic[10] = x.s;
+	x.s = vec1.generic[11];
+	x.u <<= vec2.generic[11];
+	vec1.generic[11] = x.s;
+	x.s = vec1.generic[12];
+	x.u <<= vec2.generic[12];
+	vec1.generic[12] = x.s;
+	x.s = vec1.generic[13];
+	x.u <<= vec2.generic[13];
+	vec1.generic[13] = x.s;
+	x.s = vec1.generic[14];
+	x.u <<= vec2.generic[14];
+	vec1.generic[14] = x.s;
+	x.s = vec1.generic[15];
+	x.u <<= vec2.generic[15];
+	vec1.generic[15] = x.s;
+	x.s = vec1.generic[16];
+	x.u <<= vec2.generic[16];
+	vec1.generic[16] = x.s;
+	x.s = vec1.generic[17];
+	x.u <<= vec2.generic[17];
+	vec1.generic[17] = x.s;
+	x.s = vec1.generic[18];
+	x.u <<= vec2.generic[18];
+	vec1.generic[18] = x.s;
+	x.s = vec1.generic[19];
+	x.u <<= vec2.generic[19];
+	vec1.generic[19] = x.s;
+	x.s = vec1.generic[20];
+	x.u <<= vec2.generic[20];
+	vec1.generic[20] = x.s;
+	x.s = vec1.generic[21];
+	x.u <<= vec2.generic[21];
+	vec1.generic[21] = x.s;
+	x.s = vec1.generic[22];
+	x.u <<= vec2.generic[22];
+	vec1.generic[22] = x.s;
+	x.s = vec1.generic[23];
+	x.u <<= vec2.generic[23];
+	vec1.generic[23] = x.s;
+	x.s = vec1.generic[24];
+	x.u <<= vec2.generic[24];
+	vec1.generic[24] = x.s;
+	x.s = vec1.generic[25];
+	x.u <<= vec2.generic[25];
+	vec1.generic[25] = x.s;
+	x.s = vec1.generic[26];
+	x.u <<= vec2.generic[26];
+	vec1.generic[26] = x.s;
+	x.s = vec1.generic[27];
+	x.u <<= vec2.generic[27];
+	vec1.generic[27] = x.s;
+	x.s = vec1.generic[28];
+	x.u <<= vec2.generic[28];
+	vec1.generic[28] = x.s;
+	x.s = vec1.generic[29];
+	x.u <<= vec2.generic[29];
+	vec1.generic[29] = x.s;
+	x.s = vec1.generic[30];
+	x.u <<= vec2.generic[30];
+	vec1.generic[30] = x.s;
+	x.s = vec1.generic[31];
+	x.u <<= vec2.generic[31];
+	vec1.generic[31] = x.s;
+	return vec1;
+}
+# define VINT16x32_LSHIFT_DEFINED
+#endif
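+/* Note: lrshift and lshift above pun each lane through a
+ * union { vec_uint16 u; vec_int16 s; } so the shift itself happens on an
+ * unsigned value: lrshift therefore shifts in zero bits (a lane of -1
+ * becomes 0x7FFF after shifting right by one), and lshift sidesteps the
+ * undefined behaviour of left-shifting a negative signed value. */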
+#if !defined(VUINT16x32_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_splat(vec_uint16 x)
+{
+	vuint16x32 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	vec.generic[16] = x;
+	vec.generic[17] = x;
+	vec.generic[18] = x;
+	vec.generic[19] = x;
+	vec.generic[20] = x;
+	vec.generic[21] = x;
+	vec.generic[22] = x;
+	vec.generic[23] = x;
+	vec.generic[24] = x;
+	vec.generic[25] = x;
+	vec.generic[26] = x;
+	vec.generic[27] = x;
+	vec.generic[28] = x;
+	vec.generic[29] = x;
+	vec.generic[30] = x;
+	vec.generic[31] = x;
+	return vec;
+}
+# define VUINT16x32_SPLAT_DEFINED
+#endif
+#if !defined(VUINT16x32_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_load_aligned(const vec_uint16 x[32])
+{
+	vuint16x32 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VUINT16x32_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT16x32_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_load(const vec_uint16 x[32])
+{
+	vuint16x32 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VUINT16x32_LOAD_DEFINED
+#endif
+#if !defined(VUINT16x32_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint16x32_store_aligned(vuint16x32 vec, vec_uint16 x[32])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VUINT16x32_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT16x32_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint16x32_store(vuint16x32 vec, vec_uint16 x[32])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VUINT16x32_STORE_DEFINED
+#endif
+#if !defined(VUINT16x32_ADD_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_add(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] + vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] + vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] + vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] + vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] + vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] + vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] + vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] + vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] + vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] + vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] + vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] + vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] + vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] + vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] + vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] + vec2.generic[31]);
+	return vec1;
+}
+# define VUINT16x32_ADD_DEFINED
+#endif
+#if !defined(VUINT16x32_SUB_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_sub(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] - vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] - vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] - vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] - vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] - vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] - vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] - vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] - vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] - vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] - vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] - vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] - vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] - vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] - vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] - vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] - vec2.generic[31]);
+	return vec1;
+}
+# define VUINT16x32_SUB_DEFINED
+#endif
+#if !defined(VUINT16x32_MUL_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_mul(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] * vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] * vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] * vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] * vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] * vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] * vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] * vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] * vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] * vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] * vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] * vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] * vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] * vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] * vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] * vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] * vec2.generic[31]);
+	return vec1;
+}
+# define VUINT16x32_MUL_DEFINED
+#endif
+#if !defined(VUINT16x32_DIV_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_div(vuint16x32 vec1, vuint16x32 vec2)
+{
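+	/* element-wise division; a zero divisor yields 0 in that lane instead of trapping */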
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] / vec2.generic[16]) : 0);
+	vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] / vec2.generic[17]) : 0);
+	vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] / vec2.generic[18]) : 0);
+	vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] / vec2.generic[19]) : 0);
+	vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] / vec2.generic[20]) : 0);
+	vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] / vec2.generic[21]) : 0);
+	vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] / vec2.generic[22]) : 0);
+	vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] / vec2.generic[23]) : 0);
+	vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] / vec2.generic[24]) : 0);
+	vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] / vec2.generic[25]) : 0);
+	vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] / vec2.generic[26]) : 0);
+	vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] / vec2.generic[27]) : 0);
+	vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] / vec2.generic[28]) : 0);
+	vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] / vec2.generic[29]) : 0);
+	vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] / vec2.generic[30]) : 0);
+	vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] / vec2.generic[31]) : 0);
+	return vec1;
+}
+# define VUINT16x32_DIV_DEFINED
+#endif
+#if !defined(VUINT16x32_MOD_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_mod(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0);
+	vec1.generic[16] = (vec2.generic[16] ? (vec1.generic[16] % vec2.generic[16]) : 0);
+	vec1.generic[17] = (vec2.generic[17] ? (vec1.generic[17] % vec2.generic[17]) : 0);
+	vec1.generic[18] = (vec2.generic[18] ? (vec1.generic[18] % vec2.generic[18]) : 0);
+	vec1.generic[19] = (vec2.generic[19] ? (vec1.generic[19] % vec2.generic[19]) : 0);
+	vec1.generic[20] = (vec2.generic[20] ? (vec1.generic[20] % vec2.generic[20]) : 0);
+	vec1.generic[21] = (vec2.generic[21] ? (vec1.generic[21] % vec2.generic[21]) : 0);
+	vec1.generic[22] = (vec2.generic[22] ? (vec1.generic[22] % vec2.generic[22]) : 0);
+	vec1.generic[23] = (vec2.generic[23] ? (vec1.generic[23] % vec2.generic[23]) : 0);
+	vec1.generic[24] = (vec2.generic[24] ? (vec1.generic[24] % vec2.generic[24]) : 0);
+	vec1.generic[25] = (vec2.generic[25] ? (vec1.generic[25] % vec2.generic[25]) : 0);
+	vec1.generic[26] = (vec2.generic[26] ? (vec1.generic[26] % vec2.generic[26]) : 0);
+	vec1.generic[27] = (vec2.generic[27] ? (vec1.generic[27] % vec2.generic[27]) : 0);
+	vec1.generic[28] = (vec2.generic[28] ? (vec1.generic[28] % vec2.generic[28]) : 0);
+	vec1.generic[29] = (vec2.generic[29] ? (vec1.generic[29] % vec2.generic[29]) : 0);
+	vec1.generic[30] = (vec2.generic[30] ? (vec1.generic[30] % vec2.generic[30]) : 0);
+	vec1.generic[31] = (vec2.generic[31] ? (vec1.generic[31] % vec2.generic[31]) : 0);
+	return vec1;
+}
+# define VUINT16x32_MOD_DEFINED
+#endif
+#if !defined(VUINT16x32_AVG_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_avg(vuint16x32 vec1, vuint16x32 vec2)
+{
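+	/* ceil((a + b) / 2) per lane without overflowing the intermediate sum: the halves are added first and the low bits round the result up */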
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1);
+	vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1);
+	vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1);
+	vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1);
+	vec1.generic[8] = (vec1.generic[8] >> 1) + (vec2.generic[8] >> 1) + ((vec1.generic[8] | vec2.generic[8]) & 1);
+	vec1.generic[9] = (vec1.generic[9] >> 1) + (vec2.generic[9] >> 1) + ((vec1.generic[9] | vec2.generic[9]) & 1);
+	vec1.generic[10] = (vec1.generic[10] >> 1) + (vec2.generic[10] >> 1) + ((vec1.generic[10] | vec2.generic[10]) & 1);
+	vec1.generic[11] = (vec1.generic[11] >> 1) + (vec2.generic[11] >> 1) + ((vec1.generic[11] | vec2.generic[11]) & 1);
+	vec1.generic[12] = (vec1.generic[12] >> 1) + (vec2.generic[12] >> 1) + ((vec1.generic[12] | vec2.generic[12]) & 1);
+	vec1.generic[13] = (vec1.generic[13] >> 1) + (vec2.generic[13] >> 1) + ((vec1.generic[13] | vec2.generic[13]) & 1);
+	vec1.generic[14] = (vec1.generic[14] >> 1) + (vec2.generic[14] >> 1) + ((vec1.generic[14] | vec2.generic[14]) & 1);
+	vec1.generic[15] = (vec1.generic[15] >> 1) + (vec2.generic[15] >> 1) + ((vec1.generic[15] | vec2.generic[15]) & 1);
+	vec1.generic[16] = (vec1.generic[16] >> 1) + (vec2.generic[16] >> 1) + ((vec1.generic[16] | vec2.generic[16]) & 1);
+	vec1.generic[17] = (vec1.generic[17] >> 1) + (vec2.generic[17] >> 1) + ((vec1.generic[17] | vec2.generic[17]) & 1);
+	vec1.generic[18] = (vec1.generic[18] >> 1) + (vec2.generic[18] >> 1) + ((vec1.generic[18] | vec2.generic[18]) & 1);
+	vec1.generic[19] = (vec1.generic[19] >> 1) + (vec2.generic[19] >> 1) + ((vec1.generic[19] | vec2.generic[19]) & 1);
+	vec1.generic[20] = (vec1.generic[20] >> 1) + (vec2.generic[20] >> 1) + ((vec1.generic[20] | vec2.generic[20]) & 1);
+	vec1.generic[21] = (vec1.generic[21] >> 1) + (vec2.generic[21] >> 1) + ((vec1.generic[21] | vec2.generic[21]) & 1);
+	vec1.generic[22] = (vec1.generic[22] >> 1) + (vec2.generic[22] >> 1) + ((vec1.generic[22] | vec2.generic[22]) & 1);
+	vec1.generic[23] = (vec1.generic[23] >> 1) + (vec2.generic[23] >> 1) + ((vec1.generic[23] | vec2.generic[23]) & 1);
+	vec1.generic[24] = (vec1.generic[24] >> 1) + (vec2.generic[24] >> 1) + ((vec1.generic[24] | vec2.generic[24]) & 1);
+	vec1.generic[25] = (vec1.generic[25] >> 1) + (vec2.generic[25] >> 1) + ((vec1.generic[25] | vec2.generic[25]) & 1);
+	vec1.generic[26] = (vec1.generic[26] >> 1) + (vec2.generic[26] >> 1) + ((vec1.generic[26] | vec2.generic[26]) & 1);
+	vec1.generic[27] = (vec1.generic[27] >> 1) + (vec2.generic[27] >> 1) + ((vec1.generic[27] | vec2.generic[27]) & 1);
+	vec1.generic[28] = (vec1.generic[28] >> 1) + (vec2.generic[28] >> 1) + ((vec1.generic[28] | vec2.generic[28]) & 1);
+	vec1.generic[29] = (vec1.generic[29] >> 1) + (vec2.generic[29] >> 1) + ((vec1.generic[29] | vec2.generic[29]) & 1);
+	vec1.generic[30] = (vec1.generic[30] >> 1) + (vec2.generic[30] >> 1) + ((vec1.generic[30] | vec2.generic[30]) & 1);
+	vec1.generic[31] = (vec1.generic[31] >> 1) + (vec2.generic[31] >> 1) + ((vec1.generic[31] | vec2.generic[31]) & 1);
+	return vec1;
+}
+# define VUINT16x32_AVG_DEFINED
+#endif
+#if !defined(VUINT16x32_AND_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_and(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] & vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] & vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] & vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] & vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] & vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] & vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] & vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] & vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] & vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] & vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] & vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] & vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] & vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] & vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] & vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] & vec2.generic[31]);
+	return vec1;
+}
+# define VUINT16x32_AND_DEFINED
+#endif
+#if !defined(VUINT16x32_OR_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_or(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] | vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] | vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] | vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] | vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] | vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] | vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] | vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] | vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] | vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] | vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] | vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] | vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] | vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] | vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] | vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] | vec2.generic[31]);
+	return vec1;
+}
+# define VUINT16x32_OR_DEFINED
+#endif
+#if !defined(VUINT16x32_XOR_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_xor(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] ^ vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] ^ vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] ^ vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] ^ vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] ^ vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] ^ vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] ^ vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] ^ vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] ^ vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] ^ vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] ^ vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] ^ vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] ^ vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] ^ vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] ^ vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] ^ vec2.generic[31]);
+	return vec1;
+}
+# define VUINT16x32_XOR_DEFINED
+#endif
+#if !defined(VUINT16x32_NOT_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_not(vuint16x32 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	vec.generic[8] = ~vec.generic[8];
+	vec.generic[9] = ~vec.generic[9];
+	vec.generic[10] = ~vec.generic[10];
+	vec.generic[11] = ~vec.generic[11];
+	vec.generic[12] = ~vec.generic[12];
+	vec.generic[13] = ~vec.generic[13];
+	vec.generic[14] = ~vec.generic[14];
+	vec.generic[15] = ~vec.generic[15];
+	vec.generic[16] = ~vec.generic[16];
+	vec.generic[17] = ~vec.generic[17];
+	vec.generic[18] = ~vec.generic[18];
+	vec.generic[19] = ~vec.generic[19];
+	vec.generic[20] = ~vec.generic[20];
+	vec.generic[21] = ~vec.generic[21];
+	vec.generic[22] = ~vec.generic[22];
+	vec.generic[23] = ~vec.generic[23];
+	vec.generic[24] = ~vec.generic[24];
+	vec.generic[25] = ~vec.generic[25];
+	vec.generic[26] = ~vec.generic[26];
+	vec.generic[27] = ~vec.generic[27];
+	vec.generic[28] = ~vec.generic[28];
+	vec.generic[29] = ~vec.generic[29];
+	vec.generic[30] = ~vec.generic[30];
+	vec.generic[31] = ~vec.generic[31];
+	return vec;
+}
+# define VUINT16x32_NOT_DEFINED
+#endif
+#if !defined(VUINT16x32_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmplt(vuint16x32 vec1, vuint16x32 vec2)
+{
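+	/* comparison results are SIMD-style masks: each 2-byte lane becomes all ones when the test holds, all zeros otherwise */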
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[16], (vec1.generic[16] < vec2.generic[16]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[17], (vec1.generic[17] < vec2.generic[17]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[18], (vec1.generic[18] < vec2.generic[18]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[19], (vec1.generic[19] < vec2.generic[19]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[20], (vec1.generic[20] < vec2.generic[20]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[21], (vec1.generic[21] < vec2.generic[21]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[22], (vec1.generic[22] < vec2.generic[22]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[23], (vec1.generic[23] < vec2.generic[23]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[24], (vec1.generic[24] < vec2.generic[24]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[25], (vec1.generic[25] < vec2.generic[25]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[26], (vec1.generic[26] < vec2.generic[26]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[27], (vec1.generic[27] < vec2.generic[27]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[28], (vec1.generic[28] < vec2.generic[28]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[29], (vec1.generic[29] < vec2.generic[29]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[30], (vec1.generic[30] < vec2.generic[30]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[31], (vec1.generic[31] < vec2.generic[31]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x32_CMPLT_DEFINED
+#endif
+#if !defined(VUINT16x32_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpeq(vuint16x32 vec1, vuint16x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[16], (vec1.generic[16] == vec2.generic[16]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[17], (vec1.generic[17] == vec2.generic[17]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[18], (vec1.generic[18] == vec2.generic[18]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[19], (vec1.generic[19] == vec2.generic[19]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[20], (vec1.generic[20] == vec2.generic[20]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[21], (vec1.generic[21] == vec2.generic[21]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[22], (vec1.generic[22] == vec2.generic[22]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[23], (vec1.generic[23] == vec2.generic[23]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[24], (vec1.generic[24] == vec2.generic[24]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[25], (vec1.generic[25] == vec2.generic[25]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[26], (vec1.generic[26] == vec2.generic[26]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[27], (vec1.generic[27] == vec2.generic[27]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[28], (vec1.generic[28] == vec2.generic[28]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[29], (vec1.generic[29] == vec2.generic[29]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[30], (vec1.generic[30] == vec2.generic[30]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[31], (vec1.generic[31] == vec2.generic[31]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x32_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT16x32_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpgt(vuint16x32 vec1, vuint16x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[16], (vec1.generic[16] > vec2.generic[16]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[17], (vec1.generic[17] > vec2.generic[17]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[18], (vec1.generic[18] > vec2.generic[18]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[19], (vec1.generic[19] > vec2.generic[19]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[20], (vec1.generic[20] > vec2.generic[20]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[21], (vec1.generic[21] > vec2.generic[21]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[22], (vec1.generic[22] > vec2.generic[22]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[23], (vec1.generic[23] > vec2.generic[23]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[24], (vec1.generic[24] > vec2.generic[24]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[25], (vec1.generic[25] > vec2.generic[25]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[26], (vec1.generic[26] > vec2.generic[26]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[27], (vec1.generic[27] > vec2.generic[27]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[28], (vec1.generic[28] > vec2.generic[28]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[29], (vec1.generic[29] > vec2.generic[29]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[30], (vec1.generic[30] > vec2.generic[30]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[31], (vec1.generic[31] > vec2.generic[31]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x32_CMPGT_DEFINED
+#endif
+#if !defined(VUINT16x32_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmple(vuint16x32 vec1, vuint16x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[16], (vec1.generic[16] <= vec2.generic[16]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[17], (vec1.generic[17] <= vec2.generic[17]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[18], (vec1.generic[18] <= vec2.generic[18]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[19], (vec1.generic[19] <= vec2.generic[19]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[20], (vec1.generic[20] <= vec2.generic[20]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[21], (vec1.generic[21] <= vec2.generic[21]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[22], (vec1.generic[22] <= vec2.generic[22]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[23], (vec1.generic[23] <= vec2.generic[23]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[24], (vec1.generic[24] <= vec2.generic[24]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[25], (vec1.generic[25] <= vec2.generic[25]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[26], (vec1.generic[26] <= vec2.generic[26]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[27], (vec1.generic[27] <= vec2.generic[27]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[28], (vec1.generic[28] <= vec2.generic[28]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[29], (vec1.generic[29] <= vec2.generic[29]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[30], (vec1.generic[30] <= vec2.generic[30]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[31], (vec1.generic[31] <= vec2.generic[31]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x32_CMPLE_DEFINED
+#endif
+#if !defined(VUINT16x32_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpge(vuint16x32 vec1, vuint16x32 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[16], (vec1.generic[16] >= vec2.generic[16]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[17], (vec1.generic[17] >= vec2.generic[17]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[18], (vec1.generic[18] >= vec2.generic[18]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[19], (vec1.generic[19] >= vec2.generic[19]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[20], (vec1.generic[20] >= vec2.generic[20]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[21], (vec1.generic[21] >= vec2.generic[21]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[22], (vec1.generic[22] >= vec2.generic[22]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[23], (vec1.generic[23] >= vec2.generic[23]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[24], (vec1.generic[24] >= vec2.generic[24]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[25], (vec1.generic[25] >= vec2.generic[25]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[26], (vec1.generic[26] >= vec2.generic[26]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[27], (vec1.generic[27] >= vec2.generic[27]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[28], (vec1.generic[28] >= vec2.generic[28]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[29], (vec1.generic[29] >= vec2.generic[29]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[30], (vec1.generic[30] >= vec2.generic[30]) ? 0xFF : 0, 2);
+	memset(&vec1.generic[31], (vec1.generic[31] >= vec2.generic[31]) ? 0xFF : 0, 2);
+	return vec1;
+}
+# define VUINT16x32_CMPGE_DEFINED
+#endif
+#if !defined(VUINT16x32_MIN_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_min(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] < vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] < vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] < vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] < vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] < vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] < vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] < vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] < vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] < vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] < vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] < vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] < vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] < vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] < vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] < vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] < vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+	return vec1;
+}
+# define VUINT16x32_MIN_DEFINED
+#endif
+#if !defined(VUINT16x32_MAX_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_max(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	vec1.generic[16] = (vec1.generic[16] > vec2.generic[16]) ? (vec1.generic[16]) : (vec2.generic[16]);
+	vec1.generic[17] = (vec1.generic[17] > vec2.generic[17]) ? (vec1.generic[17]) : (vec2.generic[17]);
+	vec1.generic[18] = (vec1.generic[18] > vec2.generic[18]) ? (vec1.generic[18]) : (vec2.generic[18]);
+	vec1.generic[19] = (vec1.generic[19] > vec2.generic[19]) ? (vec1.generic[19]) : (vec2.generic[19]);
+	vec1.generic[20] = (vec1.generic[20] > vec2.generic[20]) ? (vec1.generic[20]) : (vec2.generic[20]);
+	vec1.generic[21] = (vec1.generic[21] > vec2.generic[21]) ? (vec1.generic[21]) : (vec2.generic[21]);
+	vec1.generic[22] = (vec1.generic[22] > vec2.generic[22]) ? (vec1.generic[22]) : (vec2.generic[22]);
+	vec1.generic[23] = (vec1.generic[23] > vec2.generic[23]) ? (vec1.generic[23]) : (vec2.generic[23]);
+	vec1.generic[24] = (vec1.generic[24] > vec2.generic[24]) ? (vec1.generic[24]) : (vec2.generic[24]);
+	vec1.generic[25] = (vec1.generic[25] > vec2.generic[25]) ? (vec1.generic[25]) : (vec2.generic[25]);
+	vec1.generic[26] = (vec1.generic[26] > vec2.generic[26]) ? (vec1.generic[26]) : (vec2.generic[26]);
+	vec1.generic[27] = (vec1.generic[27] > vec2.generic[27]) ? (vec1.generic[27]) : (vec2.generic[27]);
+	vec1.generic[28] = (vec1.generic[28] > vec2.generic[28]) ? (vec1.generic[28]) : (vec2.generic[28]);
+	vec1.generic[29] = (vec1.generic[29] > vec2.generic[29]) ? (vec1.generic[29]) : (vec2.generic[29]);
+	vec1.generic[30] = (vec1.generic[30] > vec2.generic[30]) ? (vec1.generic[30]) : (vec2.generic[30]);
+	vec1.generic[31] = (vec1.generic[31] > vec2.generic[31]) ? (vec1.generic[31]) : (vec2.generic[31]);
+	return vec1;
+}
+# define VUINT16x32_MAX_DEFINED
+#endif
+#if !defined(VUINT16x32_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_rshift(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	vec1.generic[8] >>= vec2.generic[8];
+	vec1.generic[9] >>= vec2.generic[9];
+	vec1.generic[10] >>= vec2.generic[10];
+	vec1.generic[11] >>= vec2.generic[11];
+	vec1.generic[12] >>= vec2.generic[12];
+	vec1.generic[13] >>= vec2.generic[13];
+	vec1.generic[14] >>= vec2.generic[14];
+	vec1.generic[15] >>= vec2.generic[15];
+	vec1.generic[16] >>= vec2.generic[16];
+	vec1.generic[17] >>= vec2.generic[17];
+	vec1.generic[18] >>= vec2.generic[18];
+	vec1.generic[19] >>= vec2.generic[19];
+	vec1.generic[20] >>= vec2.generic[20];
+	vec1.generic[21] >>= vec2.generic[21];
+	vec1.generic[22] >>= vec2.generic[22];
+	vec1.generic[23] >>= vec2.generic[23];
+	vec1.generic[24] >>= vec2.generic[24];
+	vec1.generic[25] >>= vec2.generic[25];
+	vec1.generic[26] >>= vec2.generic[26];
+	vec1.generic[27] >>= vec2.generic[27];
+	vec1.generic[28] >>= vec2.generic[28];
+	vec1.generic[29] >>= vec2.generic[29];
+	vec1.generic[30] >>= vec2.generic[30];
+	vec1.generic[31] >>= vec2.generic[31];
+	return vec1;
+}
+# define VUINT16x32_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x32_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_lrshift(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	vec1.generic[8] >>= vec2.generic[8];
+	vec1.generic[9] >>= vec2.generic[9];
+	vec1.generic[10] >>= vec2.generic[10];
+	vec1.generic[11] >>= vec2.generic[11];
+	vec1.generic[12] >>= vec2.generic[12];
+	vec1.generic[13] >>= vec2.generic[13];
+	vec1.generic[14] >>= vec2.generic[14];
+	vec1.generic[15] >>= vec2.generic[15];
+	vec1.generic[16] >>= vec2.generic[16];
+	vec1.generic[17] >>= vec2.generic[17];
+	vec1.generic[18] >>= vec2.generic[18];
+	vec1.generic[19] >>= vec2.generic[19];
+	vec1.generic[20] >>= vec2.generic[20];
+	vec1.generic[21] >>= vec2.generic[21];
+	vec1.generic[22] >>= vec2.generic[22];
+	vec1.generic[23] >>= vec2.generic[23];
+	vec1.generic[24] >>= vec2.generic[24];
+	vec1.generic[25] >>= vec2.generic[25];
+	vec1.generic[26] >>= vec2.generic[26];
+	vec1.generic[27] >>= vec2.generic[27];
+	vec1.generic[28] >>= vec2.generic[28];
+	vec1.generic[29] >>= vec2.generic[29];
+	vec1.generic[30] >>= vec2.generic[30];
+	vec1.generic[31] >>= vec2.generic[31];
+	return vec1;
+}
+# define VUINT16x32_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x32_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x32 vuint16x32_lshift(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	vec1.generic[4] <<= vec2.generic[4];
+	vec1.generic[5] <<= vec2.generic[5];
+	vec1.generic[6] <<= vec2.generic[6];
+	vec1.generic[7] <<= vec2.generic[7];
+	vec1.generic[8] <<= vec2.generic[8];
+	vec1.generic[9] <<= vec2.generic[9];
+	vec1.generic[10] <<= vec2.generic[10];
+	vec1.generic[11] <<= vec2.generic[11];
+	vec1.generic[12] <<= vec2.generic[12];
+	vec1.generic[13] <<= vec2.generic[13];
+	vec1.generic[14] <<= vec2.generic[14];
+	vec1.generic[15] <<= vec2.generic[15];
+	vec1.generic[16] <<= vec2.generic[16];
+	vec1.generic[17] <<= vec2.generic[17];
+	vec1.generic[18] <<= vec2.generic[18];
+	vec1.generic[19] <<= vec2.generic[19];
+	vec1.generic[20] <<= vec2.generic[20];
+	vec1.generic[21] <<= vec2.generic[21];
+	vec1.generic[22] <<= vec2.generic[22];
+	vec1.generic[23] <<= vec2.generic[23];
+	vec1.generic[24] <<= vec2.generic[24];
+	vec1.generic[25] <<= vec2.generic[25];
+	vec1.generic[26] <<= vec2.generic[26];
+	vec1.generic[27] <<= vec2.generic[27];
+	vec1.generic[28] <<= vec2.generic[28];
+	vec1.generic[29] <<= vec2.generic[29];
+	vec1.generic[30] <<= vec2.generic[30];
+	vec1.generic[31] <<= vec2.generic[31];
+	return vec1;
+}
+# define VUINT16x32_LSHIFT_DEFINED
+#endif
+#if !defined(VINT32x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_splat(vec_int32 x)
+{
+	vint32x2 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	return vec;
+}
 # define VINT32x2_SPLAT_DEFINED
 #endif
-#ifndef VINT32x2_LOAD_ALIGNED_DEFINED
-VEC_GENERIC_LOAD_ALIGNED(/* nothing */, 32, 2)
+#if !defined(VINT32x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_load_aligned(const vec_int32 x[2])
+{
+	vint32x2 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
 # define VINT32x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT32x2_LOAD_DEFINED
-VEC_GENERIC_LOAD(/* nothing */, 32, 2)
+#if !defined(VINT32x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_load(const vec_int32 x[2])
+{
+	vint32x2 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
 # define VINT32x2_LOAD_DEFINED
 #endif
-#ifndef VINT32x2_STORE_ALIGNED_DEFINED
-VEC_GENERIC_STORE_ALIGNED(/* nothing */, 32, 2)
+#if !defined(VINT32x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint32x2_store_aligned(vint32x2 vec, vec_int32 x[2])
+{
+	memcpy(x, vec.generic, 8);
+}
 # define VINT32x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT32x2_STORE_DEFINED
-VEC_GENERIC_STORE(/* nothing */, 32, 2)
+#if !defined(VINT32x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vint32x2_store(vint32x2 vec, vec_int32 x[2])
+{
+	memcpy(x, vec.generic, 8);
+}
 # define VINT32x2_STORE_DEFINED
 #endif
-#ifndef VINT32x2_ADD_DEFINED
-VEC_GENERIC_ADD(/* nothing */, 32, 2)
+#if !defined(VINT32x2_ADD_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_add(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	return vec1;
+}
 # define VINT32x2_ADD_DEFINED
 #endif
-#ifndef VINT32x2_SUB_DEFINED
-VEC_GENERIC_SUB(/* nothing */, 32, 2)
+#if !defined(VINT32x2_SUB_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_sub(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	return vec1;
+}
 # define VINT32x2_SUB_DEFINED
 #endif
-#ifndef VINT32x2_MUL_DEFINED
-VEC_GENERIC_MUL(/* nothing */, 32, 2)
+#if !defined(VINT32x2_MUL_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_mul(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	return vec1;
+}
 # define VINT32x2_MUL_DEFINED
 #endif
-#ifndef VINT32x2_DIV_DEFINED
-VEC_GENERIC_DIV(/* nothing */, 32, 2)
+#if !defined(VINT32x2_DIV_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_div(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VINT32x2_DIV_DEFINED
 #endif
-#ifndef VINT32x2_MOD_DEFINED
-VEC_GENERIC_MOD(/* nothing */, 32, 2)
+#if !defined(VINT32x2_MOD_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_mod(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VINT32x2_MOD_DEFINED
 #endif
-#ifndef VINT32x2_AVG_DEFINED
-VEC_GENERIC_AVG(/* nothing */, 32, 2)
+#if !defined(VINT32x2_AVG_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_avg(vint32x2 vec1, vint32x2 vec2)
+{
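+	/* signed rounding-up average: each operand is split into a halved quotient and a remainder so the intermediate sum cannot overflow */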
+	vec_int32 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
 # define VINT32x2_AVG_DEFINED
 #endif
-#ifndef VINT32x2_AND_DEFINED
-VEC_GENERIC_AND(/* nothing */, 32, 2)
+#if !defined(VINT32x2_AND_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_and(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	return vec1;
+}
 # define VINT32x2_AND_DEFINED
 #endif
-#ifndef VINT32x2_OR_DEFINED
-VEC_GENERIC_OR(/* nothing */, 32, 2)
+#if !defined(VINT32x2_OR_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_or(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	return vec1;
+}
 # define VINT32x2_OR_DEFINED
 #endif
-#ifndef VINT32x2_XOR_DEFINED
-VEC_GENERIC_XOR(/* nothing */, 32, 2)
+#if !defined(VINT32x2_XOR_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_xor(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	return vec1;
+}
 # define VINT32x2_XOR_DEFINED
 #endif
-#ifndef VINT32x2_NOT_DEFINED
-VEC_GENERIC_NOT(/* nothing */, 32, 2)
+#if !defined(VINT32x2_NOT_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_not(vint32x2 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	return vec;
+}
 # define VINT32x2_NOT_DEFINED
 #endif
-#ifndef VINT32x2_CMPLT_DEFINED
-VEC_GENERIC_CMPLT(/* nothing */, 32, 2)
+#if !defined(VINT32x2_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_cmplt(vint32x2 vec1, vint32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
 # define VINT32x2_CMPLT_DEFINED
 #endif
-#ifndef VINT32x2_CMPEQ_DEFINED
-VEC_GENERIC_CMPEQ(/* nothing */, 32, 2)
+#if !defined(VINT32x2_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpeq(vint32x2 vec1, vint32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
 # define VINT32x2_CMPEQ_DEFINED
 #endif
-#ifndef VINT32x2_CMPGT_DEFINED
-VEC_GENERIC_CMPGT(/* nothing */, 32, 2)
+#if !defined(VINT32x2_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpgt(vint32x2 vec1, vint32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
 # define VINT32x2_CMPGT_DEFINED
 #endif
-#ifndef VINT32x2_CMPLE_DEFINED
-VEC_GENERIC_CMPLE(/* nothing */, 32, 2)
+#if !defined(VINT32x2_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_cmple(vint32x2 vec1, vint32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
 # define VINT32x2_CMPLE_DEFINED
 #endif
-#ifndef VINT32x2_CMPGE_DEFINED
-VEC_GENERIC_CMPGE(/* nothing */, 32, 2)
+#if !defined(VINT32x2_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpge(vint32x2 vec1, vint32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
 # define VINT32x2_CMPGE_DEFINED
 #endif
-#ifndef VINT32x2_MIN_DEFINED
-VEC_GENERIC_MIN(/* nothing */, 32, 2)
+#if !defined(VINT32x2_MIN_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_min(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VINT32x2_MIN_DEFINED
 #endif
-#ifndef VINT32x2_MAX_DEFINED
-VEC_GENERIC_MAX(/* nothing */, 32, 2)
+#if !defined(VINT32x2_MAX_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_max(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VINT32x2_MAX_DEFINED
 #endif
-#ifndef VINT32x2_RSHIFT_DEFINED
-VEC_GENERIC_RSHIFT(/* nothing */, 32, 2)
+#if !defined(VINT32x2_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_rshift(vint32x2 vec1, vuint32x2 vec2)
+{
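+	/* arithmetic right shift: negative lanes go through ~(~x >> n) so the vacated bits are sign-filled on any conforming implementation */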
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~(~vec1.generic[0] >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~(~vec1.generic[1] >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	return vec1;
+}
 # define VINT32x2_RSHIFT_DEFINED
 #endif
-#ifndef VINT32x2_LRSHIFT_DEFINED
-VEC_GENERIC_LRSHIFT(/* nothing */, 32, 2)
+#if !defined(VINT32x2_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_lrshift(vint32x2 vec1, vuint32x2 vec2)
+{
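+	/* logical right shift of a signed lane: the union reinterprets the bits as unsigned so the shift zero-fills */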
+	union { vec_uint32 u; vec_int32 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	return vec1;
+}
 # define VINT32x2_LRSHIFT_DEFINED
 #endif
-#ifndef VINT32x2_LSHIFT_DEFINED
-VEC_GENERIC_LSHIFT(/* nothing */, 32, 2)
+#if !defined(VINT32x2_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x2 vint32x2_lshift(vint32x2 vec1, vuint32x2 vec2)
+{
+	union { vec_uint32 u; vec_int32 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	return vec1;
+}
 # define VINT32x2_LSHIFT_DEFINED
 #endif
-
-
-/* vint32x2 */
-
-#ifndef VUINT32x2_SPLAT_DEFINED
-VEC_GENERIC_SPLAT(u, 32, 2)
+#if !defined(VUINT32x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_splat(vec_uint32 x)
+{
+	vuint32x2 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	return vec;
+}
 # define VUINT32x2_SPLAT_DEFINED
 #endif
-#ifndef VUINT32x2_LOAD_ALIGNED_DEFINED
-VEC_GENERIC_LOAD_ALIGNED(u, 32, 2)
+#if !defined(VUINT32x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_load_aligned(const vec_uint32 x[2])
+{
+	vuint32x2 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
 # define VUINT32x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT32x2_LOAD_DEFINED
-VEC_GENERIC_LOAD(u, 32, 2)
+#if !defined(VUINT32x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_load(const vec_uint32 x[2])
+{
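+	/* the generic fallback makes no alignment distinction; load and load_aligned are both a plain memcpy */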
+	vuint32x2 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
 # define VUINT32x2_LOAD_DEFINED
 #endif
-#ifndef VUINT32x2_STORE_ALIGNED_DEFINED
-VEC_GENERIC_STORE_ALIGNED(u, 32, 2)
+#if !defined(VUINT32x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint32x2_store_aligned(vuint32x2 vec, vec_uint32 x[2])
+{
+	memcpy(x, vec.generic, 8);
+}
 # define VUINT32x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT32x2_STORE_DEFINED
-VEC_GENERIC_STORE(u, 32, 2)
+#if !defined(VUINT32x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint32x2_store(vuint32x2 vec, vec_uint32 x[2])
+{
+	memcpy(x, vec.generic, 8);
+}
 # define VUINT32x2_STORE_DEFINED
 #endif
-#ifndef VUINT32x2_ADD_DEFINED
-VEC_GENERIC_ADD(u, 32, 2)
+#if !defined(VUINT32x2_ADD_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_add(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	return vec1;
+}
 # define VUINT32x2_ADD_DEFINED
 #endif
-#ifndef VUINT32x2_SUB_DEFINED
-VEC_GENERIC_SUB(u, 32, 2)
+#if !defined(VUINT32x2_SUB_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_sub(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	return vec1;
+}
 # define VUINT32x2_SUB_DEFINED
 #endif
-#ifndef VUINT32x2_MUL_DEFINED
-VEC_GENERIC_MUL(u, 32, 2)
+#if !defined(VUINT32x2_MUL_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_mul(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	return vec1;
+}
 # define VUINT32x2_MUL_DEFINED
 #endif
-#ifndef VUINT32x2_DIV_DEFINED
-VEC_GENERIC_DIV(u, 32, 2)
+#if !defined(VUINT32x2_DIV_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_div(vuint32x2 vec1, vuint32x2 vec2)
+{
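+	/* division by zero is defined here: any lane with a zero divisor yields 0 */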
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VUINT32x2_DIV_DEFINED
 #endif
-#ifndef VUINT32x2_MOD_DEFINED
-VEC_GENERIC_MOD(u, 32, 2)
+#if !defined(VUINT32x2_MOD_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_mod(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VUINT32x2_MOD_DEFINED
 #endif
-#ifndef VUINT32x2_AVG_DEFINED
-VEC_GENERIC_AVG(u, 32, 2)
+#if !defined(VUINT32x2_AVG_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_avg(vuint32x2 vec1, vuint32x2 vec2)
+{
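+	/* rounds up, matching the documented avg semantics: (a >> 1) + (b >> 1) + ((a | b) & 1) equals ceil((a + b) / 2) without overflowing the intermediate sum */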
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	return vec1;
+}
 # define VUINT32x2_AVG_DEFINED
 #endif
-#ifndef VUINT32x2_AND_DEFINED
-VEC_GENERIC_AND(u, 32, 2)
+#if !defined(VUINT32x2_AND_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_and(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	return vec1;
+}
 # define VUINT32x2_AND_DEFINED
 #endif
-#ifndef VUINT32x2_OR_DEFINED
-VEC_GENERIC_OR(u, 32, 2)
+#if !defined(VUINT32x2_OR_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_or(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	return vec1;
+}
 # define VUINT32x2_OR_DEFINED
 #endif
-#ifndef VUINT32x2_XOR_DEFINED
-VEC_GENERIC_XOR(u, 32, 2)
+#if !defined(VUINT32x2_XOR_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_xor(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	return vec1;
+}
 # define VUINT32x2_XOR_DEFINED
 #endif
-#ifndef VUINT32x2_NOT_DEFINED
-VEC_GENERIC_NOT(u, 32, 2)
+#if !defined(VUINT32x2_NOT_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_not(vuint32x2 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	return vec;
+}
 # define VUINT32x2_NOT_DEFINED
 #endif
-#ifndef VUINT32x2_CMPLT_DEFINED
-VEC_GENERIC_CMPLT(u, 32, 2)
+#if !defined(VUINT32x2_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmplt(vuint32x2 vec1, vuint32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
 # define VUINT32x2_CMPLT_DEFINED
 #endif
-#ifndef VUINT32x2_CMPEQ_DEFINED
-VEC_GENERIC_CMPEQ(u, 32, 2)
+#if !defined(VUINT32x2_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpeq(vuint32x2 vec1, vuint32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
 # define VUINT32x2_CMPEQ_DEFINED
 #endif
-#ifndef VUINT32x2_CMPGT_DEFINED
-VEC_GENERIC_CMPGT(u, 32, 2)
+#if !defined(VUINT32x2_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpgt(vuint32x2 vec1, vuint32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
 # define VUINT32x2_CMPGT_DEFINED
 #endif
-#ifndef VUINT32x2_CMPLE_DEFINED
-VEC_GENERIC_CMPLE(u, 32, 2)
+#if !defined(VUINT32x2_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmple(vuint32x2 vec1, vuint32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
 # define VUINT32x2_CMPLE_DEFINED
 #endif
-#ifndef VUINT32x2_CMPGE_DEFINED
-VEC_GENERIC_CMPGE(u, 32, 2)
+#if !defined(VUINT32x2_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpge(vuint32x2 vec1, vuint32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
 # define VUINT32x2_CMPGE_DEFINED
 #endif
-#ifndef VUINT32x2_MIN_DEFINED
-VEC_GENERIC_MIN(u, 32, 2)
+#if !defined(VUINT32x2_MIN_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_min(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VUINT32x2_MIN_DEFINED
 #endif
-#ifndef VUINT32x2_MAX_DEFINED
-VEC_GENERIC_MAX(u, 32, 2)
+#if !defined(VUINT32x2_MAX_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_max(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VUINT32x2_MAX_DEFINED
 #endif
-#ifndef VUINT32x2_RSHIFT_DEFINED
-VEC_GENERIC_RSHIFT(u, 32, 2)
+#if !defined(VUINT32x2_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_rshift(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	return vec1;
+}
 # define VUINT32x2_RSHIFT_DEFINED
 #endif
-#ifndef VUINT32x2_LRSHIFT_DEFINED
-VEC_GENERIC_LRSHIFT(u, 32, 2)
+#if !defined(VUINT32x2_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_lrshift(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	return vec1;
+}
 # define VUINT32x2_LRSHIFT_DEFINED
 #endif
-#ifndef VUINT32x2_LSHIFT_DEFINED
-VEC_GENERIC_LSHIFT(u, 32, 2)
+#if !defined(VUINT32x2_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x2 vuint32x2_lshift(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	return vec1;
+}
 # define VUINT32x2_LSHIFT_DEFINED
 #endif
-
-
-/* vuint64x2 */
-
-#ifndef VINT64x2_SPLAT_DEFINED
-VEC_GENERIC_SPLAT(/* nothing */, 64, 2)
+#if !defined(VINT32x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_splat(vec_int32 x)
+{
+	vint32x4 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	return vec;
+}
+# define VINT32x4_SPLAT_DEFINED
+#endif
+#if !defined(VINT32x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_load_aligned(const vec_int32 x[4])
+{
+	vint32x4 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VINT32x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT32x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_load(const vec_int32 x[4])
+{
+	vint32x4 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VINT32x4_LOAD_DEFINED
+#endif
+#if !defined(VINT32x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 x[4])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VINT32x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT32x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vint32x4_store(vint32x4 vec, vec_int32 x[4])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VINT32x4_STORE_DEFINED
+#endif
+#if !defined(VINT32x4_ADD_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	return vec1;
+}
+# define VINT32x4_ADD_DEFINED
+#endif
+#if !defined(VINT32x4_SUB_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_sub(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	return vec1;
+}
+# define VINT32x4_SUB_DEFINED
+#endif
+#if !defined(VINT32x4_MUL_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_mul(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	return vec1;
+}
+# define VINT32x4_MUL_DEFINED
+#endif
+#if !defined(VINT32x4_DIV_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_div(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VINT32x4_DIV_DEFINED
+#endif
+#if !defined(VINT32x4_MOD_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_mod(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VINT32x4_MOD_DEFINED
+#endif
+#if !defined(VINT32x4_AVG_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2)
+{
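+	/* signed average, rounding up: each lane is halved before summing so the addition cannot overflow, then the two remainders are folded back in */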
+	vec_int32 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT32x4_AVG_DEFINED
+#endif
+#if !defined(VINT32x4_AND_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	return vec1;
+}
+# define VINT32x4_AND_DEFINED
+#endif
+#if !defined(VINT32x4_OR_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	return vec1;
+}
+# define VINT32x4_OR_DEFINED
+#endif
+#if !defined(VINT32x4_XOR_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	return vec1;
+}
+# define VINT32x4_XOR_DEFINED
+#endif
+#if !defined(VINT32x4_NOT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_not(vint32x4 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	return vec;
+}
+# define VINT32x4_NOT_DEFINED
+#endif
+#if !defined(VINT32x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x4_CMPLT_DEFINED
+#endif
+#if !defined(VINT32x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x4_CMPEQ_DEFINED
+#endif
+#if !defined(VINT32x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x4_CMPGT_DEFINED
+#endif
+#if !defined(VINT32x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_cmple(vint32x4 vec1, vint32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x4_CMPLE_DEFINED
+#endif
+#if !defined(VINT32x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpge(vint32x4 vec1, vint32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x4_CMPGE_DEFINED
+#endif
+#if !defined(VINT32x4_MIN_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VINT32x4_MIN_DEFINED
+#endif
+#if !defined(VINT32x4_MAX_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VINT32x4_MAX_DEFINED
+#endif
+#if !defined(VINT32x4_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	/* arithmetic right shift, emulated portably: `>>` on a negative signed value is implementation-defined in C99 */
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~((~vec1.generic[0]) >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~((~vec1.generic[1]) >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~((~vec1.generic[2]) >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~((~vec1.generic[3]) >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	return vec1;
+}
+# define VINT32x4_RSHIFT_DEFINED
+#endif
+#if !defined(VINT32x4_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	union { vec_uint32 u; vec_int32 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	return vec1;
+}
+# define VINT32x4_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT32x4_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	union { vec_uint32 u; vec_int32 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	return vec1;
+}
+# define VINT32x4_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_splat(vec_uint32 x)
+{
+	vuint32x4 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	return vec;
+}
+# define VUINT32x4_SPLAT_DEFINED
+#endif
+#if !defined(VUINT32x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_load_aligned(const vec_uint32 x[4])
+{
+	vuint32x4 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VUINT32x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT32x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_load(const vec_uint32 x[4])
+{
+	vuint32x4 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VUINT32x4_LOAD_DEFINED
+#endif
+#if !defined(VUINT32x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 x[4])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VUINT32x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT32x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint32x4_store(vuint32x4 vec, vec_uint32 x[4])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VUINT32x4_STORE_DEFINED
+#endif
+#if !defined(VUINT32x4_ADD_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_add(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	return vec1;
+}
+# define VUINT32x4_ADD_DEFINED
+#endif
+#if !defined(VUINT32x4_SUB_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_sub(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	return vec1;
+}
+# define VUINT32x4_SUB_DEFINED
+#endif
+#if !defined(VUINT32x4_MUL_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_mul(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	return vec1;
+}
+# define VUINT32x4_MUL_DEFINED
+#endif
+#if !defined(VUINT32x4_DIV_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_div(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VUINT32x4_DIV_DEFINED
+#endif
+#if !defined(VUINT32x4_MOD_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_mod(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VUINT32x4_MOD_DEFINED
+#endif
+#if !defined(VUINT32x4_AVG_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	return vec1;
+}
+# define VUINT32x4_AVG_DEFINED
+#endif
+#if !defined(VUINT32x4_AND_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_and(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	return vec1;
+}
+# define VUINT32x4_AND_DEFINED
+#endif
+#if !defined(VUINT32x4_OR_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_or(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	return vec1;
+}
+# define VUINT32x4_OR_DEFINED
+#endif
+#if !defined(VUINT32x4_XOR_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_xor(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	return vec1;
+}
+# define VUINT32x4_XOR_DEFINED
+#endif
+#if !defined(VUINT32x4_NOT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_not(vuint32x4 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	return vec;
+}
+# define VUINT32x4_NOT_DEFINED
+#endif
+#if !defined(VUINT32x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmplt(vuint32x4 vec1, vuint32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x4_CMPLT_DEFINED
+#endif
+#if !defined(VUINT32x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpeq(vuint32x4 vec1, vuint32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x4_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT32x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpgt(vuint32x4 vec1, vuint32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x4_CMPGT_DEFINED
+#endif
+#if !defined(VUINT32x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmple(vuint32x4 vec1, vuint32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x4_CMPLE_DEFINED
+#endif
+#if !defined(VUINT32x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpge(vuint32x4 vec1, vuint32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x4_CMPGE_DEFINED
+#endif
+#if !defined(VUINT32x4_MIN_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_min(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VUINT32x4_MIN_DEFINED
+#endif
+#if !defined(VUINT32x4_MAX_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_max(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VUINT32x4_MAX_DEFINED
+#endif
+#if !defined(VUINT32x4_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	return vec1;
+}
+# define VUINT32x4_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x4_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	return vec1;
+}
+# define VUINT32x4_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x4_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	return vec1;
+}
+# define VUINT32x4_LSHIFT_DEFINED
+#endif
+#if !defined(VINT32x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_splat(vec_int32 x)
+{
+	vint32x8 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	return vec;
+}
+# define VINT32x8_SPLAT_DEFINED
+#endif
+#if !defined(VINT32x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_load_aligned(const vec_int32 x[8])
+{
+	vint32x8 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VINT32x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT32x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_load(const vec_int32 x[8])
+{
+	vint32x8 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VINT32x8_LOAD_DEFINED
+#endif
+#if !defined(VINT32x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint32x8_store_aligned(vint32x8 vec, vec_int32 x[8])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VINT32x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT32x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vint32x8_store(vint32x8 vec, vec_int32 x[8])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VINT32x8_STORE_DEFINED
+#endif
+#if !defined(VINT32x8_ADD_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_add(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	return vec1;
+}
+# define VINT32x8_ADD_DEFINED
+#endif
+#if !defined(VINT32x8_SUB_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_sub(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	return vec1;
+}
+# define VINT32x8_SUB_DEFINED
+#endif
+#if !defined(VINT32x8_MUL_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_mul(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	return vec1;
+}
+# define VINT32x8_MUL_DEFINED
+#endif
+#if !defined(VINT32x8_DIV_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_div(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VINT32x8_DIV_DEFINED
+#endif
+#if !defined(VINT32x8_MOD_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_mod(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VINT32x8_MOD_DEFINED
+#endif
+#if !defined(VINT32x8_AVG_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_avg(vint32x8 vec1, vint32x8 vec2)
+{
+	vec_int32 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[4] % 2);
+	y_d_rem = (vec2.generic[4] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[5] % 2);
+	y_d_rem = (vec2.generic[5] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[6] % 2);
+	y_d_rem = (vec2.generic[6] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[7] % 2);
+	y_d_rem = (vec2.generic[7] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT32x8_AVG_DEFINED
+#endif
+#if !defined(VINT32x8_AND_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_and(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	return vec1;
+}
+# define VINT32x8_AND_DEFINED
+#endif
+#if !defined(VINT32x8_OR_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_or(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	return vec1;
+}
+# define VINT32x8_OR_DEFINED
+#endif
+#if !defined(VINT32x8_XOR_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_xor(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	return vec1;
+}
+# define VINT32x8_XOR_DEFINED
+#endif
+#if !defined(VINT32x8_NOT_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_not(vint32x8 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	return vec;
+}
+# define VINT32x8_NOT_DEFINED
+#endif
+#if !defined(VINT32x8_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_cmplt(vint32x8 vec1, vint32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x8_CMPLT_DEFINED
+#endif
+#if !defined(VINT32x8_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_cmpeq(vint32x8 vec1, vint32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x8_CMPEQ_DEFINED
+#endif
+#if !defined(VINT32x8_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_cmpgt(vint32x8 vec1, vint32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x8_CMPGT_DEFINED
+#endif
+#if !defined(VINT32x8_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_cmple(vint32x8 vec1, vint32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x8_CMPLE_DEFINED
+#endif
+#if !defined(VINT32x8_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_cmpge(vint32x8 vec1, vint32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x8_CMPGE_DEFINED
+#endif
+#if !defined(VINT32x8_MIN_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_min(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VINT32x8_MIN_DEFINED
+#endif
+#if !defined(VINT32x8_MAX_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_max(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VINT32x8_MAX_DEFINED
+#endif
+#if !defined(VINT32x8_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_rshift(vint32x8 vec1, vuint32x8 vec2)
+{
+	/* arithmetic right shift, emulated portably: `>>` on a negative signed value is implementation-defined in C99 */
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~((~vec1.generic[0]) >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~((~vec1.generic[1]) >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~((~vec1.generic[2]) >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~((~vec1.generic[3]) >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < 0) ? ~((~vec1.generic[4]) >> vec2.generic[4]) : (vec1.generic[4] >> vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < 0) ? ~((~vec1.generic[5]) >> vec2.generic[5]) : (vec1.generic[5] >> vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < 0) ? ~((~vec1.generic[6]) >> vec2.generic[6]) : (vec1.generic[6] >> vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < 0) ? ~((~vec1.generic[7]) >> vec2.generic[7]) : (vec1.generic[7] >> vec2.generic[7]);
+	return vec1;
+}
+# define VINT32x8_RSHIFT_DEFINED
+#endif
+#if !defined(VINT32x8_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_lrshift(vint32x8 vec1, vuint32x8 vec2)
+{
+	union { vec_uint32 u; vec_int32 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u >>= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u >>= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u >>= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u >>= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	return vec1;
+}
+# define VINT32x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT32x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x8 vint32x8_lshift(vint32x8 vec1, vuint32x8 vec2)
+{
+	union { vec_uint32 u; vec_int32 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u <<= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u <<= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u <<= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u <<= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	return vec1;
+}
+# define VINT32x8_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_splat(vec_uint32 x)
+{
+	vuint32x8 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	return vec;
+}
+# define VUINT32x8_SPLAT_DEFINED
+#endif
+#if !defined(VUINT32x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_load_aligned(const vec_uint32 x[8])
+{
+	vuint32x8 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VUINT32x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT32x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_load(const vec_uint32 x[8])
+{
+	vuint32x8 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VUINT32x8_LOAD_DEFINED
+#endif
+#if !defined(VUINT32x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint32x8_store_aligned(vuint32x8 vec, vec_uint32 x[8])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VUINT32x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT32x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint32x8_store(vuint32x8 vec, vec_uint32 x[8])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VUINT32x8_STORE_DEFINED
+#endif
+#if !defined(VUINT32x8_ADD_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_add(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	return vec1;
+}
+# define VUINT32x8_ADD_DEFINED
+#endif
+#if !defined(VUINT32x8_SUB_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_sub(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	return vec1;
+}
+# define VUINT32x8_SUB_DEFINED
+#endif
+#if !defined(VUINT32x8_MUL_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_mul(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	return vec1;
+}
+# define VUINT32x8_MUL_DEFINED
+#endif
+#if !defined(VUINT32x8_DIV_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_div(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VUINT32x8_DIV_DEFINED
+#endif
+#if !defined(VUINT32x8_MOD_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_mod(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VUINT32x8_MOD_DEFINED
+#endif
+#if !defined(VUINT32x8_AVG_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_avg(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1);
+	vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1);
+	vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1);
+	vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1);
+	return vec1;
+}
+# define VUINT32x8_AVG_DEFINED
+#endif
+#if !defined(VUINT32x8_AND_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_and(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	return vec1;
+}
+# define VUINT32x8_AND_DEFINED
+#endif
+#if !defined(VUINT32x8_OR_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_or(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	return vec1;
+}
+# define VUINT32x8_OR_DEFINED
+#endif
+#if !defined(VUINT32x8_XOR_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_xor(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	return vec1;
+}
+# define VUINT32x8_XOR_DEFINED
+#endif
+#if !defined(VUINT32x8_NOT_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_not(vuint32x8 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	return vec;
+}
+# define VUINT32x8_NOT_DEFINED
+#endif
+#if !defined(VUINT32x8_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmplt(vuint32x8 vec1, vuint32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x8_CMPLT_DEFINED
+#endif
+#if !defined(VUINT32x8_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpeq(vuint32x8 vec1, vuint32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x8_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT32x8_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpgt(vuint32x8 vec1, vuint32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x8_CMPGT_DEFINED
+#endif
+#if !defined(VUINT32x8_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmple(vuint32x8 vec1, vuint32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x8_CMPLE_DEFINED
+#endif
+#if !defined(VUINT32x8_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpge(vuint32x8 vec1, vuint32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x8_CMPGE_DEFINED
+#endif
+#if !defined(VUINT32x8_MIN_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_min(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VUINT32x8_MIN_DEFINED
+#endif
+#if !defined(VUINT32x8_MAX_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_max(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VUINT32x8_MAX_DEFINED
+#endif
+#if !defined(VUINT32x8_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_rshift(vuint32x8 vec1, vuint32x8 vec2)
+{
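+	/* Unsigned lanes: plain >> is already a logical (zero-filling) shift, applied per lane. */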
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	return vec1;
+}
+# define VUINT32x8_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x8_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_lrshift(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	return vec1;
+}
+# define VUINT32x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x8 vuint32x8_lshift(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	vec1.generic[4] <<= vec2.generic[4];
+	vec1.generic[5] <<= vec2.generic[5];
+	vec1.generic[6] <<= vec2.generic[6];
+	vec1.generic[7] <<= vec2.generic[7];
+	return vec1;
+}
+# define VUINT32x8_LSHIFT_DEFINED
+#endif
+#if !defined(VINT32x16_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_splat(vec_int32 x)
+{
+	vint32x16 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	return vec;
+}
+# define VINT32x16_SPLAT_DEFINED
+#endif
+#if !defined(VINT32x16_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_load_aligned(const vec_int32 x[16])
+{
+	vint32x16 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VINT32x16_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT32x16_LOAD_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_load(const vec_int32 x[16])
+{
+	vint32x16 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VINT32x16_LOAD_DEFINED
+#endif
+#if !defined(VINT32x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint32x16_store_aligned(vint32x16 vec, vec_int32 x[16])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VINT32x16_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT32x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vint32x16_store(vint32x16 vec, vec_int32 x[16])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VINT32x16_STORE_DEFINED
+#endif
+#if !defined(VINT32x16_ADD_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_add(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	return vec1;
+}
+# define VINT32x16_ADD_DEFINED
+#endif
+#if !defined(VINT32x16_SUB_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_sub(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	return vec1;
+}
+# define VINT32x16_SUB_DEFINED
+#endif
+#if !defined(VINT32x16_MUL_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_mul(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	return vec1;
+}
+# define VINT32x16_MUL_DEFINED
+#endif
+#if !defined(VINT32x16_DIV_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_div(vint32x16 vec1, vint32x16 vec2)
+{
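+	/* Lanes with a zero divisor yield 0 instead of invoking undefined behaviour. */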
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VINT32x16_DIV_DEFINED
+#endif
+#if !defined(VINT32x16_MOD_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_mod(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VINT32x16_MOD_DEFINED
+#endif
+#if !defined(VINT32x16_AVG_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_avg(vint32x16 vec1, vint32x16 vec2)
+{
+	vec_int32 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
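+	/* Each lane is averaged by summing halves and remainders separately, so the
+	 * intermediate sum cannot overflow; the trailing (rem_d_rem == 1) term rounds
+	 * the result up. */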
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[4] % 2);
+	y_d_rem = (vec2.generic[4] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[5] % 2);
+	y_d_rem = (vec2.generic[5] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[6] % 2);
+	y_d_rem = (vec2.generic[6] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[7] % 2);
+	y_d_rem = (vec2.generic[7] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[8] % 2);
+	y_d_rem = (vec2.generic[8] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[8] = ((vec1.generic[8] / 2) + (vec2.generic[8] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[9] % 2);
+	y_d_rem = (vec2.generic[9] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[9] = ((vec1.generic[9] / 2) + (vec2.generic[9] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[10] % 2);
+	y_d_rem = (vec2.generic[10] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[10] = ((vec1.generic[10] / 2) + (vec2.generic[10] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[11] % 2);
+	y_d_rem = (vec2.generic[11] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[11] = ((vec1.generic[11] / 2) + (vec2.generic[11] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[12] % 2);
+	y_d_rem = (vec2.generic[12] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[12] = ((vec1.generic[12] / 2) + (vec2.generic[12] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[13] % 2);
+	y_d_rem = (vec2.generic[13] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[13] = ((vec1.generic[13] / 2) + (vec2.generic[13] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[14] % 2);
+	y_d_rem = (vec2.generic[14] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[14] = ((vec1.generic[14] / 2) + (vec2.generic[14] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[15] % 2);
+	y_d_rem = (vec2.generic[15] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[15] = ((vec1.generic[15] / 2) + (vec2.generic[15] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT32x16_AVG_DEFINED
+#endif
+#if !defined(VINT32x16_AND_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_and(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]);
+	return vec1;
+}
+# define VINT32x16_AND_DEFINED
+#endif
+#if !defined(VINT32x16_OR_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_or(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]);
+	return vec1;
+}
+# define VINT32x16_OR_DEFINED
+#endif
+#if !defined(VINT32x16_XOR_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_xor(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]);
+	return vec1;
+}
+# define VINT32x16_XOR_DEFINED
+#endif
+#if !defined(VINT32x16_NOT_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_not(vint32x16 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	vec.generic[8] = ~vec.generic[8];
+	vec.generic[9] = ~vec.generic[9];
+	vec.generic[10] = ~vec.generic[10];
+	vec.generic[11] = ~vec.generic[11];
+	vec.generic[12] = ~vec.generic[12];
+	vec.generic[13] = ~vec.generic[13];
+	vec.generic[14] = ~vec.generic[14];
+	vec.generic[15] = ~vec.generic[15];
+	return vec;
+}
+# define VINT32x16_NOT_DEFINED
+#endif
+#if !defined(VINT32x16_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_cmplt(vint32x16 vec1, vint32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x16_CMPLT_DEFINED
+#endif
+#if !defined(VINT32x16_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_cmpeq(vint32x16 vec1, vint32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x16_CMPEQ_DEFINED
+#endif
+#if !defined(VINT32x16_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_cmpgt(vint32x16 vec1, vint32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x16_CMPGT_DEFINED
+#endif
+#if !defined(VINT32x16_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_cmple(vint32x16 vec1, vint32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x16_CMPLE_DEFINED
+#endif
+#if !defined(VINT32x16_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_cmpge(vint32x16 vec1, vint32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VINT32x16_CMPGE_DEFINED
+#endif
+#if !defined(VINT32x16_MIN_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_min(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VINT32x16_MIN_DEFINED
+#endif
+#if !defined(VINT32x16_MAX_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_max(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VINT32x16_MAX_DEFINED
+#endif
+#if !defined(VINT32x16_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_rshift(vint32x16 vec1, vuint32x16 vec2)
+{
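+	/* Portable arithmetic right shift: negative lanes are complemented before and
+	 * after the shift, so the shift itself always operates on a non-negative value. */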
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~((~vec1.generic[0]) >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~((~vec1.generic[1]) >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~((~vec1.generic[2]) >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~((~vec1.generic[3]) >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < 0) ? ~((~vec1.generic[4]) >> vec2.generic[4]) : (vec1.generic[4] >> vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < 0) ? ~((~vec1.generic[5]) >> vec2.generic[5]) : (vec1.generic[5] >> vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < 0) ? ~((~vec1.generic[6]) >> vec2.generic[6]) : (vec1.generic[6] >> vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < 0) ? ~((~vec1.generic[7]) >> vec2.generic[7]) : (vec1.generic[7] >> vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < 0) ? ~((~vec1.generic[8]) >> vec2.generic[8]) : (vec1.generic[8] >> vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < 0) ? ~((~vec1.generic[9]) >> vec2.generic[9]) : (vec1.generic[9] >> vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < 0) ? ~((~vec1.generic[10]) >> vec2.generic[10]) : (vec1.generic[10] >> vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < 0) ? ~((~vec1.generic[11]) >> vec2.generic[11]) : (vec1.generic[11] >> vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < 0) ? ~((~vec1.generic[12]) >> vec2.generic[12]) : (vec1.generic[12] >> vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < 0) ? ~((~vec1.generic[13]) >> vec2.generic[13]) : (vec1.generic[13] >> vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < 0) ? ~((~vec1.generic[14]) >> vec2.generic[14]) : (vec1.generic[14] >> vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < 0) ? ~((~vec1.generic[15]) >> vec2.generic[15]) : (vec1.generic[15] >> vec2.generic[15]);
+	return vec1;
+}
+# define VINT32x16_RSHIFT_DEFINED
+#endif
+#if !defined(VINT32x16_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_lrshift(vint32x16 vec1, vuint32x16 vec2)
+{
+	union { vec_uint32 u; vec_int32 s; } x;
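+	/* Shift through the unsigned member of the union so the result is a logical
+	 * (zero-filling) shift regardless of the lane's sign bit. */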
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u >>= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u >>= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u >>= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u >>= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	x.s = vec1.generic[8];
+	x.u >>= vec2.generic[8];
+	vec1.generic[8] = x.s;
+	x.s = vec1.generic[9];
+	x.u >>= vec2.generic[9];
+	vec1.generic[9] = x.s;
+	x.s = vec1.generic[10];
+	x.u >>= vec2.generic[10];
+	vec1.generic[10] = x.s;
+	x.s = vec1.generic[11];
+	x.u >>= vec2.generic[11];
+	vec1.generic[11] = x.s;
+	x.s = vec1.generic[12];
+	x.u >>= vec2.generic[12];
+	vec1.generic[12] = x.s;
+	x.s = vec1.generic[13];
+	x.u >>= vec2.generic[13];
+	vec1.generic[13] = x.s;
+	x.s = vec1.generic[14];
+	x.u >>= vec2.generic[14];
+	vec1.generic[14] = x.s;
+	x.s = vec1.generic[15];
+	x.u >>= vec2.generic[15];
+	vec1.generic[15] = x.s;
+	return vec1;
+}
+# define VINT32x16_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT32x16_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x16 vint32x16_lshift(vint32x16 vec1, vuint32x16 vec2)
+{
+	union { vec_uint32 u; vec_int32 s; } x;
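+	/* Left-shift through the unsigned member to sidestep undefined behaviour when
+	 * a shift would touch the sign bit. */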
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u <<= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u <<= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u <<= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u <<= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	x.s = vec1.generic[8];
+	x.u <<= vec2.generic[8];
+	vec1.generic[8] = x.s;
+	x.s = vec1.generic[9];
+	x.u <<= vec2.generic[9];
+	vec1.generic[9] = x.s;
+	x.s = vec1.generic[10];
+	x.u <<= vec2.generic[10];
+	vec1.generic[10] = x.s;
+	x.s = vec1.generic[11];
+	x.u <<= vec2.generic[11];
+	vec1.generic[11] = x.s;
+	x.s = vec1.generic[12];
+	x.u <<= vec2.generic[12];
+	vec1.generic[12] = x.s;
+	x.s = vec1.generic[13];
+	x.u <<= vec2.generic[13];
+	vec1.generic[13] = x.s;
+	x.s = vec1.generic[14];
+	x.u <<= vec2.generic[14];
+	vec1.generic[14] = x.s;
+	x.s = vec1.generic[15];
+	x.u <<= vec2.generic[15];
+	vec1.generic[15] = x.s;
+	return vec1;
+}
+# define VINT32x16_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x16_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_splat(vec_uint32 x)
+{
+	vuint32x16 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	return vec;
+}
+# define VUINT32x16_SPLAT_DEFINED
+#endif
+#if !defined(VUINT32x16_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_load_aligned(const vec_uint32 x[16])
+{
+	vuint32x16 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VUINT32x16_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT32x16_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_load(const vec_uint32 x[16])
+{
+	vuint32x16 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VUINT32x16_LOAD_DEFINED
+#endif
+#if !defined(VUINT32x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint32x16_store_aligned(vuint32x16 vec, vec_uint32 x[16])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VUINT32x16_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT32x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint32x16_store(vuint32x16 vec, vec_uint32 x[16])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VUINT32x16_STORE_DEFINED
+#endif
+#if !defined(VUINT32x16_ADD_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_add(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	return vec1;
+}
+# define VUINT32x16_ADD_DEFINED
+#endif
+#if !defined(VUINT32x16_SUB_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_sub(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	return vec1;
+}
+# define VUINT32x16_SUB_DEFINED
+#endif
+#if !defined(VUINT32x16_MUL_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_mul(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	return vec1;
+}
+# define VUINT32x16_MUL_DEFINED
+#endif
+#if !defined(VUINT32x16_DIV_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_div(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VUINT32x16_DIV_DEFINED
+#endif
+#if !defined(VUINT32x16_MOD_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_mod(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] % vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] % vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] % vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] % vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] % vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] % vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] % vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] % vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VUINT32x16_MOD_DEFINED
+#endif
+#if !defined(VUINT32x16_AVG_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_avg(vuint32x16 vec1, vuint32x16 vec2)
+{
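+	/* (a >> 1) + (b >> 1) + ((a | b) & 1) is the overflow-free, rounded-up average of a and b. */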
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1);
+	vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1);
+	vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1);
+	vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1);
+	vec1.generic[8] = (vec1.generic[8] >> 1) + (vec2.generic[8] >> 1) + ((vec1.generic[8] | vec2.generic[8]) & 1);
+	vec1.generic[9] = (vec1.generic[9] >> 1) + (vec2.generic[9] >> 1) + ((vec1.generic[9] | vec2.generic[9]) & 1);
+	vec1.generic[10] = (vec1.generic[10] >> 1) + (vec2.generic[10] >> 1) + ((vec1.generic[10] | vec2.generic[10]) & 1);
+	vec1.generic[11] = (vec1.generic[11] >> 1) + (vec2.generic[11] >> 1) + ((vec1.generic[11] | vec2.generic[11]) & 1);
+	vec1.generic[12] = (vec1.generic[12] >> 1) + (vec2.generic[12] >> 1) + ((vec1.generic[12] | vec2.generic[12]) & 1);
+	vec1.generic[13] = (vec1.generic[13] >> 1) + (vec2.generic[13] >> 1) + ((vec1.generic[13] | vec2.generic[13]) & 1);
+	vec1.generic[14] = (vec1.generic[14] >> 1) + (vec2.generic[14] >> 1) + ((vec1.generic[14] | vec2.generic[14]) & 1);
+	vec1.generic[15] = (vec1.generic[15] >> 1) + (vec2.generic[15] >> 1) + ((vec1.generic[15] | vec2.generic[15]) & 1);
+	return vec1;
+}
+# define VUINT32x16_AVG_DEFINED
+#endif
+#if !defined(VUINT32x16_AND_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_and(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] & vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] & vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] & vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] & vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] & vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] & vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] & vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] & vec2.generic[15]);
+	return vec1;
+}
+# define VUINT32x16_AND_DEFINED
+#endif
+#if !defined(VUINT32x16_OR_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_or(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] | vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] | vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] | vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] | vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] | vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] | vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] | vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] | vec2.generic[15]);
+	return vec1;
+}
+# define VUINT32x16_OR_DEFINED
+#endif
+#if !defined(VUINT32x16_XOR_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_xor(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] ^ vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] ^ vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] ^ vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] ^ vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] ^ vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] ^ vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] ^ vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] ^ vec2.generic[15]);
+	return vec1;
+}
+# define VUINT32x16_XOR_DEFINED
+#endif
+#if !defined(VUINT32x16_NOT_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_not(vuint32x16 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	vec.generic[8] = ~vec.generic[8];
+	vec.generic[9] = ~vec.generic[9];
+	vec.generic[10] = ~vec.generic[10];
+	vec.generic[11] = ~vec.generic[11];
+	vec.generic[12] = ~vec.generic[12];
+	vec.generic[13] = ~vec.generic[13];
+	vec.generic[14] = ~vec.generic[14];
+	vec.generic[15] = ~vec.generic[15];
+	return vec;
+}
+# define VUINT32x16_NOT_DEFINED
+#endif
+#if !defined(VUINT32x16_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmplt(vuint32x16 vec1, vuint32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x16_CMPLT_DEFINED
+#endif
+#if !defined(VUINT32x16_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpeq(vuint32x16 vec1, vuint32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x16_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT32x16_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpgt(vuint32x16 vec1, vuint32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x16_CMPGT_DEFINED
+#endif
+#if !defined(VUINT32x16_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmple(vuint32x16 vec1, vuint32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x16_CMPLE_DEFINED
+#endif
+#if !defined(VUINT32x16_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpge(vuint32x16 vec1, vuint32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VUINT32x16_CMPGE_DEFINED
+#endif
+#if !defined(VUINT32x16_MIN_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_min(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VUINT32x16_MIN_DEFINED
+#endif
+#if !defined(VUINT32x16_MAX_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_max(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VUINT32x16_MAX_DEFINED
+#endif
+#if !defined(VUINT32x16_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_rshift(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	vec1.generic[8] >>= vec2.generic[8];
+	vec1.generic[9] >>= vec2.generic[9];
+	vec1.generic[10] >>= vec2.generic[10];
+	vec1.generic[11] >>= vec2.generic[11];
+	vec1.generic[12] >>= vec2.generic[12];
+	vec1.generic[13] >>= vec2.generic[13];
+	vec1.generic[14] >>= vec2.generic[14];
+	vec1.generic[15] >>= vec2.generic[15];
+	return vec1;
+}
+# define VUINT32x16_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x16_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_lrshift(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	vec1.generic[8] >>= vec2.generic[8];
+	vec1.generic[9] >>= vec2.generic[9];
+	vec1.generic[10] >>= vec2.generic[10];
+	vec1.generic[11] >>= vec2.generic[11];
+	vec1.generic[12] >>= vec2.generic[12];
+	vec1.generic[13] >>= vec2.generic[13];
+	vec1.generic[14] >>= vec2.generic[14];
+	vec1.generic[15] >>= vec2.generic[15];
+	return vec1;
+}
+# define VUINT32x16_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x16_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x16 vuint32x16_lshift(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	vec1.generic[4] <<= vec2.generic[4];
+	vec1.generic[5] <<= vec2.generic[5];
+	vec1.generic[6] <<= vec2.generic[6];
+	vec1.generic[7] <<= vec2.generic[7];
+	vec1.generic[8] <<= vec2.generic[8];
+	vec1.generic[9] <<= vec2.generic[9];
+	vec1.generic[10] <<= vec2.generic[10];
+	vec1.generic[11] <<= vec2.generic[11];
+	vec1.generic[12] <<= vec2.generic[12];
+	vec1.generic[13] <<= vec2.generic[13];
+	vec1.generic[14] <<= vec2.generic[14];
+	vec1.generic[15] <<= vec2.generic[15];
+	return vec1;
+}
+# define VUINT32x16_LSHIFT_DEFINED
+#endif
+#if !defined(VINT64x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_splat(vec_int64 x)
+{
+	vint64x2 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	return vec;
+}
 # define VINT64x2_SPLAT_DEFINED
 #endif
-#ifndef VINT64x2_LOAD_ALIGNED_DEFINED
-VEC_GENERIC_LOAD_ALIGNED(/* nothing */, 64, 2)
+#if !defined(VINT64x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_load_aligned(const vec_int64 x[2])
+{
+	vint64x2 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
 # define VINT64x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT64x2_LOAD_DEFINED
-VEC_GENERIC_LOAD(/* nothing */, 64, 2)
+#if !defined(VINT64x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_load(const vec_int64 x[2])
+{
+	vint64x2 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
 # define VINT64x2_LOAD_DEFINED
 #endif
-#ifndef VINT64x2_STORE_ALIGNED_DEFINED
-VEC_GENERIC_STORE_ALIGNED(/* nothing */, 64, 2)
+#if !defined(VINT64x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint64x2_store_aligned(vint64x2 vec, vec_int64 x[2])
+{
+	memcpy(x, vec.generic, 16);
+}
 # define VINT64x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT64x2_STORE_DEFINED
-VEC_GENERIC_STORE(/* nothing */, 64, 2)
+#if !defined(VINT64x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vint64x2_store(vint64x2 vec, vec_int64 x[2])
+{
+	memcpy(x, vec.generic, 16);
+}
 # define VINT64x2_STORE_DEFINED
 #endif
-#ifndef VINT64x2_ADD_DEFINED
-VEC_GENERIC_ADD(/* nothing */, 64, 2)
+#if !defined(VINT64x2_ADD_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_add(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	return vec1;
+}
 # define VINT64x2_ADD_DEFINED
 #endif
-#ifndef VINT64x2_SUB_DEFINED
-VEC_GENERIC_SUB(/* nothing */, 64, 2)
+#if !defined(VINT64x2_SUB_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_sub(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	return vec1;
+}
 # define VINT64x2_SUB_DEFINED
 #endif
-#ifndef VINT64x2_MUL_DEFINED
-VEC_GENERIC_MUL(/* nothing */, 64, 2)
+#if !defined(VINT64x2_MUL_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_mul(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	return vec1;
+}
 # define VINT64x2_MUL_DEFINED
 #endif
-#ifndef VINT64x2_DIV_DEFINED
-VEC_GENERIC_DIV(/* nothing */, 64, 2)
+#if !defined(VINT64x2_DIV_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_div(vint64x2 vec1, vint64x2 vec2)
+{
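+	/* a zero divisor yields a zero lane rather than a division fault */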
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VINT64x2_DIV_DEFINED
 #endif
-#ifndef VINT64x2_MOD_DEFINED
-VEC_GENERIC_MOD(/* nothing */, 64, 2)
+#if !defined(VINT64x2_MOD_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_mod(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VINT64x2_MOD_DEFINED
 #endif
-#ifndef VINT64x2_AVG_DEFINED
-VEC_GENERIC_AVG(/* nothing */, 64, 2)
+#if !defined(VINT64x2_AVG_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_avg(vint64x2 vec1, vint64x2 vec2)
+{
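+	/* halve each operand before adding so the sum cannot overflow; the remainder terms round the average up whenever it is fractional */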
+	vec_int64 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
 # define VINT64x2_AVG_DEFINED
 #endif
-#ifndef VINT64x2_AND_DEFINED
-VEC_GENERIC_AND(/* nothing */, 64, 2)
+#if !defined(VINT64x2_AND_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_and(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	return vec1;
+}
 # define VINT64x2_AND_DEFINED
 #endif
-#ifndef VINT64x2_OR_DEFINED
-VEC_GENERIC_OR(/* nothing */, 64, 2)
+#if !defined(VINT64x2_OR_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_or(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	return vec1;
+}
 # define VINT64x2_OR_DEFINED
 #endif
-#ifndef VINT64x2_XOR_DEFINED
-VEC_GENERIC_XOR(/* nothing */, 64, 2)
+#if !defined(VINT64x2_XOR_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_xor(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	return vec1;
+}
 # define VINT64x2_XOR_DEFINED
 #endif
-#ifndef VINT64x2_NOT_DEFINED
-VEC_GENERIC_NOT(/* nothing */, 64, 2)
+#if !defined(VINT64x2_NOT_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_not(vint64x2 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	return vec;
+}
 # define VINT64x2_NOT_DEFINED
 #endif
-#ifndef VINT64x2_CMPLT_DEFINED
-VEC_GENERIC_CMPLT(/* nothing */, 64, 2)
+#if !defined(VINT64x2_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_cmplt(vint64x2 vec1, vint64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
 # define VINT64x2_CMPLT_DEFINED
 #endif
-#ifndef VINT64x2_CMPEQ_DEFINED
-VEC_GENERIC_CMPEQ(/* nothing */, 64, 2)
+#if !defined(VINT64x2_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_cmpeq(vint64x2 vec1, vint64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
 # define VINT64x2_CMPEQ_DEFINED
 #endif
-#ifndef VINT64x2_CMPGT_DEFINED
-VEC_GENERIC_CMPGT(/* nothing */, 64, 2)
+#if !defined(VINT64x2_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_cmpgt(vint64x2 vec1, vint64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
 # define VINT64x2_CMPGT_DEFINED
 #endif
-#ifndef VINT64x2_CMPLE_DEFINED
-VEC_GENERIC_CMPLE(/* nothing */, 64, 2)
+#if !defined(VINT64x2_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_cmple(vint64x2 vec1, vint64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
 # define VINT64x2_CMPLE_DEFINED
 #endif
-#ifndef VINT64x2_CMPGE_DEFINED
-VEC_GENERIC_CMPGE(/* nothing */, 64, 2)
+#if !defined(VINT64x2_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_cmpge(vint64x2 vec1, vint64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
 # define VINT64x2_CMPGE_DEFINED
 #endif
-#ifndef VINT64x2_MIN_DEFINED
-VEC_GENERIC_MIN(/* nothing */, 64, 2)
+#if !defined(VINT64x2_MIN_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_min(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VINT64x2_MIN_DEFINED
 #endif
-#ifndef VINT64x2_MAX_DEFINED
-VEC_GENERIC_MAX(/* nothing */, 64, 2)
+#if !defined(VINT64x2_MAX_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_max(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VINT64x2_MAX_DEFINED
 #endif
-#ifndef VINT64x2_RSHIFT_DEFINED
-VEC_GENERIC_RSHIFT(/* nothing */, 64, 2)
+#if !defined(VINT64x2_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_rshift(vint64x2 vec1, vuint64x2 vec2)
+{
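+	/* arithmetic (sign-extending) right shift, written so a negative value is never shifted directly (that result is implementation-defined in C99) */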
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~((~vec1.generic[0]) >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~((~vec1.generic[1]) >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	return vec1;
+}
 # define VINT64x2_RSHIFT_DEFINED
 #endif
-#ifndef VINT64x2_LRSHIFT_DEFINED
-VEC_GENERIC_LRSHIFT(/* nothing */, 64, 2)
+#if !defined(VINT64x2_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_lrshift(vint64x2 vec1, vuint64x2 vec2)
+{
+	union { vec_uint64 u; vec_int64 s; } x;
+
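+	/* shift through the unsigned union member so the shift zero-fills instead of sign-extending */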
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	return vec1;
+}
 # define VINT64x2_LRSHIFT_DEFINED
 #endif
-#ifndef VINT64x2_LSHIFT_DEFINED
-VEC_GENERIC_LSHIFT(/* nothing */, 64, 2)
+#if !defined(VINT64x2_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint64x2 vint64x2_lshift(vint64x2 vec1, vuint64x2 vec2)
+{
+	union { vec_uint64 u; vec_int64 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	return vec1;
+}
 # define VINT64x2_LSHIFT_DEFINED
 #endif
-
-
-/* vint64x2 */
-
-#ifndef VUINT64x2_SPLAT_DEFINED
-VEC_GENERIC_SPLAT(u, 64, 2)
+#if !defined(VUINT64x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_splat(vec_uint64 x)
+{
+	vuint64x2 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	return vec;
+}
 # define VUINT64x2_SPLAT_DEFINED
 #endif
-#ifndef VUINT64x2_LOAD_ALIGNED_DEFINED
-VEC_GENERIC_LOAD_ALIGNED(u, 64, 2)
+#if !defined(VUINT64x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_load_aligned(const vec_uint64 x[2])
+{
+	vuint64x2 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
 # define VUINT64x2_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT64x2_LOAD_DEFINED
-VEC_GENERIC_LOAD(u, 64, 2)
+#if !defined(VUINT64x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_load(const vec_uint64 x[2])
+{
+	vuint64x2 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
 # define VUINT64x2_LOAD_DEFINED
 #endif
-#ifndef VUINT64x2_STORE_ALIGNED_DEFINED
-VEC_GENERIC_STORE_ALIGNED(u, 64, 2)
+#if !defined(VUINT64x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint64x2_store_aligned(vuint64x2 vec, vec_uint64 x[2])
+{
+	memcpy(x, vec.generic, 16);
+}
 # define VUINT64x2_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT64x2_STORE_DEFINED
-VEC_GENERIC_STORE(u, 64, 2)
+#if !defined(VUINT64x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint64x2_store(vuint64x2 vec, vec_uint64 x[2])
+{
+	memcpy(x, vec.generic, 16);
+}
 # define VUINT64x2_STORE_DEFINED
 #endif
-#ifndef VUINT64x2_ADD_DEFINED
-VEC_GENERIC_ADD(u, 64, 2)
+#if !defined(VUINT64x2_ADD_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_add(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	return vec1;
+}
 # define VUINT64x2_ADD_DEFINED
 #endif
-#ifndef VUINT64x2_SUB_DEFINED
-VEC_GENERIC_SUB(u, 64, 2)
+#if !defined(VUINT64x2_SUB_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_sub(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	return vec1;
+}
 # define VUINT64x2_SUB_DEFINED
 #endif
-#ifndef VUINT64x2_MUL_DEFINED
-VEC_GENERIC_MUL(u, 64, 2)
+#if !defined(VUINT64x2_MUL_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_mul(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	return vec1;
+}
 # define VUINT64x2_MUL_DEFINED
 #endif
-#ifndef VUINT64x2_DIV_DEFINED
-VEC_GENERIC_DIV(u, 64, 2)
+#if !defined(VUINT64x2_DIV_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_div(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VUINT64x2_DIV_DEFINED
 #endif
-#ifndef VUINT64x2_MOD_DEFINED
-VEC_GENERIC_MOD(u, 64, 2)
+#if !defined(VUINT64x2_MOD_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_mod(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	return vec1;
+}
 # define VUINT64x2_MOD_DEFINED
 #endif
-#ifndef VUINT64x2_AVG_DEFINED
-VEC_GENERIC_AVG(u, 64, 2)
+#if !defined(VUINT64x2_AVG_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_avg(vuint64x2 vec1, vuint64x2 vec2)
+{
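+	/* halve each lane before adding so the sum cannot wrap; the final (| ... & 1) term rounds the average up when it is fractional */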
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	return vec1;
+}
 # define VUINT64x2_AVG_DEFINED
 #endif
-#ifndef VUINT64x2_AND_DEFINED
-VEC_GENERIC_AND(u, 64, 2)
+#if !defined(VUINT64x2_AND_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_and(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	return vec1;
+}
 # define VUINT64x2_AND_DEFINED
 #endif
-#ifndef VUINT64x2_OR_DEFINED
-VEC_GENERIC_OR(u, 64, 2)
+#if !defined(VUINT64x2_OR_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_or(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	return vec1;
+}
 # define VUINT64x2_OR_DEFINED
 #endif
-#ifndef VUINT64x2_XOR_DEFINED
-VEC_GENERIC_XOR(u, 64, 2)
+#if !defined(VUINT64x2_XOR_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_xor(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	return vec1;
+}
 # define VUINT64x2_XOR_DEFINED
 #endif
-#ifndef VUINT64x2_NOT_DEFINED
-VEC_GENERIC_NOT(u, 64, 2)
+#if !defined(VUINT64x2_NOT_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_not(vuint64x2 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	return vec;
+}
 # define VUINT64x2_NOT_DEFINED
 #endif
-#ifndef VUINT64x2_CMPLT_DEFINED
-VEC_GENERIC_CMPLT(u, 64, 2)
+#if !defined(VUINT64x2_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmplt(vuint64x2 vec1, vuint64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
 # define VUINT64x2_CMPLT_DEFINED
 #endif
-#ifndef VUINT64x2_CMPEQ_DEFINED
-VEC_GENERIC_CMPEQ(u, 64, 2)
+#if !defined(VUINT64x2_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpeq(vuint64x2 vec1, vuint64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
 # define VUINT64x2_CMPEQ_DEFINED
 #endif
-#ifndef VUINT64x2_CMPGT_DEFINED
-VEC_GENERIC_CMPGT(u, 64, 2)
+#if !defined(VUINT64x2_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpgt(vuint64x2 vec1, vuint64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
 # define VUINT64x2_CMPGT_DEFINED
 #endif
-#ifndef VUINT64x2_CMPLE_DEFINED
-VEC_GENERIC_CMPLE(u, 64, 2)
+#if !defined(VUINT64x2_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmple(vuint64x2 vec1, vuint64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
 # define VUINT64x2_CMPLE_DEFINED
 #endif
-#ifndef VUINT64x2_CMPGE_DEFINED
-VEC_GENERIC_CMPGE(u, 64, 2)
+#if !defined(VUINT64x2_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpge(vuint64x2 vec1, vuint64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
 # define VUINT64x2_CMPGE_DEFINED
 #endif
-#ifndef VUINT64x2_MIN_DEFINED
-VEC_GENERIC_MIN(u, 64, 2)
+#if !defined(VUINT64x2_MIN_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_min(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VUINT64x2_MIN_DEFINED
 #endif
-#ifndef VUINT64x2_MAX_DEFINED
-VEC_GENERIC_MAX(u, 64, 2)
+#if !defined(VUINT64x2_MAX_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_max(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
 # define VUINT64x2_MAX_DEFINED
 #endif
-#ifndef VUINT64x2_RSHIFT_DEFINED
-VEC_GENERIC_RSHIFT(u, 64, 2)
+#if !defined(VUINT64x2_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_rshift(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	return vec1;
+}
 # define VUINT64x2_RSHIFT_DEFINED
 #endif
-#ifndef VUINT64x2_LRSHIFT_DEFINED
-VEC_GENERIC_LRSHIFT(u, 64, 2)
+#if !defined(VUINT64x2_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_lrshift(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	return vec1;
+}
 # define VUINT64x2_LRSHIFT_DEFINED
 #endif
-#ifndef VUINT64x2_LSHIFT_DEFINED
-VEC_GENERIC_LSHIFT(u, 64, 2)
+#if !defined(VUINT64x2_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint64x2 vuint64x2_lshift(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	return vec1;
+}
 # define VUINT64x2_LSHIFT_DEFINED
 #endif
-#endif /* VEC_IMPL_GENERIC_H_ */
-
+#if !defined(VINT64x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_splat(vec_int64 x)
+{
+	vint64x4 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	return vec;
+}
+# define VINT64x4_SPLAT_DEFINED
+#endif
+#if !defined(VINT64x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_load_aligned(const vec_int64 x[4])
+{
+	vint64x4 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VINT64x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT64x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_load(const vec_int64 x[4])
+{
+	vint64x4 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VINT64x4_LOAD_DEFINED
+#endif
+#if !defined(VINT64x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint64x4_store_aligned(vint64x4 vec, vec_int64 x[4])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VINT64x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT64x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vint64x4_store(vint64x4 vec, vec_int64 x[4])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VINT64x4_STORE_DEFINED
+#endif
+#if !defined(VINT64x4_ADD_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_add(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	return vec1;
+}
+# define VINT64x4_ADD_DEFINED
+#endif
+#if !defined(VINT64x4_SUB_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_sub(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	return vec1;
+}
+# define VINT64x4_SUB_DEFINED
+#endif
+#if !defined(VINT64x4_MUL_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_mul(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	return vec1;
+}
+# define VINT64x4_MUL_DEFINED
+#endif
+#if !defined(VINT64x4_DIV_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_div(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VINT64x4_DIV_DEFINED
+#endif
+#if !defined(VINT64x4_MOD_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_mod(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VINT64x4_MOD_DEFINED
+#endif
+#if !defined(VINT64x4_AVG_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_avg(vint64x4 vec1, vint64x4 vec2)
+{
+	vec_int64 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT64x4_AVG_DEFINED
+#endif
+#if !defined(VINT64x4_AND_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_and(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	return vec1;
+}
+# define VINT64x4_AND_DEFINED
+#endif
+#if !defined(VINT64x4_OR_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_or(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	return vec1;
+}
+# define VINT64x4_OR_DEFINED
+#endif
+#if !defined(VINT64x4_XOR_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_xor(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	return vec1;
+}
+# define VINT64x4_XOR_DEFINED
+#endif
+#if !defined(VINT64x4_NOT_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_not(vint64x4 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	return vec;
+}
+# define VINT64x4_NOT_DEFINED
+#endif
+#if !defined(VINT64x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_cmplt(vint64x4 vec1, vint64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VINT64x4_CMPLT_DEFINED
+#endif
+#if !defined(VINT64x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_cmpeq(vint64x4 vec1, vint64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VINT64x4_CMPEQ_DEFINED
+#endif
+#if !defined(VINT64x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_cmpgt(vint64x4 vec1, vint64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VINT64x4_CMPGT_DEFINED
+#endif
+#if !defined(VINT64x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_cmple(vint64x4 vec1, vint64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VINT64x4_CMPLE_DEFINED
+#endif
+#if !defined(VINT64x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_cmpge(vint64x4 vec1, vint64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VINT64x4_CMPGE_DEFINED
+#endif
+#if !defined(VINT64x4_MIN_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_min(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VINT64x4_MIN_DEFINED
+#endif
+#if !defined(VINT64x4_MAX_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_max(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VINT64x4_MAX_DEFINED
+#endif
+#if !defined(VINT64x4_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_rshift(vint64x4 vec1, vuint64x4 vec2)
+{
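+	/* arithmetic (sign-extending) right shift; negative lanes are complemented first so no negative value is shifted directly */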
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~((~vec1.generic[0]) >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~((~vec1.generic[1]) >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~((~vec1.generic[2]) >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~((~vec1.generic[3]) >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	return vec1;
+}
+# define VINT64x4_RSHIFT_DEFINED
+#endif
+#if !defined(VINT64x4_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_lrshift(vint64x4 vec1, vuint64x4 vec2)
+{
+	union { vec_uint64 u; vec_int64 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	return vec1;
+}
+# define VINT64x4_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT64x4_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint64x4 vint64x4_lshift(vint64x4 vec1, vuint64x4 vec2)
+{
+	union { vec_uint64 u; vec_int64 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	return vec1;
+}
+# define VINT64x4_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_splat(vec_uint64 x)
+{
+	vuint64x4 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	return vec;
+}
+# define VUINT64x4_SPLAT_DEFINED
+#endif
+#if !defined(VUINT64x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_load_aligned(const vec_uint64 x[4])
+{
+	vuint64x4 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VUINT64x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT64x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_load(const vec_uint64 x[4])
+{
+	vuint64x4 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VUINT64x4_LOAD_DEFINED
+#endif
+#if !defined(VUINT64x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint64x4_store_aligned(vuint64x4 vec, vec_uint64 x[4])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VUINT64x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT64x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint64x4_store(vuint64x4 vec, vec_uint64 x[4])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VUINT64x4_STORE_DEFINED
+#endif
+#if !defined(VUINT64x4_ADD_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_add(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	return vec1;
+}
+# define VUINT64x4_ADD_DEFINED
+#endif
+#if !defined(VUINT64x4_SUB_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_sub(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	return vec1;
+}
+# define VUINT64x4_SUB_DEFINED
+#endif
+#if !defined(VUINT64x4_MUL_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_mul(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	return vec1;
+}
+# define VUINT64x4_MUL_DEFINED
+#endif
+#if !defined(VUINT64x4_DIV_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_div(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VUINT64x4_DIV_DEFINED
+#endif
+#if !defined(VUINT64x4_MOD_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_mod(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VUINT64x4_MOD_DEFINED
+#endif
+#if !defined(VUINT64x4_AVG_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_avg(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	return vec1;
+}
+# define VUINT64x4_AVG_DEFINED
+#endif
+#if !defined(VUINT64x4_AND_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_and(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	return vec1;
+}
+# define VUINT64x4_AND_DEFINED
+#endif
+#if !defined(VUINT64x4_OR_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_or(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	return vec1;
+}
+# define VUINT64x4_OR_DEFINED
+#endif
+#if !defined(VUINT64x4_XOR_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_xor(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	return vec1;
+}
+# define VUINT64x4_XOR_DEFINED
+#endif
+#if !defined(VUINT64x4_NOT_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_not(vuint64x4 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	return vec;
+}
+# define VUINT64x4_NOT_DEFINED
+#endif
+#if !defined(VUINT64x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmplt(vuint64x4 vec1, vuint64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VUINT64x4_CMPLT_DEFINED
+#endif
+#if !defined(VUINT64x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpeq(vuint64x4 vec1, vuint64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VUINT64x4_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT64x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpgt(vuint64x4 vec1, vuint64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VUINT64x4_CMPGT_DEFINED
+#endif
+#if !defined(VUINT64x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmple(vuint64x4 vec1, vuint64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VUINT64x4_CMPLE_DEFINED
+#endif
+#if !defined(VUINT64x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpge(vuint64x4 vec1, vuint64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VUINT64x4_CMPGE_DEFINED
+#endif
+#if !defined(VUINT64x4_MIN_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_min(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VUINT64x4_MIN_DEFINED
+#endif
+#if !defined(VUINT64x4_MAX_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_max(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VUINT64x4_MAX_DEFINED
+#endif
+#if !defined(VUINT64x4_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_rshift(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	return vec1;
+}
+# define VUINT64x4_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x4_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_lrshift(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	return vec1;
+}
+# define VUINT64x4_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x4_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint64x4 vuint64x4_lshift(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	return vec1;
+}
+# define VUINT64x4_LSHIFT_DEFINED
+#endif
+#if !defined(VINT64x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_splat(vec_int64 x)
+{
+	vint64x8 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	return vec;
+}
+# define VINT64x8_SPLAT_DEFINED
+#endif
+#if !defined(VINT64x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_load_aligned(const vec_int64 x[8])
+{
+	vint64x8 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VINT64x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT64x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_load(const vec_int64 x[8])
+{
+	vint64x8 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VINT64x8_LOAD_DEFINED
+#endif
+#if !defined(VINT64x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint64x8_store_aligned(vint64x8 vec, vec_int64 x[8])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VINT64x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT64x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vint64x8_store(vint64x8 vec, vec_int64 x[8])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VINT64x8_STORE_DEFINED
+#endif
+#if !defined(VINT64x8_ADD_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_add(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	return vec1;
+}
+# define VINT64x8_ADD_DEFINED
+#endif
+#if !defined(VINT64x8_SUB_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_sub(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	return vec1;
+}
+# define VINT64x8_SUB_DEFINED
+#endif
+#if !defined(VINT64x8_MUL_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_mul(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	return vec1;
+}
+# define VINT64x8_MUL_DEFINED
+#endif
+#if !defined(VINT64x8_DIV_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_div(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VINT64x8_DIV_DEFINED
+#endif
+#if !defined(VINT64x8_MOD_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_mod(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VINT64x8_MOD_DEFINED
+#endif
+#if !defined(VINT64x8_AVG_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_avg(vint64x8 vec1, vint64x8 vec2)
+{
+	vec_int64 x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;
+	x_d_rem = (vec1.generic[0] % 2);
+	y_d_rem = (vec2.generic[0] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[0] = ((vec1.generic[0] / 2) + (vec2.generic[0] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[1] % 2);
+	y_d_rem = (vec2.generic[1] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[1] = ((vec1.generic[1] / 2) + (vec2.generic[1] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[2] % 2);
+	y_d_rem = (vec2.generic[2] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[2] = ((vec1.generic[2] / 2) + (vec2.generic[2] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[3] % 2);
+	y_d_rem = (vec2.generic[3] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[3] = ((vec1.generic[3] / 2) + (vec2.generic[3] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[4] % 2);
+	y_d_rem = (vec2.generic[4] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[4] = ((vec1.generic[4] / 2) + (vec2.generic[4] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[5] % 2);
+	y_d_rem = (vec2.generic[5] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[5] = ((vec1.generic[5] / 2) + (vec2.generic[5] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[6] % 2);
+	y_d_rem = (vec2.generic[6] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[6] = ((vec1.generic[6] / 2) + (vec2.generic[6] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	x_d_rem = (vec1.generic[7] % 2);
+	y_d_rem = (vec2.generic[7] % 2);
+	rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.generic[7] = ((vec1.generic[7] / 2) + (vec2.generic[7] / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return vec1;
+}
+# define VINT64x8_AVG_DEFINED
+#endif
+#if !defined(VINT64x8_AND_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_and(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	return vec1;
+}
+# define VINT64x8_AND_DEFINED
+#endif
+#if !defined(VINT64x8_OR_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_or(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	return vec1;
+}
+# define VINT64x8_OR_DEFINED
+#endif
+#if !defined(VINT64x8_XOR_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_xor(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	return vec1;
+}
+# define VINT64x8_XOR_DEFINED
+#endif
+#if !defined(VINT64x8_NOT_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_not(vint64x8 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	return vec;
+}
+# define VINT64x8_NOT_DEFINED
+#endif
+#if !defined(VINT64x8_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_cmplt(vint64x8 vec1, vint64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VINT64x8_CMPLT_DEFINED
+#endif
+#if !defined(VINT64x8_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_cmpeq(vint64x8 vec1, vint64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VINT64x8_CMPEQ_DEFINED
+#endif
+#if !defined(VINT64x8_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_cmpgt(vint64x8 vec1, vint64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VINT64x8_CMPGT_DEFINED
+#endif
+#if !defined(VINT64x8_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_cmple(vint64x8 vec1, vint64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VINT64x8_CMPLE_DEFINED
+#endif
+#if !defined(VINT64x8_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_cmpge(vint64x8 vec1, vint64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VINT64x8_CMPGE_DEFINED
+#endif
+#if !defined(VINT64x8_MIN_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_min(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VINT64x8_MIN_DEFINED
+#endif
+#if !defined(VINT64x8_MAX_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_max(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VINT64x8_MAX_DEFINED
+#endif
+#if !defined(VINT64x8_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_rshift(vint64x8 vec1, vuint64x8 vec2)
+{
+	/* arithmetic right shift; use the complement trick for negative lanes so the result
+	 * does not depend on implementation-defined signed shifts */
+	vec1.generic[0] = (vec1.generic[0] < 0) ? ~(~vec1.generic[0] >> vec2.generic[0]) : (vec1.generic[0] >> vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < 0) ? ~(~vec1.generic[1] >> vec2.generic[1]) : (vec1.generic[1] >> vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < 0) ? ~(~vec1.generic[2] >> vec2.generic[2]) : (vec1.generic[2] >> vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < 0) ? ~(~vec1.generic[3] >> vec2.generic[3]) : (vec1.generic[3] >> vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < 0) ? ~(~vec1.generic[4] >> vec2.generic[4]) : (vec1.generic[4] >> vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < 0) ? ~(~vec1.generic[5] >> vec2.generic[5]) : (vec1.generic[5] >> vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < 0) ? ~(~vec1.generic[6] >> vec2.generic[6]) : (vec1.generic[6] >> vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < 0) ? ~(~vec1.generic[7] >> vec2.generic[7]) : (vec1.generic[7] >> vec2.generic[7]);
+	return vec1;
+}
+# define VINT64x8_RSHIFT_DEFINED
+#endif
+#if !defined(VINT64x8_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_lrshift(vint64x8 vec1, vuint64x8 vec2)
+{
+	union { vec_uint64 u; vec_int64 s; } x;
+
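+	/* logical (zero-fill) right shift: shift through the unsigned member of the union so
+	 * negative lanes do not sign-extend */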
+	x.s = vec1.generic[0];
+	x.u >>= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u >>= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u >>= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u >>= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u >>= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u >>= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u >>= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u >>= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	return vec1;
+}
+# define VINT64x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT64x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint64x8 vint64x8_lshift(vint64x8 vec1, vuint64x8 vec2)
+{
+	union { vec_uint64 u; vec_int64 s; } x;
+
+	x.s = vec1.generic[0];
+	x.u <<= vec2.generic[0];
+	vec1.generic[0] = x.s;
+	x.s = vec1.generic[1];
+	x.u <<= vec2.generic[1];
+	vec1.generic[1] = x.s;
+	x.s = vec1.generic[2];
+	x.u <<= vec2.generic[2];
+	vec1.generic[2] = x.s;
+	x.s = vec1.generic[3];
+	x.u <<= vec2.generic[3];
+	vec1.generic[3] = x.s;
+	x.s = vec1.generic[4];
+	x.u <<= vec2.generic[4];
+	vec1.generic[4] = x.s;
+	x.s = vec1.generic[5];
+	x.u <<= vec2.generic[5];
+	vec1.generic[5] = x.s;
+	x.s = vec1.generic[6];
+	x.u <<= vec2.generic[6];
+	vec1.generic[6] = x.s;
+	x.s = vec1.generic[7];
+	x.u <<= vec2.generic[7];
+	vec1.generic[7] = x.s;
+	return vec1;
+}
+# define VINT64x8_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_splat(vec_uint64 x)
+{
+	vuint64x8 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	return vec;
+}
+# define VUINT64x8_SPLAT_DEFINED
+#endif
+#if !defined(VUINT64x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_load_aligned(const vec_uint64 x[8])
+{
+	vuint64x8 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VUINT64x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT64x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_load(const vec_uint64 x[8])
+{
+	vuint64x8 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VUINT64x8_LOAD_DEFINED
+#endif
+#if !defined(VUINT64x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint64x8_store_aligned(vuint64x8 vec, vec_uint64 x[8])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VUINT64x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT64x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vuint64x8_store(vuint64x8 vec, vec_uint64 x[8])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VUINT64x8_STORE_DEFINED
+#endif
+#if !defined(VUINT64x8_ADD_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_add(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	return vec1;
+}
+# define VUINT64x8_ADD_DEFINED
+#endif
+#if !defined(VUINT64x8_SUB_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_sub(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	return vec1;
+}
+# define VUINT64x8_SUB_DEFINED
+#endif
+#if !defined(VUINT64x8_MUL_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_mul(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	return vec1;
+}
+# define VUINT64x8_MUL_DEFINED
+#endif
+#if !defined(VUINT64x8_DIV_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_div(vuint64x8 vec1, vuint64x8 vec2)
+{
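+	/* lanes with a zero divisor produce 0 rather than dividing */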
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VUINT64x8_DIV_DEFINED
+#endif
+#if !defined(VUINT64x8_MOD_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_mod(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] % vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] % vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] % vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] % vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] % vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] % vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] % vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] % vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VUINT64x8_MOD_DEFINED
+#endif
+#if !defined(VUINT64x8_AVG_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_avg(vuint64x8 vec1, vuint64x8 vec2)
+{
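+	/* equivalent to (a + b + 1) / 2, computed without overflowing the intermediate sum */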
+	vec1.generic[0] = (vec1.generic[0] >> 1) + (vec2.generic[0] >> 1) + ((vec1.generic[0] | vec2.generic[0]) & 1);
+	vec1.generic[1] = (vec1.generic[1] >> 1) + (vec2.generic[1] >> 1) + ((vec1.generic[1] | vec2.generic[1]) & 1);
+	vec1.generic[2] = (vec1.generic[2] >> 1) + (vec2.generic[2] >> 1) + ((vec1.generic[2] | vec2.generic[2]) & 1);
+	vec1.generic[3] = (vec1.generic[3] >> 1) + (vec2.generic[3] >> 1) + ((vec1.generic[3] | vec2.generic[3]) & 1);
+	vec1.generic[4] = (vec1.generic[4] >> 1) + (vec2.generic[4] >> 1) + ((vec1.generic[4] | vec2.generic[4]) & 1);
+	vec1.generic[5] = (vec1.generic[5] >> 1) + (vec2.generic[5] >> 1) + ((vec1.generic[5] | vec2.generic[5]) & 1);
+	vec1.generic[6] = (vec1.generic[6] >> 1) + (vec2.generic[6] >> 1) + ((vec1.generic[6] | vec2.generic[6]) & 1);
+	vec1.generic[7] = (vec1.generic[7] >> 1) + (vec2.generic[7] >> 1) + ((vec1.generic[7] | vec2.generic[7]) & 1);
+	return vec1;
+}
+# define VUINT64x8_AVG_DEFINED
+#endif
+#if !defined(VUINT64x8_AND_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_and(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] & vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] & vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] & vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] & vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] & vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] & vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] & vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] & vec2.generic[7]);
+	return vec1;
+}
+# define VUINT64x8_AND_DEFINED
+#endif
+#if !defined(VUINT64x8_OR_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_or(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] | vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] | vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] | vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] | vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] | vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] | vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] | vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] | vec2.generic[7]);
+	return vec1;
+}
+# define VUINT64x8_OR_DEFINED
+#endif
+#if !defined(VUINT64x8_XOR_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_xor(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] ^ vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] ^ vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] ^ vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] ^ vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] ^ vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] ^ vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] ^ vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] ^ vec2.generic[7]);
+	return vec1;
+}
+# define VUINT64x8_XOR_DEFINED
+#endif
+#if !defined(VUINT64x8_NOT_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_not(vuint64x8 vec)
+{
+	vec.generic[0] = ~vec.generic[0];
+	vec.generic[1] = ~vec.generic[1];
+	vec.generic[2] = ~vec.generic[2];
+	vec.generic[3] = ~vec.generic[3];
+	vec.generic[4] = ~vec.generic[4];
+	vec.generic[5] = ~vec.generic[5];
+	vec.generic[6] = ~vec.generic[6];
+	vec.generic[7] = ~vec.generic[7];
+	return vec;
+}
+# define VUINT64x8_NOT_DEFINED
+#endif
+#if !defined(VUINT64x8_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmplt(vuint64x8 vec1, vuint64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VUINT64x8_CMPLT_DEFINED
+#endif
+#if !defined(VUINT64x8_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpeq(vuint64x8 vec1, vuint64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VUINT64x8_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT64x8_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpgt(vuint64x8 vec1, vuint64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VUINT64x8_CMPGT_DEFINED
+#endif
+#if !defined(VUINT64x8_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmple(vuint64x8 vec1, vuint64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VUINT64x8_CMPLE_DEFINED
+#endif
+#if !defined(VUINT64x8_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpge(vuint64x8 vec1, vuint64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VUINT64x8_CMPGE_DEFINED
+#endif
+#if !defined(VUINT64x8_MIN_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_min(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VUINT64x8_MIN_DEFINED
+#endif
+#if !defined(VUINT64x8_MAX_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_max(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VUINT64x8_MAX_DEFINED
+#endif
+#if !defined(VUINT64x8_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_rshift(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	return vec1;
+}
+# define VUINT64x8_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x8_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_lrshift(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.generic[0] >>= vec2.generic[0];
+	vec1.generic[1] >>= vec2.generic[1];
+	vec1.generic[2] >>= vec2.generic[2];
+	vec1.generic[3] >>= vec2.generic[3];
+	vec1.generic[4] >>= vec2.generic[4];
+	vec1.generic[5] >>= vec2.generic[5];
+	vec1.generic[6] >>= vec2.generic[6];
+	vec1.generic[7] >>= vec2.generic[7];
+	return vec1;
+}
+# define VUINT64x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT64x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint64x8 vuint64x8_lshift(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.generic[0] <<= vec2.generic[0];
+	vec1.generic[1] <<= vec2.generic[1];
+	vec1.generic[2] <<= vec2.generic[2];
+	vec1.generic[3] <<= vec2.generic[3];
+	vec1.generic[4] <<= vec2.generic[4];
+	vec1.generic[5] <<= vec2.generic[5];
+	vec1.generic[6] <<= vec2.generic[6];
+	vec1.generic[7] <<= vec2.generic[7];
+	return vec1;
+}
+# define VUINT64x8_LSHIFT_DEFINED
+#endif
+#if !defined(VF32x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_splat(vec_f32 x)
+{
+	vf32x2 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	return vec;
+}
+# define VF32x2_SPLAT_DEFINED
+#endif
+#if !defined(VF32x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_load_aligned(const vec_f32 x[2])
+{
+	vf32x2 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
+# define VF32x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_load(const vec_f32 x[2])
+{
+	vf32x2 vec;
+	memcpy(vec.generic, x, 8);
+	return vec;
+}
+# define VF32x2_LOAD_DEFINED
+#endif
+#if !defined(VF32x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf32x2_store_aligned(vf32x2 vec, vec_f32 x[2])
+{
+	memcpy(x, vec.generic, 8);
+}
+# define VF32x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vf32x2_store(vf32x2 vec, vec_f32 x[2])
+{
+	memcpy(x, vec.generic, 8);
+}
+# define VF32x2_STORE_DEFINED
+#endif
+#if !defined(VF32x2_ADD_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_add(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	return vec1;
+}
+# define VF32x2_ADD_DEFINED
+#endif
+#if !defined(VF32x2_SUB_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_sub(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	return vec1;
+}
+# define VF32x2_SUB_DEFINED
+#endif
+#if !defined(VF32x2_MUL_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_mul(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	return vec1;
+}
+# define VF32x2_MUL_DEFINED
+#endif
+#if !defined(VF32x2_DIV_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_div(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	return vec1;
+}
+# define VF32x2_DIV_DEFINED
+#endif
+#if !defined(VF32x2_MOD_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_mod(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0);
+	return vec1;
+}
+# define VF32x2_MOD_DEFINED
+#endif
+#if !defined(VF32x2_AVG_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_avg(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2;
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2;
+	return vec1;
+}
+# define VF32x2_AVG_DEFINED
+#endif
+#if !defined(VF32x2_CMPLT_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_cmplt(vf32x2 vec1, vf32x2 vec2)
+{
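+	/* floats use the same mask convention: the 4-byte lane is filled with 0xFF bytes on true, zeroed on false */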
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x2_CMPLT_DEFINED
+#endif
+#if !defined(VF32x2_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_cmpeq(vf32x2 vec1, vf32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x2_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x2_CMPGT_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_cmpgt(vf32x2 vec1, vf32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x2_CMPGT_DEFINED
+#endif
+#if !defined(VF32x2_CMPLE_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_cmple(vf32x2 vec1, vf32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x2_CMPLE_DEFINED
+#endif
+#if !defined(VF32x2_CMPGE_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_cmpge(vf32x2 vec1, vf32x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x2_CMPGE_DEFINED
+#endif
+#if !defined(VF32x2_MIN_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_min(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
+# define VF32x2_MIN_DEFINED
+#endif
+#if !defined(VF32x2_MAX_DEFINED)
+VEC_FUNC_IMPL vf32x2 vf32x2_max(vf32x2 vec1, vf32x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
+# define VF32x2_MAX_DEFINED
+#endif
+#if !defined(VF32x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_splat(vec_f32 x)
+{
+	vf32x4 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	return vec;
+}
+# define VF32x4_SPLAT_DEFINED
+#endif
+#if !defined(VF32x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_load_aligned(const vec_f32 x[4])
+{
+	vf32x4 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VF32x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_load(const vec_f32 x[4])
+{
+	vf32x4 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VF32x4_LOAD_DEFINED
+#endif
+#if !defined(VF32x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf32x4_store_aligned(vf32x4 vec, vec_f32 x[4])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VF32x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vf32x4_store(vf32x4 vec, vec_f32 x[4])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VF32x4_STORE_DEFINED
+#endif
+#if !defined(VF32x4_ADD_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_add(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	return vec1;
+}
+# define VF32x4_ADD_DEFINED
+#endif
+#if !defined(VF32x4_SUB_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_sub(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	return vec1;
+}
+# define VF32x4_SUB_DEFINED
+#endif
+#if !defined(VF32x4_MUL_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_mul(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	return vec1;
+}
+# define VF32x4_MUL_DEFINED
+#endif
+#if !defined(VF32x4_DIV_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_div(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VF32x4_DIV_DEFINED
+#endif
+#if !defined(VF32x4_MOD_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_mod(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? fmod(vec1.generic[2], vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? fmod(vec1.generic[3], vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VF32x4_MOD_DEFINED
+#endif
+#if !defined(VF32x4_AVG_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_avg(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2;
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2;
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]) / 2;
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]) / 2;
+	return vec1;
+}
+# define VF32x4_AVG_DEFINED
+#endif
+#if !defined(VF32x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmplt(vf32x4 vec1, vf32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x4_CMPLT_DEFINED
+#endif
+#if !defined(VF32x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpeq(vf32x4 vec1, vf32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x4_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpgt(vf32x4 vec1, vf32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x4_CMPGT_DEFINED
+#endif
+#if !defined(VF32x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmple(vf32x4 vec1, vf32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x4_CMPLE_DEFINED
+#endif
+#if !defined(VF32x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpge(vf32x4 vec1, vf32x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x4_CMPGE_DEFINED
+#endif
+#if !defined(VF32x4_MIN_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_min(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VF32x4_MIN_DEFINED
+#endif
+#if !defined(VF32x4_MAX_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_max(vf32x4 vec1, vf32x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VF32x4_MAX_DEFINED
+#endif
+#if !defined(VF32x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_splat(vec_f32 x)
+{
+	vf32x8 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	return vec;
+}
+# define VF32x8_SPLAT_DEFINED
+#endif
+#if !defined(VF32x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_load_aligned(const vec_f32 x[8])
+{
+	vf32x8 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VF32x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_load(const vec_f32 x[8])
+{
+	vf32x8 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VF32x8_LOAD_DEFINED
+#endif
+#if !defined(VF32x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf32x8_store_aligned(vf32x8 vec, vec_f32 x[8])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VF32x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vf32x8_store(vf32x8 vec, vec_f32 x[8])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VF32x8_STORE_DEFINED
+#endif
+#if !defined(VF32x8_ADD_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_add(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	return vec1;
+}
+# define VF32x8_ADD_DEFINED
+#endif
+#if !defined(VF32x8_SUB_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_sub(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	return vec1;
+}
+# define VF32x8_SUB_DEFINED
+#endif
+#if !defined(VF32x8_MUL_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_mul(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	return vec1;
+}
+# define VF32x8_MUL_DEFINED
+#endif
+#if !defined(VF32x8_DIV_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_div(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VF32x8_DIV_DEFINED
+#endif
+#if !defined(VF32x8_MOD_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_mod(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? fmod(vec1.generic[2], vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? fmod(vec1.generic[3], vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? fmod(vec1.generic[4], vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? fmod(vec1.generic[5], vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? fmod(vec1.generic[6], vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? fmod(vec1.generic[7], vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VF32x8_MOD_DEFINED
+#endif
+#if !defined(VF32x8_AVG_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_avg(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2;
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2;
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]) / 2;
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]) / 2;
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]) / 2;
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]) / 2;
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]) / 2;
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]) / 2;
+	return vec1;
+}
+# define VF32x8_AVG_DEFINED
+#endif
+#if !defined(VF32x8_CMPLT_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_cmplt(vf32x8 vec1, vf32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x8_CMPLT_DEFINED
+#endif
+#if !defined(VF32x8_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_cmpeq(vf32x8 vec1, vf32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x8_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x8_CMPGT_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_cmpgt(vf32x8 vec1, vf32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x8_CMPGT_DEFINED
+#endif
+#if !defined(VF32x8_CMPLE_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_cmple(vf32x8 vec1, vf32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x8_CMPLE_DEFINED
+#endif
+#if !defined(VF32x8_CMPGE_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_cmpge(vf32x8 vec1, vf32x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x8_CMPGE_DEFINED
+#endif
+#if !defined(VF32x8_MIN_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_min(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VF32x8_MIN_DEFINED
+#endif
+#if !defined(VF32x8_MAX_DEFINED)
+VEC_FUNC_IMPL vf32x8 vf32x8_max(vf32x8 vec1, vf32x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VF32x8_MAX_DEFINED
+#endif
+#if !defined(VF32x16_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_splat(vec_f32 x)
+{
+	vf32x16 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	vec.generic[8] = x;
+	vec.generic[9] = x;
+	vec.generic[10] = x;
+	vec.generic[11] = x;
+	vec.generic[12] = x;
+	vec.generic[13] = x;
+	vec.generic[14] = x;
+	vec.generic[15] = x;
+	return vec;
+}
+# define VF32x16_SPLAT_DEFINED
+#endif
+#if !defined(VF32x16_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_load_aligned(const vec_f32 x[16])
+{
+	vf32x16 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VF32x16_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x16_LOAD_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_load(const vec_f32 x[16])
+{
+	vf32x16 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VF32x16_LOAD_DEFINED
+#endif
+#if !defined(VF32x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf32x16_store_aligned(vf32x16 vec, vec_f32 x[16])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VF32x16_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x16_STORE_DEFINED)
+VEC_FUNC_IMPL void vf32x16_store(vf32x16 vec, vec_f32 x[16])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VF32x16_STORE_DEFINED
+#endif
+#if !defined(VF32x16_ADD_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_add(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]);
+	return vec1;
+}
+# define VF32x16_ADD_DEFINED
+#endif
+#if !defined(VF32x16_SUB_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_sub(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] - vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] - vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] - vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] - vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] - vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] - vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] - vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] - vec2.generic[15]);
+	return vec1;
+}
+# define VF32x16_SUB_DEFINED
+#endif
+#if !defined(VF32x16_MUL_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_mul(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] * vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] * vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] * vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] * vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] * vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] * vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] * vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] * vec2.generic[15]);
+	return vec1;
+}
+# define VF32x16_MUL_DEFINED
+#endif
+#if !defined(VF32x16_DIV_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_div(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? (vec1.generic[8] / vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? (vec1.generic[9] / vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? (vec1.generic[10] / vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? (vec1.generic[11] / vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? (vec1.generic[12] / vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? (vec1.generic[13] / vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? (vec1.generic[14] / vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? (vec1.generic[15] / vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VF32x16_DIV_DEFINED
+#endif
+#if !defined(VF32x16_MOD_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_mod(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? fmod(vec1.generic[2], vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? fmod(vec1.generic[3], vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? fmod(vec1.generic[4], vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? fmod(vec1.generic[5], vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? fmod(vec1.generic[6], vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? fmod(vec1.generic[7], vec2.generic[7]) : 0);
+	vec1.generic[8] = (vec2.generic[8] ? fmod(vec1.generic[8], vec2.generic[8]) : 0);
+	vec1.generic[9] = (vec2.generic[9] ? fmod(vec1.generic[9], vec2.generic[9]) : 0);
+	vec1.generic[10] = (vec2.generic[10] ? fmod(vec1.generic[10], vec2.generic[10]) : 0);
+	vec1.generic[11] = (vec2.generic[11] ? fmod(vec1.generic[11], vec2.generic[11]) : 0);
+	vec1.generic[12] = (vec2.generic[12] ? fmod(vec1.generic[12], vec2.generic[12]) : 0);
+	vec1.generic[13] = (vec2.generic[13] ? fmod(vec1.generic[13], vec2.generic[13]) : 0);
+	vec1.generic[14] = (vec2.generic[14] ? fmod(vec1.generic[14], vec2.generic[14]) : 0);
+	vec1.generic[15] = (vec2.generic[15] ? fmod(vec1.generic[15], vec2.generic[15]) : 0);
+	return vec1;
+}
+# define VF32x16_MOD_DEFINED
+#endif
+#if !defined(VF32x16_AVG_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_avg(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2;
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2;
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]) / 2;
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]) / 2;
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]) / 2;
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]) / 2;
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]) / 2;
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]) / 2;
+	vec1.generic[8] = (vec1.generic[8] + vec2.generic[8]) / 2;
+	vec1.generic[9] = (vec1.generic[9] + vec2.generic[9]) / 2;
+	vec1.generic[10] = (vec1.generic[10] + vec2.generic[10]) / 2;
+	vec1.generic[11] = (vec1.generic[11] + vec2.generic[11]) / 2;
+	vec1.generic[12] = (vec1.generic[12] + vec2.generic[12]) / 2;
+	vec1.generic[13] = (vec1.generic[13] + vec2.generic[13]) / 2;
+	vec1.generic[14] = (vec1.generic[14] + vec2.generic[14]) / 2;
+	vec1.generic[15] = (vec1.generic[15] + vec2.generic[15]) / 2;
+	return vec1;
+}
+# define VF32x16_AVG_DEFINED
+#endif
+#if !defined(VF32x16_CMPLT_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_cmplt(vf32x16 vec1, vf32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] < vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] < vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] < vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] < vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] < vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] < vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] < vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] < vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x16_CMPLT_DEFINED
+#endif
+#if !defined(VF32x16_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_cmpeq(vf32x16 vec1, vf32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] == vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] == vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] == vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] == vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] == vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] == vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] == vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] == vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x16_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x16_CMPGT_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_cmpgt(vf32x16 vec1, vf32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] > vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] > vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] > vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] > vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] > vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] > vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] > vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] > vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x16_CMPGT_DEFINED
+#endif
+#if !defined(VF32x16_CMPLE_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_cmple(vf32x16 vec1, vf32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] <= vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] <= vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] <= vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] <= vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] <= vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] <= vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] <= vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] <= vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x16_CMPLE_DEFINED
+#endif
+#if !defined(VF32x16_CMPGE_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_cmpge(vf32x16 vec1, vf32x16 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[8], (vec1.generic[8] >= vec2.generic[8]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[9], (vec1.generic[9] >= vec2.generic[9]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[10], (vec1.generic[10] >= vec2.generic[10]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[11], (vec1.generic[11] >= vec2.generic[11]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[12], (vec1.generic[12] >= vec2.generic[12]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[13], (vec1.generic[13] >= vec2.generic[13]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[14], (vec1.generic[14] >= vec2.generic[14]) ? 0xFF : 0, 4);
+	memset(&vec1.generic[15], (vec1.generic[15] >= vec2.generic[15]) ? 0xFF : 0, 4);
+	return vec1;
+}
+# define VF32x16_CMPGE_DEFINED
+#endif
+#if !defined(VF32x16_MIN_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_min(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] < vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] < vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] < vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] < vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] < vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] < vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] < vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] < vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VF32x16_MIN_DEFINED
+#endif
+#if !defined(VF32x16_MAX_DEFINED)
+VEC_FUNC_IMPL vf32x16 vf32x16_max(vf32x16 vec1, vf32x16 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	vec1.generic[8] = (vec1.generic[8] > vec2.generic[8]) ? (vec1.generic[8]) : (vec2.generic[8]);
+	vec1.generic[9] = (vec1.generic[9] > vec2.generic[9]) ? (vec1.generic[9]) : (vec2.generic[9]);
+	vec1.generic[10] = (vec1.generic[10] > vec2.generic[10]) ? (vec1.generic[10]) : (vec2.generic[10]);
+	vec1.generic[11] = (vec1.generic[11] > vec2.generic[11]) ? (vec1.generic[11]) : (vec2.generic[11]);
+	vec1.generic[12] = (vec1.generic[12] > vec2.generic[12]) ? (vec1.generic[12]) : (vec2.generic[12]);
+	vec1.generic[13] = (vec1.generic[13] > vec2.generic[13]) ? (vec1.generic[13]) : (vec2.generic[13]);
+	vec1.generic[14] = (vec1.generic[14] > vec2.generic[14]) ? (vec1.generic[14]) : (vec2.generic[14]);
+	vec1.generic[15] = (vec1.generic[15] > vec2.generic[15]) ? (vec1.generic[15]) : (vec2.generic[15]);
+	return vec1;
+}
+# define VF32x16_MAX_DEFINED
+#endif
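+/* The generic vf64xN implementations below follow the same pattern as the
+ * float lanes above: arithmetic is done lane by lane, comparisons fill each
+ * lane's bytes with 0xFF or 0x00 via memset to form a mask, div/mod yield 0
+ * for a zero divisor, and avg is simply (vec1 + vec2) / 2 per lane. */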
+#if !defined(VF64x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_splat(vec_f64 x)
+{
+	vf64x2 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	return vec;
+}
+# define VF64x2_SPLAT_DEFINED
+#endif
+#if !defined(VF64x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_load_aligned(const vec_f64 x[2])
+{
+	vf64x2 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VF64x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_load(const vec_f64 x[2])
+{
+	vf64x2 vec;
+	memcpy(vec.generic, x, 16);
+	return vec;
+}
+# define VF64x2_LOAD_DEFINED
+#endif
+#if !defined(VF64x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf64x2_store_aligned(vf64x2 vec, vec_f64 x[2])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VF64x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x2_STORE_DEFINED)
+VEC_FUNC_IMPL void vf64x2_store(vf64x2 vec, vec_f64 x[2])
+{
+	memcpy(x, vec.generic, 16);
+}
+# define VF64x2_STORE_DEFINED
+#endif
+#if !defined(VF64x2_ADD_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_add(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	return vec1;
+}
+# define VF64x2_ADD_DEFINED
+#endif
+#if !defined(VF64x2_SUB_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_sub(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	return vec1;
+}
+# define VF64x2_SUB_DEFINED
+#endif
+#if !defined(VF64x2_MUL_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_mul(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	return vec1;
+}
+# define VF64x2_MUL_DEFINED
+#endif
+#if !defined(VF64x2_DIV_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_div(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	return vec1;
+}
+# define VF64x2_DIV_DEFINED
+#endif
+#if !defined(VF64x2_MOD_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_mod(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0);
+	return vec1;
+}
+# define VF64x2_MOD_DEFINED
+#endif
+#if !defined(VF64x2_AVG_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_avg(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2;
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2;
+	return vec1;
+}
+# define VF64x2_AVG_DEFINED
+#endif
+#if !defined(VF64x2_CMPLT_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_cmplt(vf64x2 vec1, vf64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x2_CMPLT_DEFINED
+#endif
+#if !defined(VF64x2_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_cmpeq(vf64x2 vec1, vf64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x2_CMPEQ_DEFINED
+#endif
+#if !defined(VF64x2_CMPGT_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_cmpgt(vf64x2 vec1, vf64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x2_CMPGT_DEFINED
+#endif
+#if !defined(VF64x2_CMPLE_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_cmple(vf64x2 vec1, vf64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x2_CMPLE_DEFINED
+#endif
+#if !defined(VF64x2_CMPGE_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_cmpge(vf64x2 vec1, vf64x2 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x2_CMPGE_DEFINED
+#endif
+#if !defined(VF64x2_MIN_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_min(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
+# define VF64x2_MIN_DEFINED
+#endif
+#if !defined(VF64x2_MAX_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_max(vf64x2 vec1, vf64x2 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	return vec1;
+}
+# define VF64x2_MAX_DEFINED
+#endif
+#if !defined(VF64x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_splat(vec_f64 x)
+{
+	vf64x4 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	return vec;
+}
+# define VF64x4_SPLAT_DEFINED
+#endif
+#if !defined(VF64x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_load_aligned(const vec_f64 x[4])
+{
+	vf64x4 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VF64x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_load(const vec_f64 x[4])
+{
+	vf64x4 vec;
+	memcpy(vec.generic, x, 32);
+	return vec;
+}
+# define VF64x4_LOAD_DEFINED
+#endif
+#if !defined(VF64x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf64x4_store_aligned(vf64x4 vec, vec_f64 x[4])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VF64x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x4_STORE_DEFINED)
+VEC_FUNC_IMPL void vf64x4_store(vf64x4 vec, vec_f64 x[4])
+{
+	memcpy(x, vec.generic, 32);
+}
+# define VF64x4_STORE_DEFINED
+#endif
+#if !defined(VF64x4_ADD_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_add(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	return vec1;
+}
+# define VF64x4_ADD_DEFINED
+#endif
+#if !defined(VF64x4_SUB_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_sub(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	return vec1;
+}
+# define VF64x4_SUB_DEFINED
+#endif
+#if !defined(VF64x4_MUL_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_mul(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	return vec1;
+}
+# define VF64x4_MUL_DEFINED
+#endif
+#if !defined(VF64x4_DIV_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_div(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VF64x4_DIV_DEFINED
+#endif
+#if !defined(VF64x4_MOD_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_mod(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? fmod(vec1.generic[2], vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? fmod(vec1.generic[3], vec2.generic[3]) : 0);
+	return vec1;
+}
+# define VF64x4_MOD_DEFINED
+#endif
+#if !defined(VF64x4_AVG_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_avg(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2;
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2;
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]) / 2;
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]) / 2;
+	return vec1;
+}
+# define VF64x4_AVG_DEFINED
+#endif
+#if !defined(VF64x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_cmplt(vf64x4 vec1, vf64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x4_CMPLT_DEFINED
+#endif
+#if !defined(VF64x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_cmpeq(vf64x4 vec1, vf64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x4_CMPEQ_DEFINED
+#endif
+#if !defined(VF64x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_cmpgt(vf64x4 vec1, vf64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x4_CMPGT_DEFINED
+#endif
+#if !defined(VF64x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_cmple(vf64x4 vec1, vf64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x4_CMPLE_DEFINED
+#endif
+#if !defined(VF64x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_cmpge(vf64x4 vec1, vf64x4 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x4_CMPGE_DEFINED
+#endif
+#if !defined(VF64x4_MIN_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_min(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VF64x4_MIN_DEFINED
+#endif
+#if !defined(VF64x4_MAX_DEFINED)
+VEC_FUNC_IMPL vf64x4 vf64x4_max(vf64x4 vec1, vf64x4 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	return vec1;
+}
+# define VF64x4_MAX_DEFINED
+#endif
+#if !defined(VF64x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_splat(vec_f64 x)
+{
+	vf64x8 vec;
+	vec.generic[0] = x;
+	vec.generic[1] = x;
+	vec.generic[2] = x;
+	vec.generic[3] = x;
+	vec.generic[4] = x;
+	vec.generic[5] = x;
+	vec.generic[6] = x;
+	vec.generic[7] = x;
+	return vec;
+}
+# define VF64x8_SPLAT_DEFINED
+#endif
+#if !defined(VF64x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_load_aligned(const vec_f64 x[8])
+{
+	vf64x8 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VF64x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_load(const vec_f64 x[8])
+{
+	vf64x8 vec;
+	memcpy(vec.generic, x, 64);
+	return vec;
+}
+# define VF64x8_LOAD_DEFINED
+#endif
+#if !defined(VF64x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf64x8_store_aligned(vf64x8 vec, vec_f64 x[8])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VF64x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x8_STORE_DEFINED)
+VEC_FUNC_IMPL void vf64x8_store(vf64x8 vec, vec_f64 x[8])
+{
+	memcpy(x, vec.generic, 64);
+}
+# define VF64x8_STORE_DEFINED
+#endif
+#if !defined(VF64x8_ADD_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_add(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]);
+	return vec1;
+}
+# define VF64x8_ADD_DEFINED
+#endif
+#if !defined(VF64x8_SUB_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_sub(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] - vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] - vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] - vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] - vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] - vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] - vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] - vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] - vec2.generic[7]);
+	return vec1;
+}
+# define VF64x8_SUB_DEFINED
+#endif
+#if !defined(VF64x8_MUL_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_mul(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] * vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] * vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] * vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] * vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] * vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] * vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] * vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] * vec2.generic[7]);
+	return vec1;
+}
+# define VF64x8_MUL_DEFINED
+#endif
+#if !defined(VF64x8_DIV_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_div(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? (vec1.generic[0] / vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? (vec1.generic[1] / vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? (vec1.generic[2] / vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? (vec1.generic[3] / vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? (vec1.generic[4] / vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? (vec1.generic[5] / vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? (vec1.generic[6] / vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? (vec1.generic[7] / vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VF64x8_DIV_DEFINED
+#endif
+#if !defined(VF64x8_MOD_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_mod(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.generic[0] = (vec2.generic[0] ? fmod(vec1.generic[0], vec2.generic[0]) : 0);
+	vec1.generic[1] = (vec2.generic[1] ? fmod(vec1.generic[1], vec2.generic[1]) : 0);
+	vec1.generic[2] = (vec2.generic[2] ? fmod(vec1.generic[2], vec2.generic[2]) : 0);
+	vec1.generic[3] = (vec2.generic[3] ? fmod(vec1.generic[3], vec2.generic[3]) : 0);
+	vec1.generic[4] = (vec2.generic[4] ? fmod(vec1.generic[4], vec2.generic[4]) : 0);
+	vec1.generic[5] = (vec2.generic[5] ? fmod(vec1.generic[5], vec2.generic[5]) : 0);
+	vec1.generic[6] = (vec2.generic[6] ? fmod(vec1.generic[6], vec2.generic[6]) : 0);
+	vec1.generic[7] = (vec2.generic[7] ? fmod(vec1.generic[7], vec2.generic[7]) : 0);
+	return vec1;
+}
+# define VF64x8_MOD_DEFINED
+#endif
+#if !defined(VF64x8_AVG_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_avg(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] + vec2.generic[0]) / 2;
+	vec1.generic[1] = (vec1.generic[1] + vec2.generic[1]) / 2;
+	vec1.generic[2] = (vec1.generic[2] + vec2.generic[2]) / 2;
+	vec1.generic[3] = (vec1.generic[3] + vec2.generic[3]) / 2;
+	vec1.generic[4] = (vec1.generic[4] + vec2.generic[4]) / 2;
+	vec1.generic[5] = (vec1.generic[5] + vec2.generic[5]) / 2;
+	vec1.generic[6] = (vec1.generic[6] + vec2.generic[6]) / 2;
+	vec1.generic[7] = (vec1.generic[7] + vec2.generic[7]) / 2;
+	return vec1;
+}
+# define VF64x8_AVG_DEFINED
+#endif
+#if !defined(VF64x8_CMPLT_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_cmplt(vf64x8 vec1, vf64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] < vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] < vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] < vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] < vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] < vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] < vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] < vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] < vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x8_CMPLT_DEFINED
+#endif
+#if !defined(VF64x8_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_cmpeq(vf64x8 vec1, vf64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] == vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] == vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] == vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] == vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] == vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] == vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] == vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] == vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x8_CMPEQ_DEFINED
+#endif
+#if !defined(VF64x8_CMPGT_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_cmpgt(vf64x8 vec1, vf64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] > vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] > vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] > vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] > vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] > vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] > vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] > vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] > vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x8_CMPGT_DEFINED
+#endif
+#if !defined(VF64x8_CMPLE_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_cmple(vf64x8 vec1, vf64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] <= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] <= vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] <= vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] <= vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] <= vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] <= vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] <= vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] <= vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x8_CMPLE_DEFINED
+#endif
+#if !defined(VF64x8_CMPGE_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_cmpge(vf64x8 vec1, vf64x8 vec2)
+{
+	memset(&vec1.generic[0], (vec1.generic[0] >= vec2.generic[0]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[1], (vec1.generic[1] >= vec2.generic[1]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[2], (vec1.generic[2] >= vec2.generic[2]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[3], (vec1.generic[3] >= vec2.generic[3]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[4], (vec1.generic[4] >= vec2.generic[4]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[5], (vec1.generic[5] >= vec2.generic[5]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[6], (vec1.generic[6] >= vec2.generic[6]) ? 0xFF : 0, 8);
+	memset(&vec1.generic[7], (vec1.generic[7] >= vec2.generic[7]) ? 0xFF : 0, 8);
+	return vec1;
+}
+# define VF64x8_CMPGE_DEFINED
+#endif
+#if !defined(VF64x8_MIN_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_min(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] < vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] < vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] < vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] < vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] < vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] < vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] < vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] < vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VF64x8_MIN_DEFINED
+#endif
+#if !defined(VF64x8_MAX_DEFINED)
+VEC_FUNC_IMPL vf64x8 vf64x8_max(vf64x8 vec1, vf64x8 vec2)
+{
+	vec1.generic[0] = (vec1.generic[0] > vec2.generic[0]) ? (vec1.generic[0]) : (vec2.generic[0]);
+	vec1.generic[1] = (vec1.generic[1] > vec2.generic[1]) ? (vec1.generic[1]) : (vec2.generic[1]);
+	vec1.generic[2] = (vec1.generic[2] > vec2.generic[2]) ? (vec1.generic[2]) : (vec2.generic[2]);
+	vec1.generic[3] = (vec1.generic[3] > vec2.generic[3]) ? (vec1.generic[3]) : (vec2.generic[3]);
+	vec1.generic[4] = (vec1.generic[4] > vec2.generic[4]) ? (vec1.generic[4]) : (vec2.generic[4]);
+	vec1.generic[5] = (vec1.generic[5] > vec2.generic[5]) ? (vec1.generic[5]) : (vec2.generic[5]);
+	vec1.generic[6] = (vec1.generic[6] > vec2.generic[6]) ? (vec1.generic[6]) : (vec2.generic[6]);
+	vec1.generic[7] = (vec1.generic[7] > vec2.generic[7]) ? (vec1.generic[7]) : (vec2.generic[7]);
+	return vec1;
+}
+# define VF64x8_MAX_DEFINED
+#endif
--- a/include/vec/impl/ppc/altivec.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/include/vec/impl/ppc/altivec.h	Wed Apr 30 18:36:38 2025 -0400
@@ -25,16 +25,10 @@
 /* This file is automatically generated! Do not edit it directly!
  * Edit the code that generates it in utils/genaltivec.c  --paper */
 
-#ifndef VEC_IMPL_PPC_ALTIVEC_H_
-#define VEC_IMPL_PPC_ALTIVEC_H_
-
-
+/* ------------------------------------------------------------------------ */
+/* PREPROCESSOR HELL INCOMING */
 
-
-/* vuint8x16 */
-
-#if defined(vec_splats) || defined(vec_splat_s8)
-#ifndef VINT8x16_SPLAT_DEFINED
+#if !defined(VINT8x16_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_splat(vec_int8 x)
 {
 	vint8x16 vec;
@@ -43,8 +37,7 @@
 }
 # define VINT8x16_SPLAT_DEFINED
 #endif
-#endif
-#ifndef VINT8x16_LOAD_ALIGNED_DEFINED
+#if !defined(VINT8x16_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_load_aligned(const vec_int8 x[16])
 {
 	vint8x16 vec;
@@ -53,7 +46,7 @@
 }
 # define VINT8x16_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x16_LOAD_DEFINED
+#if !defined(VINT8x16_LOAD_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_load(const vec_int8 x[16])
 {
 	vint8x16 vec;
@@ -62,21 +55,14 @@
 }
 # define VINT8x16_LOAD_DEFINED
 #endif
-#ifndef VINT8x16_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 arr[16])
+#if !defined(VINT8x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 x[16])
 {
-	vec_st(vec.altivec, 0, arr);
+	vec_st(vec.altivec, 0, x);
 }
 # define VINT8x16_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT8x16_STORE_DEFINED
-VEC_FUNC_IMPL void vint8x16_store(vint8x16 vec, vec_int8 arr[16])
-{
-	memcpy(arr, &vec, sizeof(vec));
-}
-# define VINT8x16_STORE_DEFINED
-#endif
-#ifndef VINT8x16_ADD_DEFINED
+#if !defined(VINT8x16_ADD_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_add(vint8x16 vec1, vint8x16 vec2)
 {
 	vint8x16 vec;
@@ -85,7 +71,7 @@
 }
 # define VINT8x16_ADD_DEFINED
 #endif
-#ifndef VINT8x16_SUB_DEFINED
+#if !defined(VINT8x16_SUB_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_sub(vint8x16 vec1, vint8x16 vec2)
 {
 	vint8x16 vec;
@@ -94,8 +80,7 @@
 }
 # define VINT8x16_SUB_DEFINED
 #endif
-#ifdef vec_mul
-#ifndef VINT8x16_MUL_DEFINED
+#if !defined(VINT8x16_MUL_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_mul(vint8x16 vec1, vint8x16 vec2)
 {
 	vint8x16 vec;
@@ -104,8 +89,16 @@
 }
 # define VINT8x16_MUL_DEFINED
 #endif
+#if !defined(VINT8x16_AVG_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_avg(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_AVG_DEFINED
 #endif
-#ifndef VINT8x16_AND_DEFINED
+#if !defined(VINT8x16_AND_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2)
 {
 	vint8x16 vec;
@@ -114,7 +107,7 @@
 }
 # define VINT8x16_AND_DEFINED
 #endif
-#ifndef VINT8x16_OR_DEFINED
+#if !defined(VINT8x16_OR_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2)
 {
 	vint8x16 vec;
@@ -123,7 +116,7 @@
 }
 # define VINT8x16_OR_DEFINED
 #endif
-#ifndef VINT8x16_XOR_DEFINED
+#if !defined(VINT8x16_XOR_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2)
 {
 	vint8x16 vec;
@@ -132,7 +125,7 @@
 }
 # define VINT8x16_XOR_DEFINED
 #endif
-#ifndef VINT8x16_CMPLT_DEFINED
+#if !defined(VINT8x16_CMPLT_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2)
 {
 	vint8x16 vec;
@@ -141,7 +134,7 @@
 }
 # define VINT8x16_CMPLT_DEFINED
 #endif
-#ifndef VINT8x16_CMPEQ_DEFINED
+#if !defined(VINT8x16_CMPEQ_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2)
 {
 	vint8x16 vec;
@@ -150,7 +143,7 @@
 }
 # define VINT8x16_CMPEQ_DEFINED
 #endif
-#ifndef VINT8x16_CMPGT_DEFINED
+#if !defined(VINT8x16_CMPGT_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2)
 {
 	vint8x16 vec;
@@ -159,7 +152,7 @@
 }
 # define VINT8x16_CMPGT_DEFINED
 #endif
-#ifndef VINT8x16_MIN_DEFINED
+#if !defined(VINT8x16_MIN_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2)
 {
 	vint8x16 vec;
@@ -168,7 +161,7 @@
 }
 # define VINT8x16_MIN_DEFINED
 #endif
-#ifndef VINT8x16_MAX_DEFINED
+#if !defined(VINT8x16_MAX_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2)
 {
 	vint8x16 vec;
@@ -177,34 +170,7 @@
 }
 # define VINT8x16_MAX_DEFINED
 #endif
-#ifndef VINT8x16_AVG_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2)
-{
-	vint8x16 vec;
-	vec.altivec = (vector signed char)vec_avg(vec1.altivec, vec2.altivec);
-	return vec;
-}
-# define VINT8x16_AVG_DEFINED
-#endif
-#ifndef VINT8x16_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2)
-{
-	vint8x16 vec;
-	vec.altivec = (vector signed char)vec_sl(vec1.altivec, vec2.altivec);
-	return vec;
-}
-# define VINT8x16_LSHIFT_DEFINED
-#endif
-#ifndef VINT8x16_LRSHIFT_DEFINED
-VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2)
-{
-	vint8x16 vec;
-	vec.altivec = (vector signed char)vec_sr(vec1.altivec, vec2.altivec);
-	return vec;
-}
-# define VINT8x16_LRSHIFT_DEFINED
-#endif
-#ifndef VINT8x16_RSHIFT_DEFINED
+#if !defined(VINT8x16_RSHIFT_DEFINED)
 VEC_FUNC_IMPL vint8x16 vint8x16_rshift(vint8x16 vec1, vuint8x16 vec2)
 {
 	vint8x16 vec;
@@ -213,12 +179,25 @@
 }
 # define VINT8x16_RSHIFT_DEFINED
 #endif
-
-
-/* vint8x16 */
-
-#if defined(vec_splats) || defined(vec_splat_u8)
-#ifndef VUINT8x16_SPLAT_DEFINED
+#if !defined(VINT8x16_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT8x16_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_sl(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x16_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_splat(vec_uint8 x)
 {
 	vuint8x16 vec;
@@ -227,8 +206,7 @@
 }
 # define VUINT8x16_SPLAT_DEFINED
 #endif
-#endif
-#ifndef VUINT8x16_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT8x16_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_load_aligned(const vec_uint8 x[16])
 {
 	vuint8x16 vec;
@@ -237,7 +215,7 @@
 }
 # define VUINT8x16_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x16_LOAD_DEFINED
+#if !defined(VUINT8x16_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 x[16])
 {
 	vuint8x16 vec;
@@ -246,21 +224,14 @@
 }
 # define VUINT8x16_LOAD_DEFINED
 #endif
-#ifndef VUINT8x16_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 arr[16])
+#if !defined(VUINT8x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 x[16])
 {
-	vec_st(vec.altivec, 0, arr);
+	vec_st(vec.altivec, 0, x);
 }
 # define VUINT8x16_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT8x16_STORE_DEFINED
-VEC_FUNC_IMPL void vuint8x16_store(vuint8x16 vec, vec_uint8 arr[16])
-{
-	memcpy(arr, &vec, sizeof(vec));
-}
-# define VUINT8x16_STORE_DEFINED
-#endif
-#ifndef VUINT8x16_ADD_DEFINED
+#if !defined(VUINT8x16_ADD_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_add(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
@@ -269,7 +240,7 @@
 }
 # define VUINT8x16_ADD_DEFINED
 #endif
-#ifndef VUINT8x16_SUB_DEFINED
+#if !defined(VUINT8x16_SUB_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_sub(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
@@ -278,8 +249,7 @@
 }
 # define VUINT8x16_SUB_DEFINED
 #endif
-#ifdef vec_mul
-#ifndef VUINT8x16_MUL_DEFINED
+#if !defined(VUINT8x16_MUL_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_mul(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
@@ -288,8 +258,16 @@
 }
 # define VUINT8x16_MUL_DEFINED
 #endif
+#if !defined(VUINT8x16_AVG_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_avg(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_AVG_DEFINED
 #endif
-#ifndef VUINT8x16_AND_DEFINED
+#if !defined(VUINT8x16_AND_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_and(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
@@ -298,7 +276,7 @@
 }
 # define VUINT8x16_AND_DEFINED
 #endif
-#ifndef VUINT8x16_OR_DEFINED
+#if !defined(VUINT8x16_OR_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_or(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
@@ -307,7 +285,7 @@
 }
 # define VUINT8x16_OR_DEFINED
 #endif
-#ifndef VUINT8x16_XOR_DEFINED
+#if !defined(VUINT8x16_XOR_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_xor(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
@@ -316,7 +294,7 @@
 }
 # define VUINT8x16_XOR_DEFINED
 #endif
-#ifndef VUINT8x16_CMPLT_DEFINED
+#if !defined(VUINT8x16_CMPLT_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_cmplt(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
@@ -325,7 +303,7 @@
 }
 # define VUINT8x16_CMPLT_DEFINED
 #endif
-#ifndef VUINT8x16_CMPEQ_DEFINED
+#if !defined(VUINT8x16_CMPEQ_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpeq(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
@@ -334,7 +312,7 @@
 }
 # define VUINT8x16_CMPEQ_DEFINED
 #endif
-#ifndef VUINT8x16_CMPGT_DEFINED
+#if !defined(VUINT8x16_CMPGT_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpgt(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
@@ -343,7 +321,7 @@
 }
 # define VUINT8x16_CMPGT_DEFINED
 #endif
-#ifndef VUINT8x16_MIN_DEFINED
+#if !defined(VUINT8x16_MIN_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_min(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
@@ -352,7 +330,7 @@
 }
 # define VUINT8x16_MIN_DEFINED
 #endif
-#ifndef VUINT8x16_MAX_DEFINED
+#if !defined(VUINT8x16_MAX_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_max(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
@@ -361,16 +339,25 @@
 }
 # define VUINT8x16_MAX_DEFINED
 #endif
-#ifndef VUINT8x16_AVG_DEFINED
-VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2)
+#if !defined(VUINT8x16_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
-	vec.altivec = (vector unsigned char)vec_avg(vec1.altivec, vec2.altivec);
+	vec.altivec = vec_sr(vec1.altivec, vec2.altivec);
 	return vec;
 }
-# define VUINT8x16_AVG_DEFINED
+# define VUINT8x16_RSHIFT_DEFINED
 #endif
-#ifndef VUINT8x16_LSHIFT_DEFINED
+#if !defined(VUINT8x16_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x16_LSHIFT_DEFINED)
 VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2)
 {
 	vuint8x16 vec;
@@ -379,30 +366,7 @@
 }
 # define VUINT8x16_LSHIFT_DEFINED
 #endif
-#ifndef VUINT8x16_LRSHIFT_DEFINED
-VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2)
-{
-	vuint8x16 vec;
-	vec.altivec = (vector unsigned char)vec_sr(vec1.altivec, vec2.altivec);
-	return vec;
-}
-# define VUINT8x16_LRSHIFT_DEFINED
-#endif
-#ifndef VUINT8x16_RSHIFT_DEFINED
-VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2)
-{
-	vuint8x16 vec;
-	vec.altivec = vec_sr(vec1.altivec, vec2.altivec);
-	return vec;
-}
-# define VUINT8x16_RSHIFT_DEFINED
-#endif
-
-
-/* vuint16x8 */
-
-#if defined(vec_splats) || defined(vec_splat_s16)
-#ifndef VINT16x8_SPLAT_DEFINED
+#if !defined(VINT16x8_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_splat(vec_int16 x)
 {
 	vint16x8 vec;
@@ -411,8 +375,7 @@
 }
 # define VINT16x8_SPLAT_DEFINED
 #endif
-#endif
-#ifndef VINT16x8_LOAD_ALIGNED_DEFINED
+#if !defined(VINT16x8_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_load_aligned(const vec_int16 x[8])
 {
 	vint16x8 vec;
@@ -421,7 +384,7 @@
 }
 # define VINT16x8_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x8_LOAD_DEFINED
+#if !defined(VINT16x8_LOAD_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_load(const vec_int16 x[8])
 {
 	vint16x8 vec;
@@ -430,21 +393,14 @@
 }
 # define VINT16x8_LOAD_DEFINED
 #endif
-#ifndef VINT16x8_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 arr[8])
+#if !defined(VINT16x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 x[8])
 {
-	vec_st(vec.altivec, 0, arr);
+	vec_st(vec.altivec, 0, x);
 }
 # define VINT16x8_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT16x8_STORE_DEFINED
-VEC_FUNC_IMPL void vint16x8_store(vint16x8 vec, vec_int16 arr[8])
-{
-	memcpy(arr, &vec, sizeof(vec));
-}
-# define VINT16x8_STORE_DEFINED
-#endif
-#ifndef VINT16x8_ADD_DEFINED
+#if !defined(VINT16x8_ADD_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_add(vint16x8 vec1, vint16x8 vec2)
 {
 	vint16x8 vec;
@@ -453,7 +409,7 @@
 }
 # define VINT16x8_ADD_DEFINED
 #endif
-#ifndef VINT16x8_SUB_DEFINED
+#if !defined(VINT16x8_SUB_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_sub(vint16x8 vec1, vint16x8 vec2)
 {
 	vint16x8 vec;
@@ -462,8 +418,7 @@
 }
 # define VINT16x8_SUB_DEFINED
 #endif
-#ifdef vec_mul
-#ifndef VINT16x8_MUL_DEFINED
+#if !defined(VINT16x8_MUL_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_mul(vint16x8 vec1, vint16x8 vec2)
 {
 	vint16x8 vec;
@@ -472,8 +427,16 @@
 }
 # define VINT16x8_MUL_DEFINED
 #endif
+#if !defined(VINT16x8_AVG_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_avg(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_AVG_DEFINED
 #endif
-#ifndef VINT16x8_AND_DEFINED
+#if !defined(VINT16x8_AND_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2)
 {
 	vint16x8 vec;
@@ -482,7 +445,7 @@
 }
 # define VINT16x8_AND_DEFINED
 #endif
-#ifndef VINT16x8_OR_DEFINED
+#if !defined(VINT16x8_OR_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2)
 {
 	vint16x8 vec;
@@ -491,7 +454,7 @@
 }
 # define VINT16x8_OR_DEFINED
 #endif
-#ifndef VINT16x8_XOR_DEFINED
+#if !defined(VINT16x8_XOR_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2)
 {
 	vint16x8 vec;
@@ -500,7 +463,7 @@
 }
 # define VINT16x8_XOR_DEFINED
 #endif
-#ifndef VINT16x8_CMPLT_DEFINED
+#if !defined(VINT16x8_CMPLT_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2)
 {
 	vint16x8 vec;
@@ -509,7 +472,7 @@
 }
 # define VINT16x8_CMPLT_DEFINED
 #endif
-#ifndef VINT16x8_CMPEQ_DEFINED
+#if !defined(VINT16x8_CMPEQ_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2)
 {
 	vint16x8 vec;
@@ -518,7 +481,7 @@
 }
 # define VINT16x8_CMPEQ_DEFINED
 #endif
-#ifndef VINT16x8_CMPGT_DEFINED
+#if !defined(VINT16x8_CMPGT_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2)
 {
 	vint16x8 vec;
@@ -527,7 +490,7 @@
 }
 # define VINT16x8_CMPGT_DEFINED
 #endif
-#ifndef VINT16x8_MIN_DEFINED
+#if !defined(VINT16x8_MIN_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2)
 {
 	vint16x8 vec;
@@ -536,7 +499,7 @@
 }
 # define VINT16x8_MIN_DEFINED
 #endif
-#ifndef VINT16x8_MAX_DEFINED
+#if !defined(VINT16x8_MAX_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2)
 {
 	vint16x8 vec;
@@ -545,25 +508,16 @@
 }
 # define VINT16x8_MAX_DEFINED
 #endif
-#ifndef VINT16x8_AVG_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2)
+#if !defined(VINT16x8_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2)
 {
 	vint16x8 vec;
-	vec.altivec = (vector signed short)vec_avg(vec1.altivec, vec2.altivec);
+	vec.altivec = vec_sra(vec1.altivec, vec2.altivec);
 	return vec;
 }
-# define VINT16x8_AVG_DEFINED
+# define VINT16x8_RSHIFT_DEFINED
 #endif
-#ifndef VINT16x8_LSHIFT_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2)
-{
-	vint16x8 vec;
-	vec.altivec = (vector signed short)vec_sl(vec1.altivec, vec2.altivec);
-	return vec;
-}
-# define VINT16x8_LSHIFT_DEFINED
-#endif
-#ifndef VINT16x8_LRSHIFT_DEFINED
+#if !defined(VINT16x8_LRSHIFT_DEFINED)
 VEC_FUNC_IMPL vint16x8 vint16x8_lrshift(vint16x8 vec1, vuint16x8 vec2)
 {
 	vint16x8 vec;
@@ -572,21 +526,16 @@
 }
 # define VINT16x8_LRSHIFT_DEFINED
 #endif
-#ifndef VINT16x8_RSHIFT_DEFINED
-VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2)
+#if !defined(VINT16x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2)
 {
 	vint16x8 vec;
-	vec.altivec = vec_sra(vec1.altivec, vec2.altivec);
+	vec.altivec = (vector signed short)vec_sl(vec1.altivec, vec2.altivec);
 	return vec;
 }
-# define VINT16x8_RSHIFT_DEFINED
+# define VINT16x8_LSHIFT_DEFINED
 #endif
-
-
-/* vint16x8 */
-
-#if defined(vec_splats) || defined(vec_splat_u16)
-#ifndef VUINT16x8_SPLAT_DEFINED
+#if !defined(VUINT16x8_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_splat(vec_uint16 x)
 {
 	vuint16x8 vec;
@@ -595,8 +544,7 @@
 }
 # define VUINT16x8_SPLAT_DEFINED
 #endif
-#endif
-#ifndef VUINT16x8_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT16x8_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_load_aligned(const vec_uint16 x[8])
 {
 	vuint16x8 vec;
@@ -605,7 +553,7 @@
 }
 # define VUINT16x8_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x8_LOAD_DEFINED
+#if !defined(VUINT16x8_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_load(const vec_uint16 x[8])
 {
 	vuint16x8 vec;
@@ -614,21 +562,14 @@
 }
 # define VUINT16x8_LOAD_DEFINED
 #endif
-#ifndef VUINT16x8_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 arr[8])
+#if !defined(VUINT16x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 x[8])
 {
-	vec_st(vec.altivec, 0, arr);
+	vec_st(vec.altivec, 0, x);
 }
 # define VUINT16x8_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT16x8_STORE_DEFINED
-VEC_FUNC_IMPL void vuint16x8_store(vuint16x8 vec, vec_uint16 arr[8])
-{
-	memcpy(arr, &vec, sizeof(vec));
-}
-# define VUINT16x8_STORE_DEFINED
-#endif
-#ifndef VUINT16x8_ADD_DEFINED
+#if !defined(VUINT16x8_ADD_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_add(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 vec;
@@ -637,7 +578,7 @@
 }
 # define VUINT16x8_ADD_DEFINED
 #endif
-#ifndef VUINT16x8_SUB_DEFINED
+#if !defined(VUINT16x8_SUB_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_sub(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 vec;
@@ -646,8 +587,7 @@
 }
 # define VUINT16x8_SUB_DEFINED
 #endif
-#ifdef vec_mul
-#ifndef VUINT16x8_MUL_DEFINED
+#if !defined(VUINT16x8_MUL_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_mul(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 vec;
@@ -656,8 +596,16 @@
 }
 # define VUINT16x8_MUL_DEFINED
 #endif
+#if !defined(VUINT16x8_AVG_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_avg(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_AVG_DEFINED
 #endif
-#ifndef VUINT16x8_AND_DEFINED
+#if !defined(VUINT16x8_AND_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_and(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 vec;
@@ -666,7 +614,7 @@
 }
 # define VUINT16x8_AND_DEFINED
 #endif
-#ifndef VUINT16x8_OR_DEFINED
+#if !defined(VUINT16x8_OR_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_or(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 vec;
@@ -675,7 +623,7 @@
 }
 # define VUINT16x8_OR_DEFINED
 #endif
-#ifndef VUINT16x8_XOR_DEFINED
+#if !defined(VUINT16x8_XOR_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_xor(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 vec;
@@ -684,7 +632,7 @@
 }
 # define VUINT16x8_XOR_DEFINED
 #endif
-#ifndef VUINT16x8_CMPLT_DEFINED
+#if !defined(VUINT16x8_CMPLT_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_cmplt(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 vec;
@@ -693,7 +641,7 @@
 }
 # define VUINT16x8_CMPLT_DEFINED
 #endif
-#ifndef VUINT16x8_CMPEQ_DEFINED
+#if !defined(VUINT16x8_CMPEQ_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpeq(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 vec;
@@ -702,7 +650,7 @@
 }
 # define VUINT16x8_CMPEQ_DEFINED
 #endif
-#ifndef VUINT16x8_CMPGT_DEFINED
+#if !defined(VUINT16x8_CMPGT_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpgt(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 vec;
@@ -711,7 +659,7 @@
 }
 # define VUINT16x8_CMPGT_DEFINED
 #endif
-#ifndef VUINT16x8_MIN_DEFINED
+#if !defined(VUINT16x8_MIN_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_min(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 vec;
@@ -720,7 +668,7 @@
 }
 # define VUINT16x8_MIN_DEFINED
 #endif
-#ifndef VUINT16x8_MAX_DEFINED
+#if !defined(VUINT16x8_MAX_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_max(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 vec;
@@ -729,34 +677,7 @@
 }
 # define VUINT16x8_MAX_DEFINED
 #endif
-#ifndef VUINT16x8_AVG_DEFINED
-VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2)
-{
-	vuint16x8 vec;
-	vec.altivec = (vector unsigned short)vec_avg(vec1.altivec, vec2.altivec);
-	return vec;
-}
-# define VUINT16x8_AVG_DEFINED
-#endif
-#ifndef VUINT16x8_LSHIFT_DEFINED
-VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2)
-{
-	vuint16x8 vec;
-	vec.altivec = (vector unsigned short)vec_sl(vec1.altivec, vec2.altivec);
-	return vec;
-}
-# define VUINT16x8_LSHIFT_DEFINED
-#endif
-#ifndef VUINT16x8_LRSHIFT_DEFINED
-VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2)
-{
-	vuint16x8 vec;
-	vec.altivec = (vector unsigned short)vec_sr(vec1.altivec, vec2.altivec);
-	return vec;
-}
-# define VUINT16x8_LRSHIFT_DEFINED
-#endif
-#ifndef VUINT16x8_RSHIFT_DEFINED
+#if !defined(VUINT16x8_RSHIFT_DEFINED)
 VEC_FUNC_IMPL vuint16x8 vuint16x8_rshift(vuint16x8 vec1, vuint16x8 vec2)
 {
 	vuint16x8 vec;
@@ -765,12 +686,25 @@
 }
 # define VUINT16x8_RSHIFT_DEFINED
 #endif
-
-
-/* vuint32x4 */
-
-#if defined(vec_splats) || defined(vec_splat_s32)
-#ifndef VINT32x4_SPLAT_DEFINED
+#if !defined(VUINT16x8_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_sl(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_LSHIFT_DEFINED
+#endif
+#if !defined(VINT32x4_SPLAT_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_splat(vec_int32 x)
 {
 	vint32x4 vec;
@@ -779,8 +713,7 @@
 }
 # define VINT32x4_SPLAT_DEFINED
 #endif
-#endif
-#ifndef VINT32x4_LOAD_ALIGNED_DEFINED
+#if !defined(VINT32x4_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_load_aligned(const vec_int32 x[4])
 {
 	vint32x4 vec;
@@ -789,7 +722,7 @@
 }
 # define VINT32x4_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VINT32x4_LOAD_DEFINED
+#if !defined(VINT32x4_LOAD_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_load(const vec_int32 x[4])
 {
 	vint32x4 vec;
@@ -798,21 +731,14 @@
 }
 # define VINT32x4_LOAD_DEFINED
 #endif
-#ifndef VINT32x4_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 arr[4])
+#if !defined(VINT32x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 x[4])
 {
-	vec_st(vec.altivec, 0, arr);
+	vec_st(vec.altivec, 0, x);
 }
 # define VINT32x4_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VINT32x4_STORE_DEFINED
-VEC_FUNC_IMPL void vint32x4_store(vint32x4 vec, vec_int32 arr[4])
-{
-	memcpy(arr, &vec, sizeof(vec));
-}
-# define VINT32x4_STORE_DEFINED
-#endif
-#ifndef VINT32x4_ADD_DEFINED
+#if !defined(VINT32x4_ADD_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2)
 {
 	vint32x4 vec;
@@ -821,7 +747,7 @@
 }
 # define VINT32x4_ADD_DEFINED
 #endif
-#ifndef VINT32x4_SUB_DEFINED
+#if !defined(VINT32x4_SUB_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_sub(vint32x4 vec1, vint32x4 vec2)
 {
 	vint32x4 vec;
@@ -830,8 +756,7 @@
 }
 # define VINT32x4_SUB_DEFINED
 #endif
-#ifdef vec_mul
-#ifndef VINT32x4_MUL_DEFINED
+#if !defined(VINT32x4_MUL_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_mul(vint32x4 vec1, vint32x4 vec2)
 {
 	vint32x4 vec;
@@ -840,8 +765,16 @@
 }
 # define VINT32x4_MUL_DEFINED
 #endif
+#if !defined(VINT32x4_AVG_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_avg(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_AVG_DEFINED
 #endif
-#ifndef VINT32x4_AND_DEFINED
+#if !defined(VINT32x4_AND_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2)
 {
 	vint32x4 vec;
@@ -850,7 +783,7 @@
 }
 # define VINT32x4_AND_DEFINED
 #endif
-#ifndef VINT32x4_OR_DEFINED
+#if !defined(VINT32x4_OR_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2)
 {
 	vint32x4 vec;
@@ -859,7 +792,7 @@
 }
 # define VINT32x4_OR_DEFINED
 #endif
-#ifndef VINT32x4_XOR_DEFINED
+#if !defined(VINT32x4_XOR_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2)
 {
 	vint32x4 vec;
@@ -868,7 +801,7 @@
 }
 # define VINT32x4_XOR_DEFINED
 #endif
-#ifndef VINT32x4_CMPLT_DEFINED
+#if !defined(VINT32x4_CMPLT_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2)
 {
 	vint32x4 vec;
@@ -877,7 +810,7 @@
 }
 # define VINT32x4_CMPLT_DEFINED
 #endif
-#ifndef VINT32x4_CMPEQ_DEFINED
+#if !defined(VINT32x4_CMPEQ_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2)
 {
 	vint32x4 vec;
@@ -886,7 +819,7 @@
 }
 # define VINT32x4_CMPEQ_DEFINED
 #endif
-#ifndef VINT32x4_CMPGT_DEFINED
+#if !defined(VINT32x4_CMPGT_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2)
 {
 	vint32x4 vec;
@@ -895,7 +828,7 @@
 }
 # define VINT32x4_CMPGT_DEFINED
 #endif
-#ifndef VINT32x4_MIN_DEFINED
+#if !defined(VINT32x4_MIN_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2)
 {
 	vint32x4 vec;
@@ -904,7 +837,7 @@
 }
 # define VINT32x4_MIN_DEFINED
 #endif
-#ifndef VINT32x4_MAX_DEFINED
+#if !defined(VINT32x4_MAX_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2)
 {
 	vint32x4 vec;
@@ -913,16 +846,25 @@
 }
 # define VINT32x4_MAX_DEFINED
 #endif
-#ifndef VINT32x4_AVG_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2)
+#if !defined(VINT32x4_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2)
 {
 	vint32x4 vec;
-	vec.altivec = (vector signed int)vec_avg(vec1.altivec, vec2.altivec);
+	vec.altivec = vec_sra(vec1.altivec, vec2.altivec);
 	return vec;
 }
-# define VINT32x4_AVG_DEFINED
+# define VINT32x4_RSHIFT_DEFINED
 #endif
-#ifndef VINT32x4_LSHIFT_DEFINED
+#if !defined(VINT32x4_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT32x4_LSHIFT_DEFINED)
 VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2)
 {
 	vint32x4 vec;
@@ -931,30 +873,7 @@
 }
 # define VINT32x4_LSHIFT_DEFINED
 #endif
-#ifndef VINT32x4_LRSHIFT_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2)
-{
-	vint32x4 vec;
-	vec.altivec = (vector signed int)vec_sr(vec1.altivec, vec2.altivec);
-	return vec;
-}
-# define VINT32x4_LRSHIFT_DEFINED
-#endif
-#ifndef VINT32x4_RSHIFT_DEFINED
-VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2)
-{
-	vint32x4 vec;
-	vec.altivec = vec_sra(vec1.altivec, vec2.altivec);
-	return vec;
-}
-# define VINT32x4_RSHIFT_DEFINED
-#endif
-
-
-/* vint32x4 */
-
-#if defined(vec_splats) || defined(vec_splat_u32)
-#ifndef VUINT32x4_SPLAT_DEFINED
+#if !defined(VUINT32x4_SPLAT_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_splat(vec_uint32 x)
 {
 	vuint32x4 vec;
@@ -963,8 +882,7 @@
 }
 # define VUINT32x4_SPLAT_DEFINED
 #endif
-#endif
-#ifndef VUINT32x4_LOAD_ALIGNED_DEFINED
+#if !defined(VUINT32x4_LOAD_ALIGNED_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_load_aligned(const vec_uint32 x[4])
 {
 	vuint32x4 vec;
@@ -973,7 +891,7 @@
 }
 # define VUINT32x4_LOAD_ALIGNED_DEFINED
 #endif
-#ifndef VUINT32x4_LOAD_DEFINED
+#if !defined(VUINT32x4_LOAD_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_load(const vec_uint32 x[4])
 {
 	vuint32x4 vec;
@@ -982,21 +900,14 @@
 }
 # define VUINT32x4_LOAD_DEFINED
 #endif
-#ifndef VUINT32x4_STORE_ALIGNED_DEFINED
-VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 arr[4])
+#if !defined(VUINT32x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 x[4])
 {
-	vec_st(vec.altivec, 0, arr);
+	vec_st(vec.altivec, 0, x);
 }
 # define VUINT32x4_STORE_ALIGNED_DEFINED
 #endif
-#ifndef VUINT32x4_STORE_DEFINED
-VEC_FUNC_IMPL void vuint32x4_store(vuint32x4 vec, vec_uint32 arr[4])
-{
-	memcpy(arr, &vec, sizeof(vec));
-}
-# define VUINT32x4_STORE_DEFINED
-#endif
-#ifndef VUINT32x4_ADD_DEFINED
+#if !defined(VUINT32x4_ADD_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_add(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
@@ -1005,7 +916,7 @@
 }
 # define VUINT32x4_ADD_DEFINED
 #endif
-#ifndef VUINT32x4_SUB_DEFINED
+#if !defined(VUINT32x4_SUB_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_sub(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
@@ -1014,8 +925,7 @@
 }
 # define VUINT32x4_SUB_DEFINED
 #endif
-#ifdef vec_mul
-#ifndef VUINT32x4_MUL_DEFINED
+#if !defined(VUINT32x4_MUL_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_mul(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
@@ -1024,8 +934,16 @@
 }
 # define VUINT32x4_MUL_DEFINED
 #endif
+#if !defined(VUINT32x4_AVG_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_avg(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_AVG_DEFINED
 #endif
-#ifndef VUINT32x4_AND_DEFINED
+#if !defined(VUINT32x4_AND_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_and(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
@@ -1034,7 +952,7 @@
 }
 # define VUINT32x4_AND_DEFINED
 #endif
-#ifndef VUINT32x4_OR_DEFINED
+#if !defined(VUINT32x4_OR_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_or(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
@@ -1043,7 +961,7 @@
 }
 # define VUINT32x4_OR_DEFINED
 #endif
-#ifndef VUINT32x4_XOR_DEFINED
+#if !defined(VUINT32x4_XOR_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_xor(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
@@ -1052,7 +970,7 @@
 }
 # define VUINT32x4_XOR_DEFINED
 #endif
-#ifndef VUINT32x4_CMPLT_DEFINED
+#if !defined(VUINT32x4_CMPLT_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_cmplt(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
@@ -1061,7 +979,7 @@
 }
 # define VUINT32x4_CMPLT_DEFINED
 #endif
-#ifndef VUINT32x4_CMPEQ_DEFINED
+#if !defined(VUINT32x4_CMPEQ_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpeq(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
@@ -1070,7 +988,7 @@
 }
 # define VUINT32x4_CMPEQ_DEFINED
 #endif
-#ifndef VUINT32x4_CMPGT_DEFINED
+#if !defined(VUINT32x4_CMPGT_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpgt(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
@@ -1079,7 +997,7 @@
 }
 # define VUINT32x4_CMPGT_DEFINED
 #endif
-#ifndef VUINT32x4_MIN_DEFINED
+#if !defined(VUINT32x4_MIN_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_min(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
@@ -1088,7 +1006,7 @@
 }
 # define VUINT32x4_MIN_DEFINED
 #endif
-#ifndef VUINT32x4_MAX_DEFINED
+#if !defined(VUINT32x4_MAX_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_max(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
@@ -1097,16 +1015,25 @@
 }
 # define VUINT32x4_MAX_DEFINED
 #endif
-#ifndef VUINT32x4_AVG_DEFINED
-VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2)
+#if !defined(VUINT32x4_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
-	vec.altivec = (vector unsigned int)vec_avg(vec1.altivec, vec2.altivec);
+	vec.altivec = vec_sr(vec1.altivec, vec2.altivec);
 	return vec;
 }
-# define VUINT32x4_AVG_DEFINED
+# define VUINT32x4_RSHIFT_DEFINED
 #endif
-#ifndef VUINT32x4_LSHIFT_DEFINED
+#if !defined(VUINT32x4_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x4_LSHIFT_DEFINED)
 VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2)
 {
 	vuint32x4 vec;
@@ -1115,23 +1042,109 @@
 }
 # define VUINT32x4_LSHIFT_DEFINED
 #endif
-#ifndef VUINT32x4_LRSHIFT_DEFINED
-VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2)
+#if !defined(VF32x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_splat(vec_f32 x)
+{
+	vf32x4 vec;
+	vec.altivec = vec_splats(x);
+	return vec;
+}
+# define VF32x4_SPLAT_DEFINED
+#endif
+#if !defined(VF32x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_load_aligned(const vec_f32 x[4])
+{
+	vf32x4 vec;
+	vec.altivec = vec_ld(0, x);
+	return vec;
+}
+# define VF32x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_load(const vec_f32 x[4])
 {
-	vuint32x4 vec;
-	vec.altivec = (vector unsigned int)vec_sr(vec1.altivec, vec2.altivec);
+	vf32x4 vec;
+	vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));
+	return vec;
+}
+# define VF32x4_LOAD_DEFINED
+#endif
+#if !defined(VF32x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf32x4_store_aligned(vf32x4 vec, vec_f32 x[4])
+{
+	vec_st(vec.altivec, 0, x);
+}
+# define VF32x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x4_ADD_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_add(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_add(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_ADD_DEFINED
+#endif
+#if !defined(VF32x4_SUB_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_sub(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_sub(vec1.altivec, vec2.altivec);
 	return vec;
 }
-# define VUINT32x4_LRSHIFT_DEFINED
+# define VF32x4_SUB_DEFINED
+#endif
+#if !defined(VF32x4_MUL_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_mul(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_mul(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_MUL_DEFINED
 #endif
-#ifndef VUINT32x4_RSHIFT_DEFINED
-VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2)
+#if !defined(VF32x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmplt(vf32x4 vec1, vf32x4 vec2)
 {
-	vuint32x4 vec;
-	vec.altivec = vec_sr(vec1.altivec, vec2.altivec);
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_cmplt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_CMPLT_DEFINED
+#endif
+#if !defined(VF32x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpeq(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_cmpeq(vec1.altivec, vec2.altivec);
 	return vec;
 }
-# define VUINT32x4_RSHIFT_DEFINED
+# define VF32x4_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpgt(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_cmpgt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_CMPGT_DEFINED
 #endif
-#endif /* VEC_IMPL_PPC_ALTIVEC_H_ */
-
+#if !defined(VF32x4_MIN_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_min(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_min(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_MIN_DEFINED
+#endif
+#if !defined(VF32x4_MAX_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_max(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_max(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_MAX_DEFINED
+#endif
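
The generated AltiVec header above maps the two right-shift flavours onto different intrinsics: rshift uses vec_sra (arithmetic, sign-filling) while lrshift uses vec_sr (logical, zero-filling). A per-lane scalar sketch of the difference follows; the helper names are illustrative only and not part of vec.

	#include <stdint.h>

	/* arithmetic right shift, as rshift/vec_sra does per lane
	 * (assumes the usual sign-extending behaviour of >> on signed ints) */
	static int32_t lane_rshift(int32_t x, unsigned n)
	{
		return (int32_t)(x >> n);
	}

	/* logical right shift, as lrshift/vec_sr does per lane */
	static int32_t lane_lrshift(int32_t x, unsigned n)
	{
		return (int32_t)((uint32_t)x >> n);
	}
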
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/include/vec/impl/ppc/vsx.h	Wed Apr 30 18:36:38 2025 -0400
@@ -0,0 +1,1418 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+/* This file is automatically generated! Do not edit it directly!
+ * Edit the code that generates it in gen/genvsx.c  --paper */
+
+/* ------------------------------------------------------------------------ */
+/* PREPROCESSOR HELL INCOMING */
+
+#if !defined(VINT8x16_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_splat(vec_int8 x)
+{
+	vint8x16 vec;
+	vec.altivec = vec_splats(x);
+	return vec;
+}
+# define VINT8x16_SPLAT_DEFINED
+#endif
+#if !defined(VINT8x16_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_load_aligned(const vec_int8 x[16])
+{
+	vint8x16 vec;
+	vec.altivec = vec_ld(0, x);
+	return vec;
+}
+# define VINT8x16_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x16_LOAD_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_load(const vec_int8 x[16])
+{
+	vint8x16 vec;
+	vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));
+	return vec;
+}
+# define VINT8x16_LOAD_DEFINED
+#endif
+#if !defined(VINT8x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 x[16])
+{
+	vec_st(vec.altivec, 0, x);
+}
+# define VINT8x16_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT8x16_ADD_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_add(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_add(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_ADD_DEFINED
+#endif
+#if !defined(VINT8x16_SUB_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_sub(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_sub(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_SUB_DEFINED
+#endif
+#if !defined(VINT8x16_MUL_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_mul(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_mul(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_MUL_DEFINED
+#endif
+#if !defined(VINT8x16_AVG_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_avg(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_AVG_DEFINED
+#endif
+#if !defined(VINT8x16_AND_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_and(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_AND_DEFINED
+#endif
+#if !defined(VINT8x16_OR_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_or(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_OR_DEFINED
+#endif
+#if !defined(VINT8x16_XOR_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_xor(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_XOR_DEFINED
+#endif
+#if !defined(VINT8x16_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_cmplt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_CMPLT_DEFINED
+#endif
+#if !defined(VINT8x16_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_cmpeq(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_CMPEQ_DEFINED
+#endif
+#if !defined(VINT8x16_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_cmpgt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_CMPGT_DEFINED
+#endif
+#if !defined(VINT8x16_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_cmple(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_cmple(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_CMPLE_DEFINED
+#endif
+#if !defined(VINT8x16_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpge(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_cmpge(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_CMPGE_DEFINED
+#endif
+#if !defined(VINT8x16_MIN_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_min(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_MIN_DEFINED
+#endif
+#if !defined(VINT8x16_MAX_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_max(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_MAX_DEFINED
+#endif
+#if !defined(VINT8x16_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_rshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = vec_sra(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_RSHIFT_DEFINED
+#endif
+#if !defined(VINT8x16_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT8x16_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	vint8x16 vec;
+	vec.altivec = (vector signed char)vec_sl(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT8x16_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x16_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_splat(vec_uint8 x)
+{
+	vuint8x16 vec;
+	vec.altivec = vec_splats(x);
+	return vec;
+}
+# define VUINT8x16_SPLAT_DEFINED
+#endif
+#if !defined(VUINT8x16_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_load_aligned(const vec_uint8 x[16])
+{
+	vuint8x16 vec;
+	vec.altivec = vec_ld(0, x);
+	return vec;
+}
+# define VUINT8x16_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x16_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 x[16])
+{
+	vuint8x16 vec;
+	vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));
+	return vec;
+}
+# define VUINT8x16_LOAD_DEFINED
+#endif
+#if !defined(VUINT8x16_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 x[16])
+{
+	vec_st(vec.altivec, 0, x);
+}
+# define VUINT8x16_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT8x16_ADD_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_add(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_add(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_ADD_DEFINED
+#endif
+#if !defined(VUINT8x16_SUB_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_sub(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_sub(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_SUB_DEFINED
+#endif
+#if !defined(VUINT8x16_MUL_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_mul(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_mul(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_MUL_DEFINED
+#endif
+#if !defined(VUINT8x16_AVG_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_avg(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_AVG_DEFINED
+#endif
+#if !defined(VUINT8x16_AND_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_and(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_and(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_AND_DEFINED
+#endif
+#if !defined(VUINT8x16_OR_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_or(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_or(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_OR_DEFINED
+#endif
+#if !defined(VUINT8x16_XOR_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_xor(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_xor(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_XOR_DEFINED
+#endif
+#if !defined(VUINT8x16_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmplt(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_cmplt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_CMPLT_DEFINED
+#endif
+#if !defined(VUINT8x16_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpeq(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_cmpeq(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT8x16_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpgt(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_cmpgt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_CMPGT_DEFINED
+#endif
+#if !defined(VUINT8x16_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmple(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_cmple(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_CMPLE_DEFINED
+#endif
+#if !defined(VUINT8x16_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpge(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_cmpge(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_CMPGE_DEFINED
+#endif
+#if !defined(VUINT8x16_MIN_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_min(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_min(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_MIN_DEFINED
+#endif
+#if !defined(VUINT8x16_MAX_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_max(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_max(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_MAX_DEFINED
+#endif
+#if !defined(VUINT8x16_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x16_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT8x16_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 vec;
+	vec.altivec = (vector unsigned char)vec_sl(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT8x16_LSHIFT_DEFINED
+#endif
+#if !defined(VINT16x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_splat(vec_int16 x)
+{
+	vint16x8 vec;
+	vec.altivec = vec_splats(x);
+	return vec;
+}
+# define VINT16x8_SPLAT_DEFINED
+#endif
+#if !defined(VINT16x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_load_aligned(const vec_int16 x[8])
+{
+	vint16x8 vec;
+	vec.altivec = vec_ld(0, x);
+	return vec;
+}
+# define VINT16x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT16x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_load(const vec_int16 x[8])
+{
+	vint16x8 vec;
+	vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));
+	return vec;
+}
+# define VINT16x8_LOAD_DEFINED
+#endif
+#if !defined(VINT16x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 x[8])
+{
+	vec_st(vec.altivec, 0, x);
+}
+# define VINT16x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT16x8_ADD_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_add(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_add(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_ADD_DEFINED
+#endif
+#if !defined(VINT16x8_SUB_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_sub(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_sub(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_SUB_DEFINED
+#endif
+#if !defined(VINT16x8_MUL_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_mul(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_mul(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_MUL_DEFINED
+#endif
+#if !defined(VINT16x8_AVG_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_avg(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_AVG_DEFINED
+#endif
+#if !defined(VINT16x8_AND_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_and(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_AND_DEFINED
+#endif
+#if !defined(VINT16x8_OR_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_or(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_OR_DEFINED
+#endif
+#if !defined(VINT16x8_XOR_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_xor(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_XOR_DEFINED
+#endif
+#if !defined(VINT16x8_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_cmplt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_CMPLT_DEFINED
+#endif
+#if !defined(VINT16x8_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_cmpeq(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_CMPEQ_DEFINED
+#endif
+#if !defined(VINT16x8_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_cmpgt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_CMPGT_DEFINED
+#endif
+#if !defined(VINT16x8_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_cmple(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_cmple(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_CMPLE_DEFINED
+#endif
+#if !defined(VINT16x8_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpge(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_cmpge(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_CMPGE_DEFINED
+#endif
+#if !defined(VINT16x8_MIN_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_min(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_MIN_DEFINED
+#endif
+#if !defined(VINT16x8_MAX_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_max(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_MAX_DEFINED
+#endif
+#if !defined(VINT16x8_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = vec_sra(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_RSHIFT_DEFINED
+#endif
+#if !defined(VINT16x8_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_lrshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT16x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	vint16x8 vec;
+	vec.altivec = (vector signed short)vec_sl(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT16x8_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x8_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_splat(vec_uint16 x)
+{
+	vuint16x8 vec;
+	vec.altivec = vec_splats(x);
+	return vec;
+}
+# define VUINT16x8_SPLAT_DEFINED
+#endif
+#if !defined(VUINT16x8_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_load_aligned(const vec_uint16 x[8])
+{
+	vuint16x8 vec;
+	vec.altivec = vec_ld(0, x);
+	return vec;
+}
+# define VUINT16x8_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT16x8_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_load(const vec_uint16 x[8])
+{
+	vuint16x8 vec;
+	vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));
+	return vec;
+}
+# define VUINT16x8_LOAD_DEFINED
+#endif
+#if !defined(VUINT16x8_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 x[8])
+{
+	vec_st(vec.altivec, 0, x);
+}
+# define VUINT16x8_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT16x8_ADD_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_add(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_add(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_ADD_DEFINED
+#endif
+#if !defined(VUINT16x8_SUB_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_sub(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_sub(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_SUB_DEFINED
+#endif
+#if !defined(VUINT16x8_MUL_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_mul(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_mul(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_MUL_DEFINED
+#endif
+#if !defined(VUINT16x8_AVG_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_avg(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_AVG_DEFINED
+#endif
+#if !defined(VUINT16x8_AND_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_and(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_and(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_AND_DEFINED
+#endif
+#if !defined(VUINT16x8_OR_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_or(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_or(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_OR_DEFINED
+#endif
+#if !defined(VUINT16x8_XOR_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_xor(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_xor(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_XOR_DEFINED
+#endif
+#if !defined(VUINT16x8_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmplt(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_cmplt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_CMPLT_DEFINED
+#endif
+#if !defined(VUINT16x8_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpeq(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_cmpeq(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT16x8_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpgt(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_cmpgt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_CMPGT_DEFINED
+#endif
+#if !defined(VUINT16x8_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmple(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_cmple(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_CMPLE_DEFINED
+#endif
+#if !defined(VUINT16x8_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpge(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_cmpge(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_CMPGE_DEFINED
+#endif
+#if !defined(VUINT16x8_MIN_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_min(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_min(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_MIN_DEFINED
+#endif
+#if !defined(VUINT16x8_MAX_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_max(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_max(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_MAX_DEFINED
+#endif
+#if !defined(VUINT16x8_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_rshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x8_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT16x8_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 vec;
+	vec.altivec = (vector unsigned short)vec_sl(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT16x8_LSHIFT_DEFINED
+#endif
+#if !defined(VINT32x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_splat(vec_int32 x)
+{
+	vint32x4 vec;
+	vec.altivec = vec_splats(x);
+	return vec;
+}
+# define VINT32x4_SPLAT_DEFINED
+#endif
+#if !defined(VINT32x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_load_aligned(const vec_int32 x[4])
+{
+	vint32x4 vec;
+	vec.altivec = vec_ld(0, x);
+	return vec;
+}
+# define VINT32x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VINT32x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_load(const vec_int32 x[4])
+{
+	vint32x4 vec;
+	vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));
+	return vec;
+}
+# define VINT32x4_LOAD_DEFINED
+#endif
+#if !defined(VINT32x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 x[4])
+{
+	vec_st(vec.altivec, 0, x);
+}
+# define VINT32x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VINT32x4_ADD_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_add(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_ADD_DEFINED
+#endif
+#if !defined(VINT32x4_SUB_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_sub(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_sub(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_SUB_DEFINED
+#endif
+#if !defined(VINT32x4_MUL_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_mul(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_mul(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_MUL_DEFINED
+#endif
+#if !defined(VINT32x4_AVG_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_avg(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_AVG_DEFINED
+#endif
+#if !defined(VINT32x4_AND_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_and(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_AND_DEFINED
+#endif
+#if !defined(VINT32x4_OR_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_or(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_OR_DEFINED
+#endif
+#if !defined(VINT32x4_XOR_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_xor(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_XOR_DEFINED
+#endif
+#if !defined(VINT32x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_cmplt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_CMPLT_DEFINED
+#endif
+#if !defined(VINT32x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_cmpeq(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_CMPEQ_DEFINED
+#endif
+#if !defined(VINT32x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_cmpgt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_CMPGT_DEFINED
+#endif
+#if !defined(VINT32x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_cmple(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_cmple(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_CMPLE_DEFINED
+#endif
+#if !defined(VINT32x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpge(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_cmpge(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_CMPGE_DEFINED
+#endif
+#if !defined(VINT32x4_MIN_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_min(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_MIN_DEFINED
+#endif
+#if !defined(VINT32x4_MAX_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_max(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_MAX_DEFINED
+#endif
+#if !defined(VINT32x4_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = vec_sra(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_RSHIFT_DEFINED
+#endif
+#if !defined(VINT32x4_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_LRSHIFT_DEFINED
+#endif
+#if !defined(VINT32x4_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	vint32x4 vec;
+	vec.altivec = (vector signed int)vec_sl(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VINT32x4_LSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_splat(vec_uint32 x)
+{
+	vuint32x4 vec;
+	vec.altivec = vec_splats(x);
+	return vec;
+}
+# define VUINT32x4_SPLAT_DEFINED
+#endif
+#if !defined(VUINT32x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_load_aligned(const vec_uint32 x[4])
+{
+	vuint32x4 vec;
+	vec.altivec = vec_ld(0, x);
+	return vec;
+}
+# define VUINT32x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT32x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_load(const vec_uint32 x[4])
+{
+	vuint32x4 vec;
+	vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));
+	return vec;
+}
+# define VUINT32x4_LOAD_DEFINED
+#endif
+#if !defined(VUINT32x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 x[4])
+{
+	vec_st(vec.altivec, 0, x);
+}
+# define VUINT32x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VUINT32x4_ADD_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_add(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_add(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_ADD_DEFINED
+#endif
+#if !defined(VUINT32x4_SUB_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_sub(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_sub(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_SUB_DEFINED
+#endif
+#if !defined(VUINT32x4_MUL_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_mul(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_mul(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_MUL_DEFINED
+#endif
+#if !defined(VUINT32x4_AVG_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_avg(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_AVG_DEFINED
+#endif
+#if !defined(VUINT32x4_AND_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_and(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_and(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_AND_DEFINED
+#endif
+#if !defined(VUINT32x4_OR_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_or(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_or(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_OR_DEFINED
+#endif
+#if !defined(VUINT32x4_XOR_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_xor(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_xor(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_XOR_DEFINED
+#endif
+#if !defined(VUINT32x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmplt(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_cmplt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_CMPLT_DEFINED
+#endif
+#if !defined(VUINT32x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpeq(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_cmpeq(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_CMPEQ_DEFINED
+#endif
+#if !defined(VUINT32x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpgt(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_cmpgt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_CMPGT_DEFINED
+#endif
+#if !defined(VUINT32x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmple(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_cmple(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_CMPLE_DEFINED
+#endif
+#if !defined(VUINT32x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpge(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_cmpge(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_CMPGE_DEFINED
+#endif
+#if !defined(VUINT32x4_MIN_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_min(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_min(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_MIN_DEFINED
+#endif
+#if !defined(VUINT32x4_MAX_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_max(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_max(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_MAX_DEFINED
+#endif
+#if !defined(VUINT32x4_RSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_RSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x4_LRSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_sr(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_LRSHIFT_DEFINED
+#endif
+#if !defined(VUINT32x4_LSHIFT_DEFINED)
+VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 vec;
+	vec.altivec = (vector unsigned int)vec_sl(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VUINT32x4_LSHIFT_DEFINED
+#endif
+#if !defined(VF32x4_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_splat(vec_f32 x)
+{
+	vf32x4 vec;
+	vec.altivec = vec_splats(x);
+	return vec;
+}
+# define VF32x4_SPLAT_DEFINED
+#endif
+#if !defined(VF32x4_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_load_aligned(const vec_f32 x[4])
+{
+	vf32x4 vec;
+	vec.altivec = vec_ld(0, x);
+	return vec;
+}
+# define VF32x4_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x4_LOAD_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_load(const vec_f32 x[4])
+{
+	vf32x4 vec;
+	vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));
+	return vec;
+}
+# define VF32x4_LOAD_DEFINED
+#endif
+#if !defined(VF32x4_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf32x4_store_aligned(vf32x4 vec, vec_f32 x[4])
+{
+	vec_st(vec.altivec, 0, x);
+}
+# define VF32x4_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF32x4_ADD_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_add(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_add(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_ADD_DEFINED
+#endif
+#if !defined(VF32x4_SUB_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_sub(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_sub(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_SUB_DEFINED
+#endif
+#if !defined(VF32x4_MUL_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_mul(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_mul(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_MUL_DEFINED
+#endif
+#if !defined(VF32x4_DIV_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_div(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_div(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_DIV_DEFINED
+#endif
+#if !defined(VF32x4_CMPLT_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmplt(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_cmplt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_CMPLT_DEFINED
+#endif
+#if !defined(VF32x4_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpeq(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_cmpeq(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_CMPEQ_DEFINED
+#endif
+#if !defined(VF32x4_CMPGT_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpgt(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_cmpgt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_CMPGT_DEFINED
+#endif
+#if !defined(VF32x4_CMPLE_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmple(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_cmple(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_CMPLE_DEFINED
+#endif
+#if !defined(VF32x4_CMPGE_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_cmpge(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_cmpge(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_CMPGE_DEFINED
+#endif
+#if !defined(VF32x4_MIN_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_min(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_min(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_MIN_DEFINED
+#endif
+#if !defined(VF32x4_MAX_DEFINED)
+VEC_FUNC_IMPL vf32x4 vf32x4_max(vf32x4 vec1, vf32x4 vec2)
+{
+	vf32x4 vec;
+	vec.altivec = (vector float)vec_max(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF32x4_MAX_DEFINED
+#endif
+#if !defined(VF64x2_SPLAT_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_splat(vec_f64 x)
+{
+	vf64x2 vec;
+	vec.altivec = vec_splats(x);
+	return vec;
+}
+# define VF64x2_SPLAT_DEFINED
+#endif
+#if !defined(VF64x2_LOAD_ALIGNED_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_load_aligned(const vec_f64 x[2])
+{
+	vf64x2 vec;
+	vec.altivec = vec_ld(0, x);
+	return vec;
+}
+# define VF64x2_LOAD_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x2_LOAD_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_load(const vec_f64 x[2])
+{
+	vf64x2 vec;
+	vec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));
+	return vec;
+}
+# define VF64x2_LOAD_DEFINED
+#endif
+#if !defined(VF64x2_STORE_ALIGNED_DEFINED)
+VEC_FUNC_IMPL void vf64x2_store_aligned(vf64x2 vec, vec_f64 x[2])
+{
+	vec_st(vec.altivec, 0, x);
+}
+# define VF64x2_STORE_ALIGNED_DEFINED
+#endif
+#if !defined(VF64x2_ADD_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_add(vf64x2 vec1, vf64x2 vec2)
+{
+	vf64x2 vec;
+	vec.altivec = (vector double)vec_add(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF64x2_ADD_DEFINED
+#endif
+#if !defined(VF64x2_SUB_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_sub(vf64x2 vec1, vf64x2 vec2)
+{
+	vf64x2 vec;
+	vec.altivec = (vector double)vec_sub(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF64x2_SUB_DEFINED
+#endif
+#if !defined(VF64x2_MUL_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_mul(vf64x2 vec1, vf64x2 vec2)
+{
+	vf64x2 vec;
+	vec.altivec = (vector double)vec_mul(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF64x2_MUL_DEFINED
+#endif
+#if !defined(VF64x2_DIV_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_div(vf64x2 vec1, vf64x2 vec2)
+{
+	vf64x2 vec;
+	vec.altivec = (vector double)vec_div(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF64x2_DIV_DEFINED
+#endif
+#if !defined(VF64x2_CMPLT_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_cmplt(vf64x2 vec1, vf64x2 vec2)
+{
+	vf64x2 vec;
+	vec.altivec = (vector double)vec_cmplt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF64x2_CMPLT_DEFINED
+#endif
+#if !defined(VF64x2_CMPEQ_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_cmpeq(vf64x2 vec1, vf64x2 vec2)
+{
+	vf64x2 vec;
+	vec.altivec = (vector double)vec_cmpeq(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF64x2_CMPEQ_DEFINED
+#endif
+#if !defined(VF64x2_CMPGT_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_cmpgt(vf64x2 vec1, vf64x2 vec2)
+{
+	vf64x2 vec;
+	vec.altivec = (vector double)vec_cmpgt(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF64x2_CMPGT_DEFINED
+#endif
+#if !defined(VF64x2_CMPLE_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_cmple(vf64x2 vec1, vf64x2 vec2)
+{
+	vf64x2 vec;
+	vec.altivec = (vector double)vec_cmple(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF64x2_CMPLE_DEFINED
+#endif
+#if !defined(VF64x2_CMPGE_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_cmpge(vf64x2 vec1, vf64x2 vec2)
+{
+	vf64x2 vec;
+	vec.altivec = (vector double)vec_cmpge(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF64x2_CMPGE_DEFINED
+#endif
+#if !defined(VF64x2_MIN_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_min(vf64x2 vec1, vf64x2 vec2)
+{
+	vf64x2 vec;
+	vec.altivec = (vector double)vec_min(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF64x2_MIN_DEFINED
+#endif
+#if !defined(VF64x2_MAX_DEFINED)
+VEC_FUNC_IMPL vf64x2 vf64x2_max(vf64x2 vec1, vf64x2 vec2)
+{
+	vf64x2 vec;
+	vec.altivec = (vector double)vec_max(vec1.altivec, vec2.altivec);
+	return vec;
+}
+# define VF64x2_MAX_DEFINED
+#endif
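
The unaligned loads in these generated headers all use the classic AltiVec idiom: two aligned vec_ld loads around the pointer, stitched together with vec_perm using the shuffle mask from vec_lvsl, since the hardware only ever issues aligned 16-byte loads. Below is a rough scalar model of what that computes; the function name and the byte-wise copies are purely illustrative, the real code never touches memory a byte at a time.

	#include <stdint.h>
	#include <string.h>

	static void unaligned_load_model(unsigned char dst[16], const unsigned char *x)
	{
		const unsigned char *lo = (const unsigned char *)((uintptr_t)x & ~(uintptr_t)15); /* vec_ld(0, x)   */
		const unsigned char *hi = lo + 16;                                                /* vec_ld(16, x)  */
		size_t off = (size_t)((uintptr_t)x & 15);                                         /* vec_lvsl(0, x) */

		/* vec_perm glues the two aligned blocks back together */
		memcpy(dst, lo + off, 16 - off);
		memcpy(dst + (16 - off), hi, off);
	}
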
--- a/include/vec/impl/x86/sse2.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/include/vec/impl/x86/sse2.h	Wed Apr 30 18:36:38 2025 -0400
@@ -331,7 +331,7 @@
 #endif
 
 /* ------------------------------------------------------------------------ */
-/* vint8x16 */
+/* vint16x8 */
 
 #ifndef VINT16x8_SPLAT_DEFINED
 VEC_SSE2_SPLAT(/* nothing */, 16, 8)
@@ -414,7 +414,7 @@
 #endif
 
 /* ------------------------------------------------------------------------ */
-/* vuint8x16 */
+/* vuint16x8 */
 
 #ifndef VUINT16x8_SPLAT_DEFINED
 VEC_SSE2_SPLAT(u, 16, 8)
--- a/include/vec/impl/x86/sse3.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/include/vec/impl/x86/sse3.h	Wed Apr 30 18:36:38 2025 -0400
@@ -31,7 +31,7 @@
 	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_load(const vec_##sign##int##bits in[size]) \
 	{ \
 		v##sign##int##bits##x##size vec; \
-		vec.sse = _mm_loadu_si128((const __m128i *)in); \
+		vec.sse = _mm_lddqu_si128((const __m128i *)in); \
 		return vec; \
 	}
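
The sse3.h hunk swaps _mm_loadu_si128 for SSE3's _mm_lddqu_si128; both perform the same unaligned 128-bit load, lddqu just handled loads that split a cache line more gracefully on some older cores. A minimal standalone sketch, assuming an SSE3-capable toolchain; everything outside the two intrinsic calls is illustrative.

	/* build with -msse3 (or equivalent) */
	#include <pmmintrin.h>
	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int32_t buf[8] = {0, 1, 2, 3, 4, 5, 6, 7};
		int32_t out[4];

		/* deliberately misaligned source: buf + 1 is only 4-byte aligned */
		__m128i v = _mm_lddqu_si128((const __m128i *)(buf + 1));
		_mm_storeu_si128((__m128i *)out, v);

		printf("%d %d %d %d\n", (int)out[0], (int)out[1], (int)out[2], (int)out[3]); /* 1 2 3 4 */
		return 0;
	}
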
 
--- a/include/vec/vec.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/include/vec/vec.h	Wed Apr 30 18:36:38 2025 -0400
@@ -36,7 +36,10 @@
 #define VEC_AVX2_ALIGNMENT    32
 #define VEC_AVX512F_ALIGNMENT 64
 
-// for the generic implementation
+/* for the generic implementation. note that, due to delayed expansion,
+ * a larger vector type is basically always guaranteed to have at least
+ * the alignment of the smaller one it is built from (e.g. f64x8
+ * alignment will be >= f64x4 alignment). This is by design. */
 #define VINT8x2_ALIGNMENT   1
 #define VUINT8x2_ALIGNMENT  1
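
A tiny illustrative demo of the delayed expansion the comment above relies on (the _DEMO names are not part of vec): an object-like macro defined in terms of another re-expands at every use, so bumping the x2 macro later bumps everything defined from it.

	#define VF32x2_ALIGNMENT_DEMO 4
	#define VF32x4_ALIGNMENT_DEMO VF32x2_ALIGNMENT_DEMO
	#undef  VF32x2_ALIGNMENT_DEMO
	#define VF32x2_ALIGNMENT_DEMO 16
	/* VF32x4_ALIGNMENT_DEMO now expands to 16, not 4 */
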
 
@@ -79,6 +82,21 @@
 #define VUINT32x16_ALIGNMENT VUINT32x8_ALIGNMENT
 #define VUINT64x8_ALIGNMENT VUINT64x4_ALIGNMENT
 
+/* float */
+
+#define VF32x2_ALIGNMENT 4
+
+#define VF32x4_ALIGNMENT VF32x2_ALIGNMENT
+#define VF64x2_ALIGNMENT 8
+
+#define VF32x8_ALIGNMENT VF32x4_ALIGNMENT
+#define VF64x4_ALIGNMENT VF64x2_ALIGNMENT
+
+#define VF32x16_ALIGNMENT VF32x8_ALIGNMENT
+#define VF64x8_ALIGNMENT  VF64x4_ALIGNMENT
+
+/* allow suppressing the hardware implementations, so that we can make
+ * sure the generic impl isn't *painfully* slow ;) */
 #ifndef VEC_SUPPRESS_HW
 
 // IIRC `__VEC__' is also defined, but I don't know for sure.
@@ -121,6 +139,14 @@
 #  undef VUINT64x2_ALIGNMENT
 #  define VUINT64x2_ALIGNMENT VEC_ALTIVEC_ALIGNMENT
 # endif
+# if VF32x4_ALIGNMENT < VEC_ALTIVEC_ALIGNMENT
+#  undef VF32x4_ALIGNMENT
+#  define VF32x4_ALIGNMENT VEC_ALTIVEC_ALIGNMENT
+# endif
+# if VF64x2_ALIGNMENT < VEC_ALTIVEC_ALIGNMENT
+#  undef VF64x2_ALIGNMENT
+#  define VF64x2_ALIGNMENT VEC_ALTIVEC_ALIGNMENT
+# endif
 #endif
 
 #ifdef __ARM_NEON
@@ -315,12 +341,42 @@
 # endif
 #endif
 
-#endif /* defined(VEC_SUPPRESS_HW) */
+#endif /* !defined(VEC_SUPPRESS_HW) */
 
 #if VEC_GNUC_ATLEAST(4, 0, 0)
 # define VEC_COMPILER_HAS_GCC_VECTORS
+# ifdef __BIGGEST_ALIGNMENT__
+#  if VINT8x2_ALIGNMENT < __BIGGEST_ALIGNMENT__
+#   undef VINT8x2_ALIGNMENT
+#   define VINT8x2_ALIGNMENT __BIGGEST_ALIGNMENT__
+#  endif
+#  if VINT16x2_ALIGNMENT < __BIGGEST_ALIGNMENT__
+#   undef VINT16x2_ALIGNMENT
+#   define VINT16x2_ALIGNMENT __BIGGEST_ALIGNMENT__
+#  endif
+#  if VINT32x2_ALIGNMENT < __BIGGEST_ALIGNMENT__
+#   undef VINT32x2_ALIGNMENT
+#   define VINT32x2_ALIGNMENT __BIGGEST_ALIGNMENT__
+#  endif
+#  if VINT64x2_ALIGNMENT < __BIGGEST_ALIGNMENT__
+#   undef VINT64x2_ALIGNMENT
+#   define VINT64x2_ALIGNMENT __BIGGEST_ALIGNMENT__
+#  endif
+#  if VF32x2_ALIGNMENT < __BIGGEST_ALIGNMENT__
+#   undef VF32x2_ALIGNMENT
+#   define VF32x2_ALIGNMENT __BIGGEST_ALIGNMENT__
+#  endif
+#  if VF64x2_ALIGNMENT < __BIGGEST_ALIGNMENT__
+#   undef VF64x2_ALIGNMENT
+#   define VF64x2_ALIGNMENT __BIGGEST_ALIGNMENT__
+#  endif
+# endif
 #endif
 
+/* I don't think this happens on any platform yet, but we should
+ * probably take extra care to make sure the alignment of each vector
+ * type is at least the alignment of the type half its size... */
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -355,6 +411,8 @@
 	return xx.d;
 }
 
+/* this is the general algorithm vec uses for its average
+ * implementation :) */
 VEC_FUNC_IMPL vec_intmax vec_imavg(vec_intmax x, vec_intmax y)
 {
 	vec_intmax x_d_rem    = (x % 2);
@@ -392,13 +450,8 @@
 #else
 // use unions to get an aligned offset without triggering strict aliasing
 # define VEC_ALIGNED_ARRAY(type, var, length, align) \
-	VEC_STATIC_ASSERT(align && ((align & (align - 1)) == 0), "vec: alignment must be a power of two"); \
-	union vec_aligned_union_##var##_ { \
-		type arr[length]; \
-		unsigned char bytes[sizeof(type) * length]; \
-	}; \
-	unsigned char vec_unaligned_##var##_[((length) * sizeof(type)) + (align) - 1]; \
-	type *var = ((union vec_aligned_union_##var##_ *)(((vec_uintptr)vec_unaligned_##var##_ + (align - 1)) & ~(align - 1)))->arr; \
+	type vec_unaligned_##var##_[(length) + (align) - 1]; \
+	type *var = (type *)(((vec_uintptr)vec_unaligned_##var##_ + (align) - 1) & ~(vec_uintptr)((align) - 1));
 # define VEC_ALIGNED_ARRAY_SIZEOF(var, align) \
 	(sizeof(vec_unaligned_##var##_) - (align - 1))
 #endif
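
Both branches of VEC_ALIGNED_ARRAY rely on the usual over-allocate-and-round-up trick: pad the backing storage by align - 1 elements and round the start address up to the next multiple of align (which must be a power of two), so the rounded pointer still has room for all `length' elements. A minimal sketch of just that arithmetic; align_up is an illustrative name, not part of vec.

	#include <stdint.h>

	static void *align_up(void *p, uintptr_t align)
	{
		return (void *)(((uintptr_t)p + (align - 1)) & ~(align - 1));
	}
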
@@ -608,6 +661,47 @@
 #define VUINT64x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VUINT64x8_ALIGNMENT == 0)
 
 /* --------------------------------------------------------------- */
+/* floating point */
+
+#define VF32x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f32, var, 2, VF32x2_ALIGNMENT)
+#define VF32x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF32x2_ALIGNMENT)
+#define VF32x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF32x2_ALIGNMENT)
+#define VF32x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF32x2_ALIGNMENT == 0)
+
+#define VF32x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f32, var, 4, VF32x4_ALIGNMENT)
+#define VF32x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF32x4_ALIGNMENT)
+#define VF32x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF32x4_ALIGNMENT)
+#define VF32x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF32x4_ALIGNMENT == 0)
+
+#define VF32x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f32, var, 8, VF32x8_ALIGNMENT)
+#define VF32x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF32x8_ALIGNMENT)
+#define VF32x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF32x8_ALIGNMENT)
+#define VF32x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF32x8_ALIGNMENT == 0)
+
+#define VF32x16_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f32, var, 16, VF32x16_ALIGNMENT)
+#define VF32x16_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF32x16_ALIGNMENT)
+#define VF32x16_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF32x16_ALIGNMENT)
+#define VF32x16_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF32x16_ALIGNMENT == 0)
+
+/* --------------------------------------------------------------- */
+/* double precision floating point */
+
+#define VF64x2_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f64, var, 2, VF64x2_ALIGNMENT)
+#define VF64x2_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF64x2_ALIGNMENT)
+#define VF64x2_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF64x2_ALIGNMENT)
+#define VF64x2_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF64x2_ALIGNMENT == 0)
+
+#define VF64x4_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f64, var, 4, VF64x4_ALIGNMENT)
+#define VF64x4_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF64x4_ALIGNMENT)
+#define VF64x4_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF64x4_ALIGNMENT)
+#define VF64x4_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF64x4_ALIGNMENT == 0)
+
+#define VF64x8_ALIGNED_ARRAY(var) VEC_ALIGNED_ARRAY(vec_f64, var, 8, VF64x8_ALIGNMENT)
+#define VF64x8_ALIGNED_ARRAY_SIZEOF(var) VEC_ALIGNED_ARRAY_SIZEOF(var, VF64x8_ALIGNMENT)
+#define VF64x8_ALIGNED_ARRAY_LENGTH(var) VEC_ALIGNED_ARRAY_LENGTH(var, VF64x8_ALIGNMENT)
+#define VF64x8_PTR_ALIGNED(ptr) (((uintptr_t)ptr) % VF64x8_ALIGNMENT == 0)
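+
+/* usage sketch for the float variants (mirrors the integer ones):
+ *
+ *	VF32x4_ALIGNED_ARRAY(arr);
+ *	arr[0] = 1.0f; arr[1] = 2.0f; arr[2] = 3.0f; arr[3] = 4.0f;
+ *	vf32x4 v = vf32x4_load_aligned(arr);
+ */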
+
+/* --------------------------------------------------------------- */
 /* Defines the structures for each vector type */
 
 // 16-bit
@@ -630,7 +724,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_uint8 __attribute__((__vector_size__(4))) gcc;
 #endif
-	vuint8x2 generic[2];
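+	/* two views of the same storage: `dbl` as two half-width vectors (for
+	 * the doubling code), `generic` as a flat scalar array (for the
+	 * generic implementation) */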
+	vuint8x2 dbl[2];
+
+	vec_uint8 generic[4];
 } vuint8x4;
 
 typedef union {
@@ -644,7 +740,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_int8 __attribute__((__vector_size__(4))) gcc;
 #endif
-	vint8x2 generic[2];
+	vint8x2 dbl[2];
+
+	vec_int8 generic[4];
 } vint8x4;
 
 typedef union {
@@ -666,7 +764,9 @@
 	vec_uint8 __attribute__((__vector_size__(8))) gcc;
 #endif
 
-	vuint8x4 generic[2];
+	vuint8x4 dbl[2];
+
+	vec_uint8 generic[8];
 } vuint8x8;
 
 typedef union {
@@ -680,7 +780,9 @@
 	vec_uint16 __attribute__((__vector_size__(8))) gcc;
 #endif
 
-	vuint16x2 generic[2];
+	vuint16x2 dbl[2];
+
+	vec_uint16 generic[4];
 } vuint16x4;
 
 typedef union {
@@ -708,7 +810,9 @@
 	vec_int8 __attribute__((__vector_size__(8))) gcc;
 #endif
 
-	vint8x4 generic[2];
+	vint8x4 dbl[2];
+
+	vec_int8 generic[8];
 } vint8x8;
 
 typedef union {
@@ -722,7 +826,9 @@
 	vec_int16 __attribute__((__vector_size__(8))) gcc;
 #endif
 
-	vint16x2 generic[2];
+	vint16x2 dbl[2];
+
+	vec_int16 generic[4];
 } vint16x4;
 
 typedef union {
@@ -753,7 +859,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_uint8 __attribute__((__vector_size__(16))) gcc;
 #endif
-	vuint8x8 generic[2];
+	vuint8x8 dbl[2];
+
+	vec_uint8 generic[16];
 } vuint8x16;
 
 typedef union {
@@ -769,7 +877,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_uint16 __attribute__((__vector_size__(16))) gcc;
 #endif
-	vuint16x4 generic[2];
+	vuint16x4 dbl[2];
+
+	vec_uint16 generic[8];
 } vuint16x8;
 
 typedef union {
@@ -785,7 +895,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_uint32 __attribute__((__vector_size__(16))) gcc;
 #endif
-	vuint32x2 generic[2];
+	vuint32x2 dbl[2];
+
+	vec_uint32 generic[4];
 } vuint32x4;
 
 typedef union {
@@ -817,7 +929,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_int8 __attribute__((__vector_size__(16))) gcc;
 #endif
-	vint8x8 generic[2];
+	vint8x8 dbl[2];
+
+	vec_int8 generic[16];
 } vint8x16;
 
 typedef union {
@@ -833,7 +947,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_int16 __attribute__((__vector_size__(16))) gcc;
 #endif
-	vint16x4 generic[2];
+	vint16x4 dbl[2];
+
+	vec_int16 generic[8];
 } vint16x8;
 
 typedef union {
@@ -849,7 +965,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_int32 __attribute__((__vector_size__(16))) gcc;
 #endif
-	vint32x2 generic[2];
+	vint32x2 dbl[2];
+
+	vec_int32 generic[4];
 } vint32x4;
 
 typedef union {
@@ -876,7 +994,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_uint8 __attribute__((__vector_size__(32))) gcc;
 #endif
-	vuint8x16 generic[2];
+	vuint8x16 dbl[2];
+
+	vec_uint8 generic[32];
 } vuint8x32;
 
 typedef union {
@@ -886,7 +1006,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_uint16 __attribute__((__vector_size__(32))) gcc;
 #endif
-	vuint16x8 generic[2];
+	vuint16x8 dbl[2];
+
+	vec_uint16 generic[16];
 } vuint16x16;
 
 typedef union {
@@ -896,7 +1018,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_uint32 __attribute__((__vector_size__(32))) gcc;
 #endif
-	vuint32x4 generic[2];
+	vuint32x4 dbl[2];
+
+	vec_uint32 generic[8];
 } vuint32x8;
 
 typedef union {
@@ -906,7 +1030,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_uint64 __attribute__((__vector_size__(32))) gcc;
 #endif
-	vuint64x2 generic[2];
+	vuint64x2 dbl[2];
+
+	vec_uint64 generic[4];
 } vuint64x4;
 
 typedef union {
@@ -916,7 +1042,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_int8 __attribute__((__vector_size__(32))) gcc;
 #endif
-	vint8x16 generic[2];
+	vint8x16 dbl[2];
+
+	vec_int8 generic[32];
 } vint8x32;
 
 typedef union {
@@ -926,7 +1054,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_int16 __attribute__((__vector_size__(32))) gcc;
 #endif
-	vint16x8 generic[2];
+	vint16x8 dbl[2];
+
+	vec_int16 generic[16];
 } vint16x16;
 
 typedef union {
@@ -936,7 +1066,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_int32 __attribute__((__vector_size__(32))) gcc;
 #endif
-	vint32x4 generic[2];
+	vint32x4 dbl[2];
+
+	vec_int32 generic[8];
 } vint32x8;
 
 typedef union {
@@ -946,7 +1078,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_int64 __attribute__((__vector_size__(32))) gcc;
 #endif
-	vint64x2 generic[2];
+	vint64x2 dbl[2];
+
+	vec_int64 generic[4];
 } vint64x4;
 
 // 512-bit
@@ -957,7 +1091,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_uint8 __attribute__((__vector_size__(64))) gcc;
 #endif
-	vuint8x32 generic[2];
+	vuint8x32 dbl[2];
+
+	vec_uint8 generic[64];
 } vuint8x64;
 
 typedef union {
@@ -967,7 +1103,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_uint16 __attribute__((__vector_size__(64))) gcc;
 #endif
-	vuint16x16 generic[2];
+	vuint16x16 dbl[2];
+
+	vec_uint16 generic[32];
 } vuint16x32;
 
 typedef union {
@@ -977,7 +1115,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_uint32 __attribute__((__vector_size__(64))) gcc;
 #endif
-	vuint32x8 generic[2];
+	vuint32x8 dbl[2];
+
+	vec_uint32 generic[16];
 } vuint32x16;
 
 typedef union {
@@ -987,7 +1127,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_uint64 __attribute__((__vector_size__(64))) gcc;
 #endif
-	vuint64x4 generic[2];
+	vuint64x4 dbl[2];
+
+	vec_uint64 generic[8];
 } vuint64x8;
 
 typedef union {
@@ -997,7 +1139,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_int8 __attribute__((__vector_size__(64))) gcc;
 #endif
-	vint8x32 generic[2];
+	vint8x32 dbl[2];
+
+	vec_int8 generic[64];
 } vint8x64;
 
 typedef union {
@@ -1007,7 +1151,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_int16 __attribute__((__vector_size__(64))) gcc;
 #endif
-	vint16x16 generic[2];
+	vint16x16 dbl[2];
+
+	vec_int16 generic[32];
 } vint16x32;
 
 typedef union {
@@ -1017,7 +1163,9 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_int32 __attribute__((__vector_size__(64))) gcc;
 #endif
-	vint32x8 generic[2];
+	vint32x8 dbl[2];
+
+	vec_int32 generic[16];
 } vint32x16;
 
 typedef union {
@@ -1027,9 +1175,84 @@
 #ifdef VEC_COMPILER_HAS_GCC_VECTORS
 	vec_int64 __attribute__((__vector_size__(64))) gcc;
 #endif
-	vint64x4 generic[2];
+	vint64x4 dbl[2];
+
+	vec_int64 generic[8];
 } vint64x8;
 
+/* ------- Floating-point types */
+
+typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_f32 __attribute__((__vector_size__(8))) gcc;
+#endif
+	vec_f32 generic[2];
+} vf32x2;
+
+typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_f32 __attribute__((__vector_size__(16))) gcc;
+#endif
+#ifdef VEC_COMPILER_HAS_ALTIVEC
+	vector float altivec;
+#endif
+
+	vf32x2 dbl[2];
+
+	vec_f32 generic[4];
+} vf32x4;
+
+typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_f32 __attribute__((__vector_size__(32))) gcc;
+#endif
+
+	vf32x4 dbl[2];
+
+	vec_f32 generic[8];
+} vf32x8;
+
+typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_f32 __attribute__((__vector_size__(64))) gcc;
+#endif
+
+	vf32x8 dbl[2];
+
+	vec_f32 generic[16];
+} vf32x16;
+
+typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_f64 __attribute__((__vector_size__(16))) gcc;
+#endif
+#ifdef VEC_COMPILER_HAS_ALTIVEC_VSX
+	vector double altivec;
+#endif
+
+	vec_f64 generic[2];
+} vf64x2;
+
+typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_f64 __attribute__((__vector_size__(32))) gcc;
+#endif
+
+	vf64x2 dbl[2];
+
+	vec_f64 generic[4];
+} vf64x4;
+
+typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_f64 __attribute__((__vector_size__(64))) gcc;
+#endif
+
+	vf64x4 dbl[2];
+
+	vec_f64 generic[8];
+} vf64x8;
+
 /* ------------------------------------------------------------------------ */
 /* x86 */
 
@@ -1064,6 +1287,10 @@
 /* ------------------------------------------------------------------------ */
 /* PowerPC */
 
+#ifdef VEC_COMPILER_HAS_ALTIVEC_VSX
+# include "impl/ppc/vsx.h"
+#endif
+
 #ifdef VEC_COMPILER_HAS_ALTIVEC
 # include "impl/ppc/altivec.h"
 #endif
@@ -1083,9 +1310,12 @@
 # include "impl/gcc.h"
 #endif
 
+/* we don't need to double here, because gcc defines literally everything :) */
+
+/* ------------------------------------------------------------------------ */
 /* Fill in anything remaining with a generic array-based implementation. */
+
 #include "impl/generic.h"
-#include "impl/double.h"
 
 /* ------------------------------------------------------------------------ */
 
--- a/test/Makefile.template	Tue Apr 29 16:54:13 2025 -0400
+++ b/test/Makefile.template	Wed Apr 30 18:36:38 2025 -0400
@@ -1,12 +1,14 @@
 CPPFLAGS += -g -O2 -I../include -Wall
 CFLAGS += $(CPPFLAGS) -std=c99
 CXXFLAGS += $(CPPFLAGS) -std=c++11
+LDADD += -lm
 
 HEADERS = ../include/vec/vec.h \
 	../include/vec/cpu.h \
 	../include/vec/mem.h \
 	../include/vec/defs.h \
 	../include/vec/impl/ppc/altivec.h \
+	../include/vec/impl/ppc/vsx.h \
 	../include/vec/impl/x86/avx2.h \
 	../include/vec/impl/x86/avx512f.h \
 	../include/vec/impl/x86/avx512bw.h \
@@ -38,13 +40,13 @@
 	$(CXX) $(CXXFLAGS) -c -o $@ $<
 
 test-generic: test.o test_benchmark_simple.o test_benchmark_vec.o
-	$(CC) $(LDFLAGS) -o $@ $^
+	$(CC) $(LDFLAGS) -o $@ $^ $(LDADD)
 
 test-host: test.o test_benchmark_simple.o test_benchmark_vec.o
-	$(CC) $(LDFLAGS) -o $@ $^
+	$(CC) $(LDFLAGS) -o $@ $^ $(LDADD)
 
 test-cxx: test-cxx.o $(HEADERS)
-	$(CXX) $(LDFLAGS) -o $@ $<
+	$(CXX) $(LDFLAGS) -o $@ $< $(LDADD)
 
 clean:
 	$(RM) $(BINS) $(OBJS)
--- a/test/test.c	Tue Apr 29 16:54:13 2025 -0400
+++ b/test/test.c	Wed Apr 30 18:36:38 2025 -0400
@@ -40,6 +40,11 @@
 	UINT32_C(0xFFFFFFFF), UINT32_C(0xFFFFFFFE), UINT32_C( 0), UINT32_C(         1),
 };
 
+static const float testvalf32[] = {
+	1.0f, -3.33f, -4096.0f, 1234.0f,
+	90.0f, -12.0f, 60.0f, 10224.0f,
+};
+
 static const int64_t testval64[] = {
 	INT64_MAX, INT64_C(-3),     INT64_C(0x00000000),   INT64_C(0xFFFFFFFFF),
 	INT64_MIN, INT64_C(645366), INT64_C(0x12345ABCDE), INT64_C(0xF00000FFF),
@@ -50,36 +55,44 @@
 	UINT64_C(0xff), UINT64_C(645366),     UINT64_C(0x12345ABCDE), UINT64_C(0xF00000FFF),
 };
 
-#define VTEST(sign, csign, bits, size) \
-	static inline v##sign##int##bits##x##size vtest##sign##bits##x##size(const size_t start) \
+static const double testvalf64[] = {
+	2345734.0, 12498.0, 12.0, -12312.0,
+	-5.0, 12.234, 3.1415, 2.71828,
+};
+
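+/* shorttype is the prefix of the testval arrays ("" for signed, "u" for
+ * unsigned, "f" for float); type/ctype are the lowercase/uppercase names
+ * used by the vec API (int/INT, uint/UINT, f/F). */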
+#define VTEST(shorttype, type, ctype, bits, size) \
+	static inline v##type##bits##x##size vtest##shorttype##bits##x##size(const size_t start) \
 	{ \
-		V##csign##INT##bits##x##size##_ALIGNED_ARRAY(x); \
+		V##ctype##bits##x##size##_ALIGNED_ARRAY(x); \
 		for (size_t i = 0; i < size; i++) \
-			x[i] = testval##sign##bits[(start + i) % ARRAY_SIZE(testval##sign##bits)]; \
-		return v##sign##int##bits##x##size##_load_aligned(x); \
+			x[i] = testval##shorttype##bits[(start + i) % ARRAY_SIZE(testval##shorttype##bits)]; \
+		return v##type##bits##x##size##_load_aligned(x); \
 	}
 
-#define VPRINT(sign, csign, psign, bits, size) \
-	static inline void print_v##sign##int##bits##x##size(FILE *file, v##sign##int##bits##x##size vec) \
+#define VPRINT(type, ctype, print, bits, size) \
+	static inline void print_v##type##bits##x##size(FILE *file, v##type##bits##x##size vec) \
 	{ \
 		fputs("vector: ", file); \
 	\
-		V##csign##INT##bits##x##size##_ALIGNED_ARRAY(v); \
+		V##ctype##bits##x##size##_ALIGNED_ARRAY(v); \
 	\
-		v##sign##int##bits##x##size##_store_aligned(vec, v); \
+		v##type##bits##x##size##_store_aligned(vec, v); \
 	\
-		fprintf(file, "%" PRI ## psign ## bits, v[0]); \
+		fprintf(file, "%" print, v[0]); \
 	\
 		for (int i = 1; i < size; i++) \
-			fprintf(file, ", %" PRI ## psign ## bits, v[i]); \
+			fprintf(file, ", %" print, v[i]); \
 	\
 		fputs("\n", file); \
-	\
 	}
 
 #define DEF_VEC_TEST_FUNCS(bits, size) \
-	VTEST(, , bits, size)     VTEST(u, U, bits, size) \
-	VPRINT(, , d, bits, size) VPRINT(u, U, u, bits, size)
+	VTEST(, int, INT, bits, size)     VTEST(u, uint, UINT, bits, size) \
+	VPRINT(int, INT, PRI##d##bits, bits, size) VPRINT(uint, UINT, PRI##u##bits, bits, size)
+
+#define DEF_VEC_TEST_FUNC_FLOAT(bits, size) \
+	VTEST(f, f, F, bits, size) \
+	VPRINT(f, F, "f", bits, size)
 
 DEF_VEC_TEST_FUNCS(8, 2)
 
@@ -105,18 +118,25 @@
 DEF_VEC_TEST_FUNCS(32, 16)
 DEF_VEC_TEST_FUNCS(64, 8)
 
+DEF_VEC_TEST_FUNC_FLOAT(32, 2)
+DEF_VEC_TEST_FUNC_FLOAT(32, 4)
+DEF_VEC_TEST_FUNC_FLOAT(32, 8)
+DEF_VEC_TEST_FUNC_FLOAT(32, 16)
+
+DEF_VEC_TEST_FUNC_FLOAT(64, 2)
+DEF_VEC_TEST_FUNC_FLOAT(64, 4)
+DEF_VEC_TEST_FUNC_FLOAT(64, 8)
+
+#undef DEF_VEC_TEST_FUNC_FLOAT
 #undef DEF_VEC_TEST_FUNCS
 #undef VPRINT
 #undef VTEST
 
 // ------------------------------------------------------------
 
-#if 0
 #include "test_align.h"
 #include "test_arith.h"
 #include "test_compare.h"
 #include "test_shift.h"
-#endif
 #include "test_benchmark.h"
 
 // ------------------------------------------------------------
@@ -127,12 +147,10 @@
 
 	srand(time(NULL));
 
-#if 0
 	ret |= test_align();
 	ret |= test_arith();
 	ret |= test_compare();
 	ret |= test_shift();
-#endif
 
 	test_benchmark();
 
--- a/test/test_align.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/test/test_align.h	Wed Apr 30 18:36:38 2025 -0400
@@ -2,34 +2,35 @@
 {
 	int ret = 0;
 
-#define RUN_TEST(sign, csign, bits, size) \
+#define RUN_TEST(type, ctype, bits, size) \
 	do { \
+		int i; \
 		/* allocate the aligned array */ \
-		V##csign##INT##bits##x##size##_ALIGNED_ARRAY(vec_arr); \
+		V##ctype##bits##x##size##_ALIGNED_ARRAY(vec_arr); \
 	\
 		/* fill the values */ \
-		for (int i = 0; i < size; i++) \
+		for (i = 0; i < size; i++) \
 			vec_arr[i] = i; \
 	\
 		/* try to load it */ \
-		v##sign##int##bits##x##size vec = v##sign##int##bits##x##size##_load_aligned(vec_arr); \
+		v##type##bits##x##size vec = v##type##bits##x##size##_load(vec_arr); \
 	\
 		/* now allocate an output array */ \
-		V##csign##INT##bits##x##size##_ALIGNED_ARRAY(vec_arr_out); \
+		V##ctype##bits##x##size##_ALIGNED_ARRAY(vec_arr_out); \
 	\
 		/* try storing it */ \
-		v##sign##int##bits##x##size##_store_aligned(vec, vec_arr_out); \
+		v##type##bits##x##size##_store_aligned(vec, vec_arr_out); \
 	\
 		/* mark success or failure */ \
-		ret |= !!memcmp(vec_arr, vec_arr_out, size * sizeof(*vec_arr)); \
+		ret |= !!memcmp(vec_arr, vec_arr_out, size * (bits / 8)); \
 	\
-		ret |= !V##csign##INT##bits##x##size##_PTR_ALIGNED(vec_arr); \
-		ret |= !V##csign##INT##bits##x##size##_PTR_ALIGNED(vec_arr_out); \
+		ret |= !V##ctype##bits##x##size##_PTR_ALIGNED(vec_arr); \
+		ret |= !V##ctype##bits##x##size##_PTR_ALIGNED(vec_arr_out); \
 	} while (0);
 
 #define RUN_TESTS(bits, size) \
-	RUN_TEST( ,  , bits, size) \
-	RUN_TEST(u, U, bits, size)
+	RUN_TEST(int, INT, bits, size) \
+	RUN_TEST(uint, UINT, bits, size)
 
 	RUN_TESTS(8, 2)
 
@@ -56,6 +57,17 @@
 	RUN_TESTS(64, 8)
 
 #undef RUN_TESTS
+
+	/* floating point */
+	RUN_TEST(f, F, 32, 2)
+	RUN_TEST(f, F, 32, 4)
+	RUN_TEST(f, F, 32, 8)
+	RUN_TEST(f, F, 32, 16)
+
+	RUN_TEST(f, F, 64, 2)
+	RUN_TEST(f, F, 64, 4)
+	RUN_TEST(f, F, 64, 8)
+
 #undef RUN_TEST
 
 	return ret;
--- a/test/test_arith.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/test/test_arith.h	Wed Apr 30 18:36:38 2025 -0400
@@ -1,22 +1,22 @@
-#define CREATE_TEST_EX(sign, psign, csign, bits, size, op, equiv, secondsign, secondcsign) \
-	static int test_arith_v##sign##int##bits##x##size##_##op(v##sign##int##bits##x##size a, v##secondsign##int##bits##x##size b) \
+#define CREATE_TEST_EX(type, ctype, print, bits, size, op, equiv, secondtype, secondctype) \
+	static int test_arith_v##type##bits##x##size##_##op(v##type##bits##x##size a, v##secondtype##bits##x##size b) \
 	{ \
-		V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_a); \
-		V##secondcsign##INT##bits##x##size##_ALIGNED_ARRAY(orig_b); \
-		V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_c); \
+		V##ctype##bits##x##size##_ALIGNED_ARRAY(orig_a); \
+		V##secondctype##bits##x##size##_ALIGNED_ARRAY(orig_b); \
+		V##ctype##bits##x##size##_ALIGNED_ARRAY(orig_c); \
 	\
-		v##sign##int##bits##x##size c = v##sign##int##bits##x##size##_##op(a, b); \
+		v##type##bits##x##size c = v##type##bits##x##size##_##op(a, b); \
 	\
-		v##sign##int##bits##x##size##_store_aligned(a, orig_a); \
-		v##secondsign##int##bits##x##size##_store_aligned(b, orig_b); \
-		v##sign##int##bits##x##size##_store_aligned(c, orig_c); \
+		v##type##bits##x##size##_store_aligned(a, orig_a); \
+		v##secondtype##bits##x##size##_store_aligned(b, orig_b); \
+		v##type##bits##x##size##_store_aligned(c, orig_c); \
 	\
 		for (int i = 0; i < size; i++) { \
-			if ((sign##int##bits##_t)(equiv) != orig_c[i]) { \
-				fprintf(stderr, "v" #sign "int" #bits "x" #size "_" #op " test FAILED at index %d: (%s) [%" PRI ## psign ## bits "] does not equal result [%" PRI ## psign ## bits "]!\n", i, #equiv, (vec_##sign##int##bits)(equiv), orig_c[i]); \
-				print_v##sign##int##bits##x##size(stderr,a); \
-				print_v##secondsign##int##bits##x##size(stderr,b); \
-				print_v##sign##int##bits##x##size(stderr,c); \
+			if ((vec_##type##bits)(equiv) != orig_c[i]) { \
+				fprintf(stderr, "v" #type #bits "x" #size "_" #op " test FAILED at index %d: (%s) [%" print "] does not equal result [%" print "]!\n", i, #equiv, (vec_##type##bits)(equiv), orig_c[i]); \
+				print_v##type##bits##x##size(stderr,a); \
+				print_v##secondtype##bits##x##size(stderr,b); \
+				print_v##type##bits##x##size(stderr,c); \
 				fprintf(stderr, "\n"); \
 				return 1; \
 			} \
@@ -25,31 +25,41 @@
 		return 0; \
 	}
 
-#define CREATE_TEST(sign, psign, csign, bits, size, op, equiv) \
-	CREATE_TEST_EX(sign, psign, csign, bits, size, op, equiv, sign, csign)
+#define CREATE_TEST(type, ctype, print, bits, size, op, equiv) \
+	CREATE_TEST_EX(type, ctype, print, bits, size, op, equiv, type, ctype)
 
-#define CREATE_TEST_SHIFT(sign, psign, csign, bits, size, op, equiv) \
-	CREATE_TEST_EX(sign, psign, csign, bits, size, op, equiv, u, U)
+#define CREATE_TEST_SHIFT(type, ctype, print, bits, size, op, equiv) \
+	CREATE_TEST_EX(type, ctype, print, bits, size, op, equiv, uint, UINT)
 
-#define CREATE_TESTS_SIGN(sign, psign, csign, bits, size) \
-	CREATE_TEST(sign, psign, csign, bits, size, add, orig_a[i] + orig_b[i]) \
-	CREATE_TEST(sign, psign, csign, bits, size, sub, orig_a[i] - orig_b[i]) \
-	CREATE_TEST(sign, psign, csign, bits, size, mul, orig_a[i] * orig_b[i]) \
-	CREATE_TEST(sign, psign, csign, bits, size, div, (orig_b[i]) ? (orig_a[i] / orig_b[i]) : 0) \
-	CREATE_TEST(sign, psign, csign, bits, size, mod, (orig_b[i]) ? (orig_a[i] % orig_b[i]) : 0) \
-	CREATE_TEST(sign, psign, csign, bits, size, and, orig_a[i] & orig_b[i]) \
-	CREATE_TEST(sign, psign, csign, bits, size, or,  orig_a[i] | orig_b[i]) \
-	CREATE_TEST(sign, psign, csign, bits, size, xor, orig_a[i] ^ orig_b[i]) \
-	CREATE_TEST(sign, psign, csign, bits, size, avg, (vec_##sign##int##bits)vec_im##sign##avg(orig_a[i], orig_b[i])) \
-	CREATE_TEST_SHIFT(sign, psign, csign, bits, size, rshift, vec_##sign##rshift(orig_a[i], orig_b[i])) \
-	CREATE_TEST_SHIFT(sign, psign, csign, bits, size, lshift, vec_##sign##lshift(orig_a[i], orig_b[i])) \
-	CREATE_TEST_SHIFT(sign, psign, csign, bits, size, lrshift, vec_urshift((vec_uint##bits)orig_a[i], orig_b[i])) \
-	CREATE_TEST(sign, psign, csign, bits, size, min, (orig_a[i] < orig_b[i]) ? orig_a[i] : orig_b[i]) \
-	CREATE_TEST(sign, psign, csign, bits, size, max, (orig_a[i] > orig_b[i]) ? orig_a[i] : orig_b[i])
+#define CREATE_TESTS_INT(type, ctype, sign, print, bits, size) \
+	CREATE_TEST(type, ctype, print, bits, size, add, orig_a[i] + orig_b[i]) \
+	CREATE_TEST(type, ctype, print, bits, size, sub, orig_a[i] - orig_b[i]) \
+	CREATE_TEST(type, ctype, print, bits, size, mul, orig_a[i] * orig_b[i]) \
+	CREATE_TEST(type, ctype, print, bits, size, div, (orig_b[i]) ? (orig_a[i] / orig_b[i]) : 0) \
+	CREATE_TEST(type, ctype, print, bits, size, mod, (orig_b[i]) ? (orig_a[i] % orig_b[i]) : 0) \
+	CREATE_TEST(type, ctype, print, bits, size, and, orig_a[i] & orig_b[i]) \
+	CREATE_TEST(type, ctype, print, bits, size, or,  orig_a[i] | orig_b[i]) \
+	CREATE_TEST(type, ctype, print, bits, size, xor, orig_a[i] ^ orig_b[i]) \
+	CREATE_TEST(type, ctype, print, bits, size, avg, (vec_##type##bits)vec_im##sign##avg(orig_a[i], orig_b[i])) \
+	CREATE_TEST_SHIFT(type, ctype, print, bits, size, rshift, vec_##sign##rshift(orig_a[i], orig_b[i])) \
+	CREATE_TEST_SHIFT(type, ctype, print, bits, size, lshift, vec_##sign##lshift(orig_a[i], orig_b[i])) \
+	CREATE_TEST_SHIFT(type, ctype, print, bits, size, lrshift, vec_urshift((vec_uint##bits)orig_a[i], orig_b[i])) \
+	CREATE_TEST(type, ctype, print, bits, size, min, (orig_a[i] < orig_b[i]) ? orig_a[i] : orig_b[i]) \
+	CREATE_TEST(type, ctype, print, bits, size, max, (orig_a[i] > orig_b[i]) ? orig_a[i] : orig_b[i])
+
+#define CREATE_TESTS_FLOAT(bits, size) \
+	CREATE_TEST(f, F, "f", bits, size, add, orig_a[i] + orig_b[i]) \
+	CREATE_TEST(f, F, "f", bits, size, sub, orig_a[i] - orig_b[i]) \
+	CREATE_TEST(f, F, "f", bits, size, mul, orig_a[i] * orig_b[i]) \
+	CREATE_TEST(f, F, "f", bits, size, div, (orig_b[i]) ? (orig_a[i] / orig_b[i]) : 0) \
+	CREATE_TEST(f, F, "f", bits, size, mod, (orig_b[i]) ? (fmod(orig_a[i], orig_b[i])) : 0) \
+	CREATE_TEST(f, F, "f", bits, size, avg, (orig_a[i] + orig_b[i]) / 2) \
+	CREATE_TEST(f, F, "f", bits, size, min, (orig_a[i] < orig_b[i]) ? orig_a[i] : orig_b[i]) \
+	CREATE_TEST(f, F, "f", bits, size, max, (orig_a[i] > orig_b[i]) ? orig_a[i] : orig_b[i])
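+
+/* note: float results are compared with exact equality (no epsilon), which
+ * relies on the vector implementations doing the same arithmetic as the
+ * scalar expressions above */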
 
 #define CREATE_TESTS(bits, size) \
-	CREATE_TESTS_SIGN(, d, , bits, size) \
-	CREATE_TESTS_SIGN(u, u, U, bits, size)
+	CREATE_TESTS_INT(int,  INT,  /* nothing */, PRI##d##bits, bits, size) \
+	CREATE_TESTS_INT(uint, UINT, u,             PRI##u##bits, bits, size)
 
 CREATE_TESTS(8, 2)
 
@@ -75,7 +85,17 @@
 CREATE_TESTS(32, 16)
 CREATE_TESTS(64, 8)
 
-#undef CREATE_TESTS_SIGN
+CREATE_TESTS_FLOAT(32, 2)
+CREATE_TESTS_FLOAT(32, 4)
+CREATE_TESTS_FLOAT(32, 8)
+CREATE_TESTS_FLOAT(32, 16)
+
+CREATE_TESTS_FLOAT(64, 2)
+CREATE_TESTS_FLOAT(64, 4)
+CREATE_TESTS_FLOAT(64, 8)
+
+#undef CREATE_TESTS_INT
+#undef CREATE_TESTS_FLOAT
 #undef CREATE_TESTS
 #undef CREATE_TEST
 #undef CREATE_TEST_SHIFT
@@ -84,38 +104,54 @@
 {
 	int ret = 0;
 
-#define RUN_TESTS_SIGN(sign, bits, size) \
-	for (size_t i = 0U; i < ARRAY_SIZE(testval##sign##bits); i++) { \
-		const v##sign##int##bits##x##size a = vtest##sign##bits##x##size(i); \
-		for (size_t j = 0U; j < ARRAY_SIZE(testval##sign##bits); j++) { \
-			const v##sign##int##bits##x##size b = vtest##sign##bits##x##size(j); \
-			ret |= test_arith_v##sign##int##bits##x##size##_add(a, b); \
-			ret |= test_arith_v##sign##int##bits##x##size##_sub(a, b); \
-			ret |= test_arith_v##sign##int##bits##x##size##_mul(a, b); \
-			ret |= test_arith_v##sign##int##bits##x##size##_div(a, b); \
-			ret |= test_arith_v##sign##int##bits##x##size##_mod(a, b); \
-			ret |= test_arith_v##sign##int##bits##x##size##_and(a, b); \
-			ret |= test_arith_v##sign##int##bits##x##size##_or(a, b); \
-			ret |= test_arith_v##sign##int##bits##x##size##_xor(a, b); \
-			ret |= test_arith_v##sign##int##bits##x##size##_avg(a, b); \
-			ret |= test_arith_v##sign##int##bits##x##size##_min(a, b); \
-			ret |= test_arith_v##sign##int##bits##x##size##_max(a, b); \
+#define RUN_TESTS_SIGN(shorttype, type, bits, size) \
+	for (size_t i = 0U; i < ARRAY_SIZE(testval##shorttype##bits); i++) { \
+		const v##type##bits##x##size a = vtest##shorttype##bits##x##size(i); \
+		for (size_t j = 0U; j < ARRAY_SIZE(testval##shorttype##bits); j++) { \
+			const v##type##bits##x##size b = vtest##shorttype##bits##x##size(j); \
+			ret |= test_arith_v##type##bits##x##size##_add(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_sub(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_mul(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_div(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_mod(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_and(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_or(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_xor(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_avg(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_min(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_max(a, b); \
 		} \
 	} \
 	\
-	for (size_t i = 0U; i < ARRAY_SIZE(testval##sign##bits); i++) { \
-		const v##sign##int##bits##x##size a = vtest##sign##bits##x##size(i); \
+	for (size_t i = 0U; i < ARRAY_SIZE(testval##shorttype##bits); i++) { \
+		const v##type##bits##x##size a = vtest##shorttype##bits##x##size(i); \
 		for (uint32_t j = 0U; j < bits; j++) { \
 			const vuint##bits##x##size b = vuint##bits##x##size##_splat(j); \
-			ret |= test_arith_v##sign##int##bits##x##size##_rshift(a, b); \
-			ret |= test_arith_v##sign##int##bits##x##size##_lshift(a, b); \
-			ret |= test_arith_v##sign##int##bits##x##size##_lrshift(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_rshift(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_lshift(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_lrshift(a, b); \
 		} \
 	}
 
 #define RUN_TESTS(bits, size) \
-	RUN_TESTS_SIGN( , bits, size) \
-	RUN_TESTS_SIGN(u, bits, size)
+	RUN_TESTS_SIGN( , int,  bits, size) \
+	RUN_TESTS_SIGN(u, uint, bits, size)
+
+#define RUN_TESTS_FLOAT(shorttype, type, bits, size) \
+	for (size_t i = 0U; i < ARRAY_SIZE(testval##shorttype##bits); i++) { \
+		const v##type##bits##x##size a = vtest##shorttype##bits##x##size(i); \
+		for (size_t j = 0U; j < ARRAY_SIZE(testval##shorttype##bits); j++) { \
+			const v##type##bits##x##size b = vtest##shorttype##bits##x##size(j); \
+			ret |= test_arith_v##type##bits##x##size##_add(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_sub(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_mul(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_div(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_mod(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_avg(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_min(a, b); \
+			ret |= test_arith_v##type##bits##x##size##_max(a, b); \
+		} \
+	}
 
 	RUN_TESTS(8, 2)
 
@@ -141,6 +177,16 @@
 	RUN_TESTS(32, 16)
 	RUN_TESTS(64, 8)
 
+	RUN_TESTS_FLOAT(f, f, 32, 2)
+	RUN_TESTS_FLOAT(f, f, 32, 4)
+	RUN_TESTS_FLOAT(f, f, 32, 8)
+	RUN_TESTS_FLOAT(f, f, 32, 16)
+
+	RUN_TESTS_FLOAT(f, f, 64, 2)
+	RUN_TESTS_FLOAT(f, f, 64, 4)
+	RUN_TESTS_FLOAT(f, f, 64, 8)
+
+#undef RUN_TESTS_FLOAT
 #undef RUN_TESTS_SIGN
 #undef RUN_TESTS
 
--- a/test/test_benchmark.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/test/test_benchmark.h	Wed Apr 30 18:36:38 2025 -0400
@@ -2,37 +2,115 @@
 /* ------------------------------------------------------------------------ */
 /* simple benchmark for getting the min/max range of an audio sample. */
 
-extern void test_benchmark_sample_minmax_simple_impl(int16_t *smpl, uint32_t length, int32_t *pmin, int32_t *pmax);
-extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl, uint32_t length, int32_t *pmin, int32_t *pmax);
+extern void test_benchmark_sample_minmax_int8x2_impl(vec_int8 *smpl, uint32_t length, vec_int8 *pmin, vec_int8 *pmax);
 
 VEC_FUNC_IMPL void test_benchmark_sample_minmax(void)
 {
-	int32_t min, max;
-	clock_t start, end;
 	int i;
-	int16_t *q = vec_malloc(16000001u * 2u);
+
+	union {
+		vec_int8   int8[16000001];
+		vec_uint8  uint8[16000001];
+		vec_int16  int16[16000001];
+		vec_uint16 uint16[16000001];
+		vec_int32  int32[16000001];
+		vec_uint32 uint32[16000001];
+		vec_int64  int64[16000001];
+		vec_uint64 uint64[16000001];
+		vec_f32    f32[16000001];
+		vec_f64    f64[16000001];
+	} *q;
+
+	q = vec_malloc(sizeof(*q));
+
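+	/* only the f64 view is written here; the other union members simply
+	 * reinterpret the same bytes, which is fine for a benchmark */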
+	for (i = 0; i < 16000001; i++)
+		q->f64[i] = (double)rand() / RAND_MAX;
 
-	printf("\nsigned 16-bit audio sample min/max - 1 thousand passes - 16000001 samples\n\n");
+	printf("\naudio sample min/max - 1 thousand passes - 16000001 samples\n\n");
 
-	start = clock();
-	for (i = 0; i < 100; i++) {
-		min = INT32_MAX;
-		max = INT32_MIN;
-		test_benchmark_sample_minmax_vec_impl(q, 16000001u, &min, &max);
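+/* NAME picks which _impl to time (e.g. int16x8 for a vec variant, plain
+ * int16 for the scalar reference in test_benchmark_simple.c); MIN/MAX are
+ * the type's limits, used to reset the running min/max before each pass. */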
+#define DO_TIMER(TYPE,NAME,MIN,MAX) \
+	{ \
+		vec_##TYPE min, max; \
+		clock_t start, end; \
+	\
+		start = clock(); \
+	\
+		for (i = 0; i < 1000; i++) { \
+			extern void test_benchmark_sample_minmax_##NAME##_impl(vec_##TYPE *smpl, uint32_t length, vec_##TYPE *pmin, vec_##TYPE *pmax); \
+	\
+			min = MAX; \
+			max = MIN; \
+	\
+			test_benchmark_sample_minmax_##NAME##_impl(q->TYPE, 16000001u, &min, &max); \
+		} \
+	\
+		end = clock(); \
+	\
+		printf("- %s: took %f secs\n", #NAME, (double)(end - start) / CLOCKS_PER_SEC); \
 	}
-	end = clock();
+
+	DO_TIMER(int8, int8x2, INT8_MIN, INT8_MAX)
+	DO_TIMER(int8, int8x4, INT8_MIN, INT8_MAX)
+	DO_TIMER(int8, int8x8, INT8_MIN, INT8_MAX)
+	DO_TIMER(int8, int8x16, INT8_MIN, INT8_MAX)
+	DO_TIMER(int8, int8x32, INT8_MIN, INT8_MAX)
+	DO_TIMER(int8, int8x64, INT8_MIN, INT8_MAX)
+	DO_TIMER(int8, int8, INT8_MIN, INT8_MAX)
 
-	printf("- vec: took %f secs\n", (double)(end - start) / CLOCKS_PER_SEC);
+	DO_TIMER(int16, int16x2, INT16_MIN, INT16_MAX)
+	DO_TIMER(int16, int16x4, INT16_MIN, INT16_MAX)
+	DO_TIMER(int16, int16x8, INT16_MIN, INT16_MAX)
+	DO_TIMER(int16, int16x16, INT16_MIN, INT16_MAX)
+	DO_TIMER(int16, int16x32, INT16_MIN, INT16_MAX)
+	DO_TIMER(int16, int16, INT16_MIN, INT16_MAX)
+
+	DO_TIMER(int32, int32x2, INT32_MIN, INT32_MAX)
+	DO_TIMER(int32, int32x4, INT32_MIN, INT32_MAX)
+	DO_TIMER(int32, int32x8, INT32_MIN, INT32_MAX)
+	DO_TIMER(int32, int32x16, INT32_MIN, INT32_MAX)
+	DO_TIMER(int32, int32, INT32_MIN, INT32_MAX)
+
+	DO_TIMER(int64, int64x2, INT64_MIN, INT64_MAX)
+	DO_TIMER(int64, int64x4, INT64_MIN, INT64_MAX)
+	DO_TIMER(int64, int64x8, INT64_MIN, INT64_MAX)
+	DO_TIMER(int64, int64, INT64_MIN, INT64_MAX)
 
-	start = clock();
-	for (i = 0; i < 100; i++) {
-		min = INT32_MAX;
-		max = INT32_MIN;
-		test_benchmark_sample_minmax_simple_impl(q, 16000001u, &min, &max);
-	}
-	end = clock();
+	DO_TIMER(uint8, uint8x2, 0, UINT8_MAX)
+	DO_TIMER(uint8, uint8x4, 0, UINT8_MAX)
+	DO_TIMER(uint8, uint8x8, 0, UINT8_MAX)
+	DO_TIMER(uint8, uint8x16, 0, UINT8_MAX)
+	DO_TIMER(uint8, uint8x32, 0, UINT8_MAX)
+	DO_TIMER(uint8, uint8x64, 0, UINT8_MAX)
+	DO_TIMER(uint8, uint8, 0, UINT8_MAX)
+
+	DO_TIMER(uint16, uint16x2, 0, UINT16_MAX)
+	DO_TIMER(uint16, uint16x4, 0, UINT16_MAX)
+	DO_TIMER(uint16, uint16x8, 0, UINT16_MAX)
+	DO_TIMER(uint16, uint16x16, 0, UINT16_MAX)
+	DO_TIMER(uint16, uint16x32, 0, UINT16_MAX)
+	DO_TIMER(uint16, uint16, 0, UINT16_MAX)
 
-	printf("- simple: took %f secs\n", (double)(end - start) / CLOCKS_PER_SEC);
+	DO_TIMER(uint32, uint32x2, 0, UINT32_MAX)
+	DO_TIMER(uint32, uint32x4, 0, UINT32_MAX)
+	DO_TIMER(uint32, uint32x8, 0, UINT32_MAX)
+	DO_TIMER(uint32, uint32x16, 0, UINT32_MAX)
+	DO_TIMER(uint32, uint32, 0, UINT32_MAX)
+
+	DO_TIMER(uint64, uint64x2, 0, UINT64_MAX)
+	DO_TIMER(uint64, uint64x4, 0, UINT64_MAX)
+	DO_TIMER(uint64, uint64x8, 0, UINT64_MAX)
+	DO_TIMER(uint64, uint64, 0, UINT64_MAX)
+
+	DO_TIMER(f32, f32x2, -1.0f, 1.0f)
+	DO_TIMER(f32, f32x4, -1.0f, 1.0f)
+	DO_TIMER(f32, f32x8, -1.0f, 1.0f)
+	DO_TIMER(f32, f32x16, -1.0f, 1.0f)
+	DO_TIMER(f32, f32, -1.0f, 1.0f)
+
+	DO_TIMER(f64, f64x2, -1.0, 1.0)
+	DO_TIMER(f64, f64x4, -1.0, 1.0)
+	DO_TIMER(f64, f64x8, -1.0, 1.0)
+	DO_TIMER(f64, f64, -1.0, 1.0)
 
 	printf("\n");
 
--- a/test/test_benchmark_simple.c	Tue Apr 29 16:54:13 2025 -0400
+++ b/test/test_benchmark_simple.c	Wed Apr 30 18:36:38 2025 -0400
@@ -1,18 +1,30 @@
-#include <stdint.h>
+#include "vec/defs.h"
 
-extern void test_benchmark_sample_minmax_simple_impl(int16_t *smpl,
-	uint32_t length, int32_t *pmin, int32_t *pmax)
-{
-	int32_t min = *pmin;
-	int32_t max = *pmax;
-
-	while (length--) {
-		if (*smpl < min) min = *smpl;
-		if (*smpl > max) max = *smpl;
-
-		smpl++;
+#define DEFINE_VARIANT(type, bits) \
+	extern void test_benchmark_sample_minmax_##type##bits##_impl(vec_##type##bits *smpl, \
+		uint32_t length, vec_##type##bits *pmin, vec_##type##bits *pmax) \
+	{ \
+		vec_##type##bits min = *pmin; \
+		vec_##type##bits max = *pmax; \
+	\
+		while (length--) { \
+			if (*smpl < min) min = *smpl; \
+			if (*smpl > max) max = *smpl; \
+	\
+			smpl++; \
+		} \
+	\
+		*pmin = min; \
+		*pmax = max; \
 	}
 
-	*pmin = min;
-	*pmax = max;
-}
+DEFINE_VARIANT(int, 8)
+DEFINE_VARIANT(uint, 8)
+DEFINE_VARIANT(int, 16)
+DEFINE_VARIANT(uint, 16)
+DEFINE_VARIANT(int, 32)
+DEFINE_VARIANT(uint, 32)
+DEFINE_VARIANT(f, 32)
+DEFINE_VARIANT(int, 64)
+DEFINE_VARIANT(uint, 64)
+DEFINE_VARIANT(f, 64)
--- a/test/test_benchmark_vec.c	Tue Apr 29 16:54:13 2025 -0400
+++ b/test/test_benchmark_vec.c	Wed Apr 30 18:36:38 2025 -0400
@@ -1,43 +1,97 @@
 #include "vec/vec.h"
 
-extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl,
-	uint32_t length, int32_t *pmin, int32_t *pmax)
-{
-	int32_t smin = INT32_MAX, smax = INT32_MIN;
-	uint32_t len32;
-	int i;
-	vint16x8 min = vint16x8_splat(*pmin);
-	vint16x8 max = vint16x8_splat(*pmax);
-	VINT16x8_ALIGNED_ARRAY(mins);
-	VINT16x8_ALIGNED_ARRAY(maxs);
-
-	len32 = length / 8;
-	while (len32--) {
-		vint16x8 vec = vint16x8_load_aligned(smpl);
-
-		min = vint16x8_min(vec, min);
-		max = vint16x8_max(vec, max);
-
-		smpl += 8;
+#define DEFINE_MINMAX_BENCHMARK(TYPE,CTYPE,BITS,SIZE,MAX,MIN) \
+	extern void test_benchmark_sample_minmax_##TYPE##BITS##x##SIZE##_impl(vec_##TYPE##BITS *smpl, \
+		uint32_t length, vec_##TYPE##BITS *pmin, vec_##TYPE##BITS *pmax) \
+	{ \
+		vec_##TYPE##BITS smin = MAX, smax = MIN; \
+		uint32_t len32; \
+		int i; \
+		v##TYPE##BITS##x##SIZE min = v##TYPE##BITS##x##SIZE##_splat(*pmin); \
+		v##TYPE##BITS##x##SIZE max = v##TYPE##BITS##x##SIZE##_splat(*pmax); \
+		V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(mins); \
+		V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(maxs); \
+	\
+		len32 = length / SIZE; \
+		while (len32--) { \
+			v##TYPE##BITS##x##SIZE vec = v##TYPE##BITS##x##SIZE##_load_aligned(smpl); \
+	\
+			min = v##TYPE##BITS##x##SIZE##_min(vec, min); \
+			max = v##TYPE##BITS##x##SIZE##_max(vec, max); \
+	\
+			smpl += SIZE; \
+		} \
+	\
+		v##TYPE##BITS##x##SIZE##_store_aligned(min, mins); \
+		v##TYPE##BITS##x##SIZE##_store_aligned(max, maxs); \
+	\
+		/* reduce the per-lane mins/maxes down to scalars */ \
+		for (i = 0; i < SIZE; i++) { \
+			if (mins[i] < smin) smin = mins[i]; \
+			if (maxs[i] > smax) smax = maxs[i]; \
+		} \
+	\
+		len32 = length % SIZE; \
+		while (len32--) { \
+			if (*smpl < smin) smin = *smpl; \
+			if (*smpl > smax) smax = *smpl; \
+	\
+			smpl++; \
+		} \
+	\
+		*pmin = smin; \
+		*pmax = smax; \
 	}
 
-	vint16x8_store_aligned(min, mins);
-	vint16x8_store_aligned(max, maxs);
+DEFINE_MINMAX_BENCHMARK(int,INT,8,2,INT8_MAX,INT8_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,8,4,INT8_MAX,INT8_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,8,8,INT8_MAX,INT8_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,8,16,INT8_MAX,INT8_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,8,32,INT8_MAX,INT8_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,8,64,INT8_MAX,INT8_MIN)
 
-	/* get the lowest minimum of what we have left */
-	for (i = 0; i < 8; i++) {
-		if (mins[i] < smin) smin = mins[i];
-		if (maxs[i] > smax) smax = maxs[i];
-	}
+DEFINE_MINMAX_BENCHMARK(int,INT,16,2,INT16_MAX,INT16_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,16,4,INT16_MAX,INT16_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,16,8,INT16_MAX,INT16_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,16,16,INT16_MAX,INT16_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,16,32,INT16_MAX,INT16_MIN)
+
+DEFINE_MINMAX_BENCHMARK(int,INT,32,2,INT32_MAX,INT32_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,32,4,INT32_MAX,INT32_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,32,8,INT32_MAX,INT32_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,32,16,INT32_MAX,INT32_MIN)
+
+DEFINE_MINMAX_BENCHMARK(int,INT,64,2,INT64_MAX,INT64_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,64,4,INT64_MAX,INT64_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,64,8,INT64_MAX,INT64_MIN)
 
-	len32 = length % 8;
-	while (len32--) {
-		if (*smpl < smin) smin = *smpl;
-		if (*smpl > smax) smax = *smpl;
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,2,UINT8_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,4,UINT8_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,8,UINT8_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,16,UINT8_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,32,UINT8_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,64,UINT8_MAX,0)
+
+DEFINE_MINMAX_BENCHMARK(uint,UINT,16,2,UINT16_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,16,4,UINT16_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,16,8,UINT16_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,16,16,UINT16_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,16,32,UINT16_MAX,0)
 
-		smpl++;
-	}
+DEFINE_MINMAX_BENCHMARK(uint,UINT,32,2,UINT32_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,32,4,UINT32_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,32,8,UINT32_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,32,16,UINT32_MAX,0)
+
+DEFINE_MINMAX_BENCHMARK(uint,UINT,64,2,UINT64_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,64,4,UINT64_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,64,8,UINT64_MAX,0)
 
-	*pmin = smin;
-	*pmax = smax;
-}
+DEFINE_MINMAX_BENCHMARK(f,F,32,2,1.0f,-1.0f)
+DEFINE_MINMAX_BENCHMARK(f,F,32,4,1.0f,-1.0f)
+DEFINE_MINMAX_BENCHMARK(f,F,32,8,1.0f,-1.0f)
+DEFINE_MINMAX_BENCHMARK(f,F,32,16,1.0f,-1.0f)
+
+DEFINE_MINMAX_BENCHMARK(f,F,64,2,1.0,-1.0)
+DEFINE_MINMAX_BENCHMARK(f,F,64,4,1.0,-1.0)
+DEFINE_MINMAX_BENCHMARK(f,F,64,8,1.0,-1.0)
--- a/test/test_compare.h	Tue Apr 29 16:54:13 2025 -0400
+++ b/test/test_compare.h	Wed Apr 30 18:36:38 2025 -0400
@@ -1,21 +1,21 @@
-#define CREATE_TEST(sign, psign, bits, size, op, equiv) \
-	static int test_compare_v##sign##int##bits##x##size##_##op(v##sign##int##bits##x##size a, v##sign##int##bits##x##size b) \
+#define CREATE_TEST(type, print, bits, size, op, equiv) \
+	static int test_compare_v##type##bits##x##size##_##op(v##type##bits##x##size a, v##type##bits##x##size b) \
 	{ \
-		sign##int##bits##_t orig_a[size], orig_b[size], orig_c[size]; \
+		vec_##type##bits orig_a[size], orig_b[size], orig_c[size]; \
 	\
-		v##sign##int##bits##x##size c = v##sign##int##bits##x##size##_##op(a, b); \
+		v##type##bits##x##size c = v##type##bits##x##size##_##op(a, b); \
 	\
-		v##sign##int##bits##x##size##_store(a, orig_a); \
-		v##sign##int##bits##x##size##_store(b, orig_b); \
-		v##sign##int##bits##x##size##_store(c, orig_c); \
+		v##type##bits##x##size##_store(a, orig_a); \
+		v##type##bits##x##size##_store(b, orig_b); \
+		v##type##bits##x##size##_store(c, orig_c); \
 	\
 		for (int i = 0; i < size; i++) { \
-			if ((vec_##sign##int##bits)(((equiv) ? UINT##bits##_MAX : 0)) != orig_c[i]) { \
-				printf("%lld %lld\n", (long long)(vec_##sign##int##bits)(((equiv) ? UINT##bits##_MAX : 0)), (long long)orig_c[i]); \
-				fprintf(stderr, "v" #sign "int" #bits "x" #size "_" #op " test FAILED at index %d: (" #equiv ") [%d] does not equal result [%" PRI ## psign ## bits "]!\n", i, equiv, orig_c[i]); \
-				print_v##sign##int##bits##x##size(stderr,a); \
-				print_v##sign##int##bits##x##size(stderr,b); \
-				print_v##sign##int##bits##x##size(stderr,c); \
+			uint##bits##_t res = (((equiv) ? UINT##bits##_MAX : 0)); \
+			if (memcmp(&res, orig_c + i, sizeof(res))) { \
+				fprintf(stderr, "v" #type #bits "x" #size "_" #op " test FAILED at index %d: (" #equiv ") [%d] does not equal result [%" print "]!\n", i, equiv, orig_c[i]); \
+				print_v##type##bits##x##size(stderr,a); \
+				print_v##type##bits##x##size(stderr,b); \
+				print_v##type##bits##x##size(stderr,c); \
 				fprintf(stderr, "\n"); \
 				return 1; \
 			} \
@@ -24,63 +24,81 @@
 		return 0; \
 	}
 
-#define CREATE_TESTS_SIGN(sign, psign, bits, size) \
-	CREATE_TEST(sign, psign, bits, size, cmplt, orig_a[i] < orig_b[i]) \
-	CREATE_TEST(sign, psign, bits, size, cmpgt, orig_a[i] > orig_b[i]) \
-	CREATE_TEST(sign, psign, bits, size, cmpeq, orig_a[i] == orig_b[i]) \
-	CREATE_TEST(sign, psign, bits, size, cmple, orig_a[i] <= orig_b[i]) \
-	CREATE_TEST(sign, psign, bits, size, cmpge, orig_a[i] >= orig_b[i])
+#define CREATE_TESTS_SIGN(type, print, bits, size) \
+	CREATE_TEST(type, print, bits, size, cmplt, orig_a[i] < orig_b[i]) \
+	CREATE_TEST(type, print, bits, size, cmpgt, orig_a[i] > orig_b[i]) \
+	CREATE_TEST(type, print, bits, size, cmpeq, orig_a[i] == orig_b[i]) \
+	CREATE_TEST(type, print, bits, size, cmple, orig_a[i] <= orig_b[i]) \
+	CREATE_TEST(type, print, bits, size, cmpge, orig_a[i] >= orig_b[i])
+
+#define CREATE_TESTS_INT(bits, size) \
+	CREATE_TESTS_SIGN(int,  PRI##d##bits, bits, size) \
+	CREATE_TESTS_SIGN(uint, PRI##u##bits, bits, size)
 
-#define CREATE_TESTS(bits, size) CREATE_TESTS_SIGN(, d, bits, size) CREATE_TESTS_SIGN(u, u, bits, size)
+#define CREATE_TESTS_FLOAT(bits, size) \
+	CREATE_TESTS_SIGN(f, "f", bits, size)
+
+CREATE_TESTS_INT(8, 2)
 
-CREATE_TESTS(8, 2)
+CREATE_TESTS_INT(8, 4)
+CREATE_TESTS_INT(16, 2)
 
-CREATE_TESTS(8, 4)
-CREATE_TESTS(16, 2)
+CREATE_TESTS_INT(8, 8)
+CREATE_TESTS_INT(16, 4)
+CREATE_TESTS_INT(32, 2)
 
-CREATE_TESTS(8, 8)
-CREATE_TESTS(16, 4)
-CREATE_TESTS(32, 2)
+CREATE_TESTS_INT(8, 16)
+CREATE_TESTS_INT(16, 8)
+CREATE_TESTS_INT(32, 4)
+CREATE_TESTS_INT(64, 2)
 
-CREATE_TESTS(8, 16)
-CREATE_TESTS(16, 8)
-CREATE_TESTS(32, 4)
-CREATE_TESTS(64, 2)
+CREATE_TESTS_INT(8, 32)
+CREATE_TESTS_INT(16, 16)
+CREATE_TESTS_INT(32, 8)
+CREATE_TESTS_INT(64, 4)
 
-CREATE_TESTS(8, 32)
-CREATE_TESTS(16, 16)
-CREATE_TESTS(32, 8)
-CREATE_TESTS(64, 4)
+CREATE_TESTS_INT(8, 64)
+CREATE_TESTS_INT(16, 32)
+CREATE_TESTS_INT(32, 16)
+CREATE_TESTS_INT(64, 8)
 
-CREATE_TESTS(8, 64)
-CREATE_TESTS(16, 32)
-CREATE_TESTS(32, 16)
-CREATE_TESTS(64, 8)
+CREATE_TESTS_FLOAT(32, 2)
+CREATE_TESTS_FLOAT(32, 4)
+CREATE_TESTS_FLOAT(32, 8)
+CREATE_TESTS_FLOAT(32, 16)
+
+CREATE_TESTS_FLOAT(64, 2)
+CREATE_TESTS_FLOAT(64, 4)
+CREATE_TESTS_FLOAT(64, 8)
 
 #undef CREATE_TESTS_SIGN
-#undef CREATE_TESTS
+#undef CREATE_TESTS_INT
+#undef CREATE_TESTS_FLOAT
 #undef CREATE_TEST
 
 static int test_compare(void)
 {
 	int ret = 0;
 
-#define RUN_TESTS_SIGN(sign, bits, size) \
-	for (size_t i = 0U; i < ARRAY_SIZE(testval##sign##bits); i++) { \
-		const v##sign##int##bits##x##size a = vtest##sign##bits##x##size(i); \
-		for (size_t j = 0U; j < ARRAY_SIZE(testval##sign##bits); j++) { \
-			const v##sign##int##bits##x##size b = vtest##sign##bits##x##size(j); \
-			ret |= test_compare_v##sign##int##bits##x##size##_cmplt(a, b); \
-			ret |= test_compare_v##sign##int##bits##x##size##_cmpgt(a, b); \
-			ret |= test_compare_v##sign##int##bits##x##size##_cmpeq(a, b); \
-			ret |= test_compare_v##sign##int##bits##x##size##_cmple(a, b); \
-			ret |= test_compare_v##sign##int##bits##x##size##_cmpge(a, b); \
+#define RUN_TESTS_SIGN(shorttype, type, bits, size) \
+	for (size_t i = 0U; i < ARRAY_SIZE(testval##shorttype##bits); i++) { \
+		const v##type##bits##x##size a = vtest##shorttype##bits##x##size(i); \
+		for (size_t j = 0U; j < ARRAY_SIZE(testval##shorttype##bits); j++) { \
+			const v##type##bits##x##size b = vtest##shorttype##bits##x##size(j); \
+			ret |= test_compare_v##type##bits##x##size##_cmplt(a, b); \
+			ret |= test_compare_v##type##bits##x##size##_cmpgt(a, b); \
+			ret |= test_compare_v##type##bits##x##size##_cmpeq(a, b); \
+			ret |= test_compare_v##type##bits##x##size##_cmple(a, b); \
+			ret |= test_compare_v##type##bits##x##size##_cmpge(a, b); \
 		} \
 	}
 
 #define RUN_TESTS(bits, size) \
-	RUN_TESTS_SIGN( , bits, size) \
-	RUN_TESTS_SIGN(u, bits, size)
+	RUN_TESTS_SIGN( , int, bits, size) \
+	RUN_TESTS_SIGN(u, uint, bits, size)
+
+#define RUN_TESTS_FLOAT(bits, size) \
+	RUN_TESTS_SIGN(f, f, bits, size)
 
 	RUN_TESTS(8, 2)
 
@@ -106,7 +124,17 @@
 	RUN_TESTS(32, 16)
 	RUN_TESTS(64, 8)
 
+	RUN_TESTS_FLOAT(32, 2)
+	RUN_TESTS_FLOAT(32, 4)
+	RUN_TESTS_FLOAT(32, 8)
+	RUN_TESTS_FLOAT(32, 16)
+
+	RUN_TESTS_FLOAT(64, 2)
+	RUN_TESTS_FLOAT(64, 4)
+	RUN_TESTS_FLOAT(64, 8)
+
 #undef RUN_TESTS_SIGN
+#undef RUN_TESTS_FLOAT
 #undef RUN_TESTS
 
 	return ret;
--- a/utils/genaltivec.c	Tue Apr 29 16:54:13 2025 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,376 +0,0 @@
-/**
- * vec - a tiny SIMD vector library in C99
- * 
- * Copyright (c) 2024-2025 Paper
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
-**/
-
-/* Use this file to generate include/vec/impl/ppc/altivec.h !!
- *
- * `gcc -o genaltivec genaltivec.c` */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
-
-/* ------------------------------------------------------------------------ */
-
-/* #define USE_VSX_EXTENSIONS */
-
-enum op {
-	/* return vector, take in a integer */
-	OP_SPLAT = 0,
-
-	/* return vector, take in an array */
-	OP_LOAD_ALIGNED,
-	OP_LOAD,
-
-	/* void, take in vector and array */
-	OP_STORE_ALIGNED,
-	OP_STORE,
-
-	/* return vector, takes in two vectors */
-	OP_ADD,
-	OP_SUB,
-	OP_MUL,
-	OP_AND,
-	OP_OR,
-	OP_XOR,
-	OP_CMPLT,
-	OP_CMPEQ,
-	OP_CMPGT,
-#ifdef USE_VSX_EXTENSIONS
-	OP_CMPLE,
-	OP_CMPGE,
-#endif
-	OP_MIN,
-	OP_MAX,
-	OP_AVG,
-
-	/* return vector, takes in a vector and an explicitly unsigned vector */
-	OP_LSHIFT,
-	OP_LRSHIFT,
-	OP_RSHIFT,
-
-	OP_FINAL_,
-
-	/* unimplemented, no altivec version :) */
-	OP_NOT,
-};
-
-/* convert op -> string */
-static struct {
-	const char *u;
-	const char *l;
-} op_names[] = {
-	[OP_SPLAT] = {"SPLAT", "splat"},
-	[OP_LOAD_ALIGNED] = {"LOAD_ALIGNED", "load_aligned"},
-	[OP_LOAD] = {"LOAD", "load"},
-	[OP_STORE_ALIGNED] = {"STORE_ALIGNED", "store_aligned"},
-	[OP_STORE] = {"STORE", "store"},
-	[OP_ADD] = {"ADD", "add"},
-	[OP_SUB] = {"SUB", "sub"},
-	[OP_MUL] = {"MUL", "mul"},
-	[OP_AVG] = {"AVG", "avg"},
-	[OP_AND] = {"AND", "and"},
-	[OP_OR] = {"OR", "or"},
-	[OP_XOR] = {"XOR", "xor"},
-	[OP_NOT] = {"NOT", "not"},
-	[OP_CMPLT] = {"CMPLT", "cmplt"},
-	[OP_CMPEQ] = {"CMPEQ", "cmpeq"},
-	[OP_CMPGT] = {"CMPGT", "cmpgt"},
-#ifdef USE_VSX_EXTENSIONS
-	[OP_CMPLE] = {"CMPLE", "cmple"},
-	[OP_CMPGE] = {"CMPGE", "cmpge"},
-#endif
-	[OP_MIN] = {"MIN", "min"},
-	[OP_MAX] = {"MAX", "max"},
-	[OP_RSHIFT] = {"RSHIFT", "rshift"},
-	[OP_LRSHIFT] = {"LRSHIFT", "lrshift"},
-	[OP_LSHIFT] = {"LSHIFT", "lshift"},
-};
-
-#define UPSIGN(x) ((x) ? "" : "U")
-#define LOSIGN(x) ((x) ? "" : "u")
-
-#define LOAVSIGN(x) ((x) ? "s" : "u")
-
-static void print_gcc_op(enum op op, int is_signed, int bits, int size)
-{
-	int i;
-
-	/* compatibility with ancient gcc */
-	switch (op) {
-	case OP_MUL:
-		puts("#ifdef vec_mul");
-		break;
-	case OP_SPLAT:
-		printf("#if defined(vec_splats) || defined(vec_splat_%s%d)\n", (is_signed) ? "s" : "u", bits);
-		break;
-	default:
-		break;
-	}
-
-	printf("#ifndef V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u);
-
-	printf("VEC_FUNC_IMPL ");
-
-	/* first; the return value */
-	switch (op) {
-	case OP_SPLAT:
-	case OP_LOAD_ALIGNED:
-	case OP_LOAD:
-	case OP_ADD:
-	case OP_SUB:
-	case OP_MUL:
-	case OP_AND:
-	case OP_OR:
-	case OP_XOR:
-	case OP_CMPLT:
-	case OP_CMPEQ:
-	case OP_CMPGT:
-#ifdef USE_VSX_EXTENSIONS
-	case OP_CMPLE:
-	case OP_CMPGE:
-#endif
-	case OP_MIN:
-	case OP_MAX:
-	case OP_AVG:
-	case OP_RSHIFT:
-	case OP_LRSHIFT:
-	case OP_LSHIFT:
-	case OP_NOT:
-		printf("v%sint%dx%d", LOSIGN(is_signed), bits, size);
-		break;
-	case OP_STORE_ALIGNED:
-	case OP_STORE:
-		printf("void");
-		break;
-	}
-
-	/* whitespace and function name */
-	printf(" v%sint%dx%d_%s(", LOSIGN(is_signed), bits, size, op_names[op].l);
-
-	/* parameters */
-	switch (op) {
-	case OP_SPLAT:
-		printf("vec_%sint%d x", LOSIGN(is_signed), bits);
-		break;
-	case OP_LOAD_ALIGNED:
-	case OP_LOAD:
-		printf("const vec_%sint%d x[%d]", LOSIGN(is_signed), bits, size);
-		break;
-	case OP_STORE_ALIGNED:
-	case OP_STORE:
-		printf("v%sint%dx%d vec, vec_%sint%d arr[%d]", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size);
-		break;
-	case OP_ADD:
-	case OP_SUB:
-	case OP_MUL:
-	case OP_AND:
-	case OP_OR:
-	case OP_XOR:
-	case OP_CMPLT:
-	case OP_CMPEQ:
-	case OP_CMPGT:
-#ifdef USE_VSX_EXTENSIONS
-	case OP_CMPLE:
-	case OP_CMPGE:
-#endif
-	case OP_MIN:
-	case OP_MAX:
-	case OP_AVG:
-		printf("v%sint%dx%d vec1, v%sint%dx%d vec2", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size);
-		break;
-	case OP_RSHIFT:
-	case OP_LRSHIFT:
-	case OP_LSHIFT:
-		printf("v%sint%dx%d vec1, vuint%dx%d vec2", LOSIGN(is_signed), bits, size, bits, size);
-		break;
-	case OP_NOT:
-		printf("v%sint%dx%d vec", LOSIGN(is_signed), bits, size);
-		break;
-	}
-
-	puts(")\n{");
-
-	switch (op) {
-	case OP_SPLAT:
-		printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
-		puts("\tvec.altivec = vec_splats(x);");
-		puts("\treturn vec;");
-		break;
-	case OP_LOAD_ALIGNED:
-		printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
-		puts("\tvec.altivec = vec_ld(0, x);");
-		puts("\treturn vec;");
-		break;
-	case OP_LOAD:
-		printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
-		puts("\tvec.altivec = vec_perm(vec_ld(0, x), vec_ld(16, x), vec_lvsl(0, x));");
-		puts("\treturn vec;");
-		break;
-	case OP_STORE_ALIGNED:
-		puts("\tvec_st(vec.altivec, 0, arr);");
-		break;
-	case OP_STORE:
-		/* ??? */
-		puts("\tmemcpy(arr, &vec, sizeof(vec));");
-		break;
-	case OP_ADD:
-	case OP_SUB:
-	case OP_MUL:
-	case OP_AND:
-	case OP_OR:
-	case OP_XOR:
-	case OP_AVG:
-	case OP_CMPLT:
-	case OP_CMPEQ:
-	case OP_CMPGT:
-#ifdef USE_VSX_EXTENSIONS
-	case OP_CMPLE:
-	case OP_CMPGE:
-#endif
-	case OP_LSHIFT:
-	case OP_LRSHIFT:
-	case OP_RSHIFT:
-	case OP_MIN:
-	case OP_MAX: {
-		static const char *op_altivec[OP_LRSHIFT - OP_ADD + 1] = {"add", "sub", "mul", "and", "or", "xor", "cmplt", "cmpeq", "cmpgt",
-#ifdef USE_VSX_EXTENSIONS
-			"cmple",
-			"cmpge",
-#endif
-			"min", "max", "avg", "sl", "sr"};
-		static const char *types[] = {"char", "short", NULL, "int"};
-
-		printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
-		if (op == OP_RSHIFT) {
-			printf("\tvec.altivec = vec_sr%s(vec1.altivec, vec2.altivec);\n", (is_signed) ? "a" : "");
-		} else {
-			printf("\tvec.altivec = (vector %s %s)vec_%s(vec1.altivec, vec2.altivec);\n", (is_signed) ? "signed" : "unsigned", types[(bits / 8) - 1], op_altivec[op - OP_ADD]);
-		}
-		puts("\treturn vec;");
-		break;
-	}
-	default:
-		printf("#error implement this operation");
-		break;
-	}
-
-	/* end function definition */
-	puts("}");
-
-	printf("# define V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u);
-	puts("#endif");
-
-	switch (op) {
-	case OP_SPLAT:
-	case OP_MUL:
-		puts("#endif");
-		break;
-	default:
-		break;
-	}
-}
-
-static inline void print_ops(int is_signed, int bits, int size)
-{
-	int i;
-
-	printf("\n\n/* v%sint%dx%d */\n\n", (is_signed ? "u" : ""), bits, size);
-
-	for (i = 0; i < OP_FINAL_; i++)
-		print_gcc_op(i, is_signed, bits, size);
-}
-
-#ifdef USE_VSX_EXTENSIONS
-# define HEADER_GUARD_NAME "VSX"
-#else
-# define HEADER_GUARD_NAME "ALTIVEC"
-#endif
-
-static const char *header =
-	"/**\n"
-	" * vec - a tiny SIMD vector library in C99\n"
-	" * \n"
-	" * Copyright (c) 2024-2025 Paper\n"
-	" * \n"
-	" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
-	" * of this software and associated documentation files (the \"Software\"), to deal\n"
-	" * in the Software without restriction, including without limitation the rights\n"
-	" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
-	" * copies of the Software, and to permit persons to whom the Software is\n"
-	" * furnished to do so, subject to the following conditions:\n"
-	" * \n"
-	" * The above copyright notice and this permission notice shall be included in all\n"
-	" * copies or substantial portions of the Software.\n"
-	" * \n"
-	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
-	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
-	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
-	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
-	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
-	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
-	" * SOFTWARE.\n"
-	"**/\n"
-	"\n"
-	"/* This file is automatically generated! Do not edit it directly!\n"
-	" * Edit the code that generates it in utils/genaltivec.c  --paper */\n"
-	"\n"
-	"#ifndef VEC_IMPL_PPC_" HEADER_GUARD_NAME "_H_\n"
-	"#define VEC_IMPL_PPC_" HEADER_GUARD_NAME "_H_\n"
-	"\n";
-
-static const char *footer = 
-	"#endif /* VEC_IMPL_PPC_" HEADER_GUARD_NAME "_H_ */\n";
-
-int main(void)
-{
-	static struct {
-		int bits, size;
-	} defs[] = {
-		/* -- 8-bit */
-		{8, 16},
-		/* -- 16-bit */
-		{16, 8},
-
-		/* -- 32-bit */
-		{32, 4},
-
-#ifdef USE_VSX_EXTENSIONS
-		/* -- 64-bit */
-		{64, 2},
-#endif
-	};
-	int i;
-
-	puts(header);
-
-	for (i = 0; i < ARRAY_SIZE(defs); i++) {
-		print_ops(1, defs[i].bits, defs[i].size);
-		print_ops(0, defs[i].bits, defs[i].size);
-	}
-
-	puts(footer);
-}
--- a/utils/gendouble.c	Tue Apr 29 16:54:13 2025 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,244 +0,0 @@
-/**
- * vec - a tiny SIMD vector library in C99
- * 
- * Copyright (c) 2024-2025 Paper
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
-**/
-
-/* Use this file to generate include/vec/impl/double.h !!
- *
- * `gcc -o gendouble gendouble.c` */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
-
-/* XXX: would it be faster to unroll literally everything instead of defining everything,
- * and then unpacking it all? */
-static const char *header =
-	"/**\n"
-	" * vec - a tiny SIMD vector library in C99\n"
-	" * \n"
-	" * Copyright (c) 2024-2025 Paper\n"
-	" * \n"
-	" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
-	" * of this software and associated documentation files (the \"Software\"), to deal\n"
-	" * in the Software without restriction, including without limitation the rights\n"
-	" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
-	" * copies of the Software, and to permit persons to whom the Software is\n"
-	" * furnished to do so, subject to the following conditions:\n"
-	" * \n"
-	" * The above copyright notice and this permission notice shall be included in all\n"
-	" * copies or substantial portions of the Software.\n"
-	" * \n"
-	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
-	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
-	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
-	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
-	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
-	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
-	" * SOFTWARE.\n"
-	"**/\n"
-	"\n"
-	"/* This file is automatically generated! Do not edit it directly!\n"
-	" * Edit the code that generates it in utils/gendouble.c  --paper */\n"
-	"\n"
-	"#ifndef VEC_IMPL_DOUBLE_H_\n"
-	"#define VEC_IMPL_DOUBLE_H_\n"
-	"\n"
-	"#define VEC_DOUBLE_SPLAT(sign, bits, size, halfsize) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \\\n"
-	"	{ \\\n"
-	"		v##sign##int##bits##x##size vec; \\\n"
-	"	\\\n"
-	"		vec.generic[0] = v##sign##int##bits##x##halfsize##_splat(x); \\\n"
-	"		vec.generic[1] = v##sign##int##bits##x##halfsize##_splat(x); \\\n"
-	"	\\\n"
-	"		return vec; \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_DOUBLE_LOAD_EX(name, sign, bits, size, halfsize) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits x[size]) \\\n"
-	"	{ \\\n"
-	"		v##sign##int##bits##x##size vec; \\\n"
-	"	\\\n"
-	"		vec.generic[0] = v##sign##int##bits##x##halfsize##_##name(x); \\\n"
-	"		vec.generic[1] = v##sign##int##bits##x##halfsize##_##name(x + halfsize); \\\n"
-	"	\\\n"
-	"		return vec; \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_DOUBLE_LOAD(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load, sign, bits, size, halfsize)\n"
-	"#define VEC_DOUBLE_LOAD_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load_aligned, sign, bits, size, halfsize)\n"
-	"\n"
-	"#define VEC_DOUBLE_STORE_EX(name, sign, bits, size, halfsize) \\\n"
-	"	VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits x[size]) \\\n"
-	"	{ \\\n"
-	"		v##sign##int##bits##x##halfsize##_##name(vec.generic[0], x); \\\n"
-	"		v##sign##int##bits##x##halfsize##_##name(vec.generic[1], x + halfsize); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_DOUBLE_STORE(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store, sign, bits, size, halfsize)\n"
-	"#define VEC_DOUBLE_STORE_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store_aligned, sign, bits, size, halfsize)\n"
-	"\n"
-	"#define VEC_DOUBLE_OP(name, sign, bits, size, halfsize, secondsign) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec1, v##secondsign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		vec1.generic[0] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[0], vec2.generic[0]); \\\n"
-	"		vec1.generic[1] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[1], vec2.generic[1]); \\\n"
-	"	\\\n"
-	"		return vec1; \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_DOUBLE_ADD(sign, bits, size, halfsize) VEC_DOUBLE_OP(add, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_SUB(sign, bits, size, halfsize) VEC_DOUBLE_OP(sub, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_MUL(sign, bits, size, halfsize) VEC_DOUBLE_OP(mul, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_DIV(sign, bits, size, halfsize) VEC_DOUBLE_OP(div, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_MOD(sign, bits, size, halfsize) VEC_DOUBLE_OP(mod, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_AVG(sign, bits, size, halfsize) VEC_DOUBLE_OP(avg, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_LSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lshift, sign, bits, size, halfsize, u)\n"
-	"#define VEC_DOUBLE_RSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(rshift, sign, bits, size, halfsize, u)\n"
-	"#define VEC_DOUBLE_LRSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lrshift, sign, bits, size, halfsize, u)\n"
-	"#define VEC_DOUBLE_AND(sign, bits, size, halfsize) VEC_DOUBLE_OP(and, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_OR(sign, bits, size, halfsize) VEC_DOUBLE_OP(or, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_XOR(sign, bits, size, halfsize) VEC_DOUBLE_OP(xor, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_MIN(sign, bits, size, halfsize) VEC_DOUBLE_OP(min, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_MAX(sign, bits, size, halfsize) VEC_DOUBLE_OP(max, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_CMPLT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmplt, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_CMPLE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmple, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_CMPEQ(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpeq, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_CMPGE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpge, sign, bits, size, halfsize, sign)\n"
-	"#define VEC_DOUBLE_CMPGT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpgt, sign, bits, size, halfsize, sign)\n"
-	"\n"
-	"#define VEC_DOUBLE_NOT(sign, bits, size, halfsize) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \\\n"
-	"	{ \\\n"
-	"		vec.generic[0] = v##sign##int##bits##x##halfsize##_not(vec.generic[0]); \\\n"
-	"		vec.generic[1] = v##sign##int##bits##x##halfsize##_not(vec.generic[1]); \\\n"
-	"	\\\n"
-	"		return vec; \\\n"
-	"	}\n"
-	"\n"
-	"#endif /* VEC_IMPL_DOUBLE_H_ */ \n"
-	"\n"
-	"/* ------------------------------------------------------------------------ */\n"
-	"/* PREPROCESSOR HELL INCOMING */\n"
-	"";
-
-static const char *footer = 
-	"" /* nothing */;
-
-/* ------------------------------------------------------------------------ */
-
-static void print_generic_dbl_op(const char *op, int is_signed, int bits, int size)
-{
-	printf(
-		"#if !defined(V%sINT%dx%d_%s_DEFINED) && defined(V%sINT%dx%d_%s_DEFINED)\n"
-		"VEC_DOUBLE_%s(%s, %d, %d, %d)\n"
-		"# define V%sINT%dx%d_%s_DEFINED\n"
-		"#endif\n\n",
-	(is_signed ? "" : "U"), bits, size, op, (is_signed ? "" : "U"), bits, size / 2, op,
-	op, (is_signed ? "/* nothing */" : "u"), bits, size, size / 2,
-	(is_signed ? "" : "U"), bits, size, op);
-}
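
For reference, the block this helper emits for one operation (signed 8-bit ADD,
doubled from x2 up to x4) comes out roughly as follows; this is reconstructed
from the printf format above, so take the spacing as approximate:

	#if !defined(VINT8x4_ADD_DEFINED) && defined(VINT8x2_ADD_DEFINED)
	VEC_DOUBLE_ADD(/* nothing */, 8, 4, 2)
	# define VINT8x4_ADD_DEFINED
	#endif
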
-
-typedef void (*print_op_spec)(const char *op, int is_signed, int bits, int size);
-
-static inline void print_ops(int is_signed, int bits, int size, print_op_spec print_op)
-{
-	/* all supported operations here */
-	static const char *ops[] = {
-		"SPLAT",
-		"LOAD_ALIGNED",
-		"LOAD",
-		"STORE_ALIGNED",
-		"STORE",
-		"ADD",
-		"SUB",
-		"MUL",
-		"DIV",
-		"MOD",
-		"AVG",
-		"AND",
-		"OR",
-		"XOR",
-		"NOT",
-		"CMPLT",
-		"CMPEQ",
-		"CMPGT",
-		"CMPLE", /* these two must come after CMPLT and CMPGT, */
-		"CMPGE", /* because their definitions call those functions */
-		"MIN",
-		"MAX",
-		"RSHIFT",
-		"LRSHIFT",
-		"LSHIFT",
-		NULL,
-	};
-	int i;
-
-	printf("\n\n/* v%sint%dx%d */\n\n", (is_signed ? "" : "u"), bits, size);
-
-	for (i = 0; ops[i]; i++)
-		print_op(ops[i], is_signed, bits, size);
-}
-
-int main(void)
-{
-	static struct {
-		int bits, size;
-		print_op_spec print_op;
-	} defs[] = {
-		/* -- 8-bit */
-		{8, 4, print_generic_dbl_op},
-		{8, 8, print_generic_dbl_op},
-		{8, 16, print_generic_dbl_op},
-		{8, 32, print_generic_dbl_op},
-		{8, 64, print_generic_dbl_op},
-
-		/* -- 16-bit */
-		{16, 4, print_generic_dbl_op},
-		{16, 8, print_generic_dbl_op},
-		{16, 16, print_generic_dbl_op},
-		{16, 32, print_generic_dbl_op},
-
-		/* -- 32-bit */
-		{32, 4, print_generic_dbl_op},
-		{32, 8, print_generic_dbl_op},
-		{32, 16, print_generic_dbl_op},
-
-		/* -- 64-bit */
-		{64, 4, print_generic_dbl_op},
-		{64, 8, print_generic_dbl_op},
-	};
-	int i;
-
-	puts(header);
-
-	for (i = 0; i < ARRAY_SIZE(defs); i++) {
-		print_ops(1, defs[i].bits, defs[i].size, defs[i].print_op);
-		print_ops(0, defs[i].bits, defs[i].size, defs[i].print_op);
-	}
-
-	puts(footer);
-}
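
As a rough illustration of the doubling scheme, VEC_DOUBLE_OP expands to a
function that simply forwards to the half-width type on each half. Expanded by
hand for vint32x8 (built from two vint32x4 halves) it is approximately:

	VEC_FUNC_IMPL vint32x8 vint32x8_add(vint32x8 vec1, vint32x8 vec2)
	{
		/* apply the half-width op to each half, then return the pair */
		vec1.generic[0] = vint32x4_add(vec1.generic[0], vec2.generic[0]);
		vec1.generic[1] = vint32x4_add(vec1.generic[1], vec2.generic[1]);

		return vec1;
	}
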
--- a/utils/gengcc.c	Tue Apr 29 16:54:13 2025 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,414 +0,0 @@
-/**
- * vec - a tiny SIMD vector library in C99
- * 
- * Copyright (c) 2024-2025 Paper
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
-**/
-
-/* Use this file to generate include/vec/impl/gcc.h !!
- *
- * `gcc -o gengcc gengcc.c` */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
-
-/* ------------------------------------------------------------------------ */
-
-enum op {
-	/* return vector, take in an integer */
-	OP_SPLAT = 0,
-
-	/* return vector, take in an array */
-	OP_LOAD_ALIGNED,
-	OP_LOAD,
-
-	/* void, take in vector and array */
-	OP_STORE_ALIGNED,
-	OP_STORE,
-
-	/* return vector, takes in two vectors */
-	OP_ADD,
-	OP_SUB,
-	OP_MUL,
-	OP_AND,
-	OP_OR,
-	OP_XOR,
-	OP_CMPLT,
-	OP_CMPEQ,
-	OP_CMPGT,
-	OP_CMPLE,
-	OP_CMPGE,
-	OP_MIN,
-	OP_MAX,
-	OP_AVG,
-
-	/* return vector, takes in a vector and an explicitly unsigned vector */
-	OP_LSHIFT,
-	OP_RSHIFT,
-	OP_LRSHIFT,
-
-	/* return vector, takes in a vector */
-	OP_NOT,
-
-	OP_FINAL_,
-
-	/* operations that have some sort of "caveat" should go here, until
-	 * they are fixed or removed */
-
-	OP_DIV, /* this one raises a floating point exception (SIGFPE) on my
-	         * machine, presumably from divide-by-zero; maybe a gcc pragma
-	         * could change that? see the per-lane sketch below.  --paper */
-	OP_MOD, /* ditto with the above */
-};
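
The divide-by-zero caveat above is what the generic implementation avoids by
guarding every lane (see VEC_GENERIC_DIV in gengeneric.c further down in this
changeset); roughly, per lane:

	/* a zero divisor yields 0 instead of trapping */
	for (i = 0; i < size; i++)
		vec1.generic[i] = vec2.generic[i] ? (vec1.generic[i] / vec2.generic[i]) : 0;

The whole-vector division this generator would emit has no such guard, which is
why the op is parked after OP_FINAL_ for now.
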
-
-/* convert op -> string */
-static struct {
-	const char *u;
-	const char *l;
-} op_names[] = {
-	[OP_SPLAT] = {"SPLAT", "splat"},
-	[OP_LOAD_ALIGNED] = {"LOAD_ALIGNED", "load_aligned"},
-	[OP_LOAD] = {"LOAD", "load"},
-	[OP_STORE_ALIGNED] = {"STORE_ALIGNED", "store_aligned"},
-	[OP_STORE] = {"STORE", "store"},
-	[OP_ADD] = {"ADD", "add"},
-	[OP_SUB] = {"SUB", "sub"},
-	[OP_MUL] = {"MUL", "mul"},
-	/*[OP_DIV] = {"DIV", "div"},*/
-	[OP_AVG] = {"AVG", "avg"},
-	[OP_AND] = {"AND", "and"},
-	[OP_OR] = {"OR", "or"},
-	[OP_XOR] = {"XOR", "xor"},
-	[OP_NOT] = {"NOT", "not"},
-	[OP_CMPLT] = {"CMPLT", "cmplt"},
-	[OP_CMPEQ] = {"CMPEQ", "cmpeq"},
-	[OP_CMPGT] = {"CMPGT", "cmpgt"},
-	[OP_CMPLE] = {"CMPLE", "cmple"},
-	[OP_CMPGE] = {"CMPGE", "cmpge"},
-	[OP_MIN] = {"MIN", "min"},
-	[OP_MAX] = {"MAX", "max"},
-	[OP_RSHIFT] = {"RSHIFT", "rshift"},
-	[OP_LRSHIFT] = {"LRSHIFT", "lrshift"},
-	[OP_LSHIFT] = {"LSHIFT", "lshift"},
-};
-
-#define UPSIGN(x) ((x) ? "" : "U")
-#define LOSIGN(x) ((x) ? "" : "u")
-
-static void print_gcc_op(enum op op, int is_signed, int bits, int size)
-{
-	int i;
-	int gccprereq = 0;
-
-	switch (op) {
-	case OP_CMPEQ:
-	case OP_CMPLE:
-	case OP_CMPLT:
-	case OP_CMPGT:
-	case OP_CMPGE:
-	case OP_MIN:
-	case OP_MAX:
-	case OP_LSHIFT:
-	case OP_RSHIFT:
-	case OP_LRSHIFT:
-	case OP_AVG:
-		puts("#if VEC_GNUC_ATLEAST(4, 3, 0)");
-		gccprereq = 1;
-		break;
-	default:
-		break;
-	}
-	
-	printf("#ifndef V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u);
-
-	printf("VEC_FUNC_IMPL ");
-
-	/* first; the return value */
-	switch (op) {
-	case OP_SPLAT:
-	case OP_LOAD_ALIGNED:
-	case OP_LOAD:
-	case OP_ADD:
-	case OP_SUB:
-	case OP_MUL:
-	case OP_DIV:
-	case OP_AND:
-	case OP_OR:
-	case OP_XOR:
-	case OP_CMPLT:
-	case OP_CMPEQ:
-	case OP_CMPGT:
-	case OP_CMPLE:
-	case OP_CMPGE:
-	case OP_MIN:
-	case OP_MAX:
-	case OP_AVG:
-	case OP_RSHIFT:
-	case OP_LRSHIFT:
-	case OP_LSHIFT:
-	case OP_NOT:
-		printf("v%sint%dx%d", LOSIGN(is_signed), bits, size);
-		break;
-	case OP_STORE_ALIGNED:
-	case OP_STORE:
-		printf("void");
-		break;
-	}
-
-	/* whitespace and function name */
-	printf(" v%sint%dx%d_%s(", LOSIGN(is_signed), bits, size, op_names[op].l);
-
-	/* parameters */
-	switch (op) {
-	case OP_SPLAT:
-		printf("vec_%sint%d x", LOSIGN(is_signed), bits);
-		break;
-	case OP_LOAD_ALIGNED:
-	case OP_LOAD:
-		printf("const vec_%sint%d x[%d]", LOSIGN(is_signed), bits, size);
-		break;
-	case OP_STORE_ALIGNED:
-	case OP_STORE:
-		printf("v%sint%dx%d vec, vec_%sint%d arr[%d]", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size);
-		break;
-	case OP_ADD:
-	case OP_SUB:
-	case OP_MUL:
-	case OP_DIV:
-	case OP_AND:
-	case OP_OR:
-	case OP_XOR:
-	case OP_CMPLT:
-	case OP_CMPEQ:
-	case OP_CMPGT:
-	case OP_CMPLE:
-	case OP_CMPGE:
-	case OP_MIN:
-	case OP_MAX:
-	case OP_AVG:
-		printf("v%sint%dx%d vec1, v%sint%dx%d vec2", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size);
-		break;
-	case OP_RSHIFT:
-	case OP_LRSHIFT:
-	case OP_LSHIFT:
-		printf("v%sint%dx%d vec1, vuint%dx%d vec2", LOSIGN(is_signed), bits, size, bits, size);
-		break;
-	case OP_NOT:
-		printf("v%sint%dx%d vec", LOSIGN(is_signed), bits, size);
-		break;
-	}
-
-	puts(")\n{");
-
-	switch (op) {
-	case OP_SPLAT:
-		printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
-		printf("\tvec.gcc = (__typeof__(vec.gcc)){");
-		for (i = 0; i < size; i++)
-			printf("x,");
-		printf("};\n");
-		printf("\treturn vec;\n");
-		break;
-	case OP_LOAD_ALIGNED:
-		printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
-		puts("\tvec.gcc = *(__typeof__(vec.gcc) *)x;");
-		printf("\treturn vec;\n");
-		break;
-	case OP_LOAD:
-		printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
-		puts("\tmemcpy(&vec, x, sizeof(vec));");
-		printf("\treturn vec;\n");
-		break;
-	case OP_STORE_ALIGNED:
-		puts("\t*(__typeof__(vec.gcc) *)arr = vec.gcc;");
-		break;
-	case OP_STORE:
-		puts("\tmemcpy(arr, &vec, sizeof(vec));");
-		break;
-	case OP_ADD:
-	case OP_SUB:
-	case OP_MUL:
-	case OP_DIV:
-	case OP_AND:
-	case OP_OR:
-	case OP_XOR:
-	case OP_CMPLT:
-	case OP_CMPEQ:
-	case OP_CMPGT:
-	case OP_CMPLE:
-	case OP_CMPGE: {
-		const char *op_builtins[OP_CMPGE - OP_ADD + 1] = {"+", "-", "*", /*"/", */"&", "|", "^", "<", "==", ">", "<=", ">="};
-
-		printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_ADD]);
-		printf("\treturn vec1;\n");
-		break;
-	}
-
-	case OP_LSHIFT:
-	case OP_RSHIFT: {
-		const char *op_builtins[OP_RSHIFT - OP_LSHIFT + 1] = {"<<", ">>"};
-
-		printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_LSHIFT]);
-		printf("\treturn vec1;\n");
-		break;
-	}
-
-	case OP_LRSHIFT: {
-		/* sigh */
-		printf("\tvec1.gcc = (__typeof__(vec1.gcc))((vec_uint%d __attribute__((__vector_size__(%d))))vec1.gcc >> vec2.gcc);\n", bits, bits * size / 8);
-		printf("\treturn vec1;\n");
-		break;
-	}
-	case OP_MIN:
-	case OP_MAX: {
-		const char *op_builtins[OP_MAX - OP_MIN + 1] = {"<", ">"};
-
-		printf("\tv%sint%dx%d mask;\n", LOSIGN(is_signed), bits, size);
-		printf("\tmask.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_MIN]);
-		printf("\tvec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);\n");
-		printf("\treturn vec1;\n");
-		break;
-	}
-	case OP_AVG:
-		printf("\tvint%dx%d ones = vint%dx%d_splat(1);\n", bits, size, bits, size);
-
-		if (is_signed) {
-			puts("\t__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);");
-			puts("\t__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);");
-			puts("\t__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);");
-			puts("\t__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);");
-			puts("");
-			printf("\tvec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);\n");
-		} else {
-			printf("\tvec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);\n");
-		}
-
-		printf("\treturn vec1;\n");
-		break;
-	case OP_NOT:
-		printf("\tvec.gcc = ~vec.gcc;\n");
-		printf("\treturn vec;\n");
-		break;
-	default:
-		printf("#error implement this operation\n");
-		break;
-	}
-
-	/* end function definition */
-	puts("}");
-
-	printf("# define V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u);
-	puts("#endif");
-
-	if (gccprereq)
-		puts("#endif");
-}
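
The unsigned OP_AVG branch above leans on the identity
ceil((a + b) / 2) == (a >> 1) + (b >> 1) + ((a | b) & 1), which avoids
computing a + b (and its possible overflow); e.g. a = 3, b = 4 gives
1 + 2 + 1 = 4 = ceil(7 / 2). A scalar sketch of the same trick (hypothetical
helper, not part of vec):

	static unsigned int avg_round_up(unsigned int a, unsigned int b)
	{
		/* (a >> 1) + (b >> 1) sums the halves; ((a | b) & 1) adds the
		 * round-up whenever at least one operand is odd */
		return (a >> 1) + (b >> 1) + ((a | b) & 1);
	}
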
-
-static inline void print_ops(int is_signed, int bits, int size)
-{
-	int i;
-
-	printf("\n\n/* v%sint%dx%d */\n\n", (is_signed ? "" : "u"), bits, size);
-
-	for (i = 0; i < OP_FINAL_; i++)
-		print_gcc_op(i, is_signed, bits, size);
-}
-
-static const char *header =
-	"/**\n"
-	" * vec - a tiny SIMD vector library in C99\n"
-	" * \n"
-	" * Copyright (c) 2024-2025 Paper\n"
-	" * \n"
-	" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
-	" * of this software and associated documentation files (the \"Software\"), to deal\n"
-	" * in the Software without restriction, including without limitation the rights\n"
-	" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
-	" * copies of the Software, and to permit persons to whom the Software is\n"
-	" * furnished to do so, subject to the following conditions:\n"
-	" * \n"
-	" * The above copyright notice and this permission notice shall be included in all\n"
-	" * copies or substantial portions of the Software.\n"
-	" * \n"
-	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
-	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
-	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
-	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
-	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
-	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
-	" * SOFTWARE.\n"
-	"**/\n"
-	"\n"
-	"/* This file is automatically generated! Do not edit it directly!\n"
-	" * Edit the code that generates it in utils/gengcc.c  --paper */\n"
-	"\n"
-	"#ifndef VEC_IMPL_GCC_H_\n"
-	"#define VEC_IMPL_GCC_H_\n"
-	"\n";
-
-static const char *footer = 
-	"#endif /* VEC_IMPL_GCC_H_ */\n";
-
-int main(void)
-{
-	static struct {
-		int bits, size;
-	} defs[] = {
-		/* -- 8-bit */
-		{8, 2},
-		{8, 4},
-		{8, 8},
-		{8, 16},
-		{8, 32},
-		{8, 64},
-
-		/* -- 16-bit */
-		{16, 2},
-		{16, 4},
-		{16, 8},
-		{16, 16},
-		{16, 32},
-
-		/* -- 32-bit */
-		{32, 2},
-		{32, 4},
-		{32, 8},
-		{32, 16},
-
-		/* -- 64-bit */
-		{64, 2},
-		{64, 4},
-		{64, 8},
-	};
-	int i;
-
-	puts(header);
-
-	for (i = 0; i < ARRAY_SIZE(defs); i++) {
-		print_ops(1, defs[i].bits, defs[i].size);
-		print_ops(0, defs[i].bits, defs[i].size);
-	}
-
-	puts(footer);
-}
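
For a sense of what this generator writes into include/vec/impl/gcc.h, a
comparison op comes out roughly as follows (reconstructed from the printf
calls above, whitespace approximate):

	#if VEC_GNUC_ATLEAST(4, 3, 0)
	#ifndef VINT32x4_CMPLT_DEFINED
	VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2)
	{
		vec1.gcc = (vec1.gcc < vec2.gcc);
		return vec1;
	}
	# define VINT32x4_CMPLT_DEFINED
	#endif
	#endif
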
--- a/utils/gengeneric.c	Tue Apr 29 16:54:13 2025 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,336 +0,0 @@
-/**
- * vec - a tiny SIMD vector library in C99
- * 
- * Copyright (c) 2024-2025 Paper
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- * 
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
-**/
-
-/* Use this file to generate include/vec/impl/generic.h !!
- *
- * `gcc -o gengeneric gengeneric.c` */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-
-#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
-
-/* XXX: would it be faster to unroll literally everything here instead of
- * defining all these macros and then expanding them? */
-static const char *header =
-	"/**\n"
-	" * vec - a tiny SIMD vector library in C99\n"
-	" * \n"
-	" * Copyright (c) 2024-2025 Paper\n"
-	" * \n"
-	" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
-	" * of this software and associated documentation files (the \"Software\"), to deal\n"
-	" * in the Software without restriction, including without limitation the rights\n"
-	" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
-	" * copies of the Software, and to permit persons to whom the Software is\n"
-	" * furnished to do so, subject to the following conditions:\n"
-	" * \n"
-	" * The above copyright notice and this permission notice shall be included in all\n"
-	" * copies or substantial portions of the Software.\n"
-	" * \n"
-	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
-	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
-	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
-	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
-	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
-	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
-	" * SOFTWARE.\n"
-	"**/\n"
-	"\n"
-	"/* This file is automatically generated! Do not edit it directly!\n"
-	" * Edit the code that generates it in utils/gengeneric.c  --paper */\n"
-	"\n"
-	"#ifndef VEC_IMPL_GENERIC_H_\n"
-	"#define VEC_IMPL_GENERIC_H_\n"
-	"\n"
-	"#define VEC_GENERIC_OPERATION(op, sign, bits, size) \\\n"
-	"	do { \\\n"
-	"		int i; \\\n"
-	"	\\\n"
-	"		for (i = 0; i < size; i++) \\\n"
-	"			vec1.generic[i] = (op); \\\n"
-	"	\\\n"
-	"		return vec1; \\\n"
-	"	} while (0)\n"
-	"\n"
-	"#define VEC_GENERIC_BUILTIN_OPERATION(op, sign, bits, size) \\\n"
-	"	VEC_GENERIC_OPERATION(vec1.generic[i] op vec2.generic[i], sign, bits, size)\n"
-	"\n"
-	"#define VEC_GENERIC_CMP(op, sign, bits, size) \\\n"
-	"	VEC_GENERIC_OPERATION((vec1.generic[i] op vec2.generic[i]) ? (vec_##sign##int##bits)VEC_MAX_OF_TYPE(vec_uint##bits) : 0, sign, bits, size)\n"
-	"\n"
-	"/* okay, now we can do this crap: */\n"
-	"\n"
-	"#define VEC_GENERIC_SPLAT(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \\\n"
-	"	{ \\\n"
-	"		v##sign##int##bits##x##size vec; \\\n"
-	"		for (int i = 0; i < size; i++) \\\n"
-	"			vec.generic[i] = x; \\\n"
-	"		return vec; \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_LOAD_EX(name, sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits in[size]) \\\n"
-	"	{ \\\n"
-	"		v##sign##int##bits##x##size vec; \\\n"
-	"		memcpy(&vec, in, sizeof(vec_##sign##int##bits) * size); \\\n"
-	"		return vec; \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_LOAD_ALIGNED(sign, bits, size) VEC_GENERIC_LOAD_EX(load_aligned, sign, bits, size)\n"
-	"#define VEC_GENERIC_LOAD(sign, bits, size) VEC_GENERIC_LOAD_EX(load, sign, bits, size)\n"
-	"\n"
-	"#define VEC_GENERIC_STORE_EX(name, sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \\\n"
-	"	{ \\\n"
-	"		memcpy(out, &vec, sizeof(vec_##sign##int##bits) * size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_STORE_ALIGNED(sign, bits, size) VEC_GENERIC_STORE_EX(store_aligned, sign, bits, size)\n"
-	"#define VEC_GENERIC_STORE(sign, bits, size) VEC_GENERIC_STORE_EX(store, sign, bits, size)\n"
-	"\n"
-	"#define VEC_GENERIC_ADD(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_add(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_BUILTIN_OPERATION(+, sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_SUB(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_sub(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_BUILTIN_OPERATION(-, sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_MUL(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_mul(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_BUILTIN_OPERATION(*, sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_DIV(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_div(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] / vec2.generic[i]) : 0, sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_MOD(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_mod(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] % vec2.generic[i]) : 0, sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_AVG(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		for (int i = 0; i < size; i++) \\\n"
-	"			vec1.generic[i] = vec_im##sign##avg(vec1.generic[i], vec2.generic[i]); \\\n"
-	"	\\\n"
-	"		return vec1; \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_AND(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_and(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_BUILTIN_OPERATION(&, sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_OR(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_or(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_BUILTIN_OPERATION(|, sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_XOR(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_xor(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_BUILTIN_OPERATION(^, sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_NOT(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \\\n"
-	"	{ \\\n"
-	"		return v##sign##int##bits##x##size##_xor(vec, v##sign##int##bits##x##size##_splat((vec_##sign##int##bits)VEC_MAX_OF_TYPE(vec_uint##bits))); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_CMPLT(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmplt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_CMP(<, sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_CMPLE(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmple(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		return v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size##_cmpgt(vec1, vec2)); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_CMPEQ(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpeq(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_CMP(==, sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_CMPGE(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpge(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		return v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size##_cmplt(vec1, vec2)); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_CMPGT(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpgt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_CMP(>, sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_LSHIFT(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_lshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_OPERATION(vec_##sign##lshift(vec1.generic[i], vec2.generic[i]), sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_RSHIFT(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_rshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_OPERATION(vec_##sign##rshift(vec1.generic[i], vec2.generic[i]), sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_LRSHIFT(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_lrshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		VEC_GENERIC_OPERATION(vec_urshift((vec_uint##bits)vec1.generic[i], vec2.generic[i]), sign, bits, size); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_MIN(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_min(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		v##sign##int##bits##x##size cmplt = v##sign##int##bits##x##size##_cmplt(vec1, vec2); \\\n"
-	"	\\\n"
-	"		v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \\\n"
-	"		v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \\\n"
-	"	\\\n"
-	"		return v##sign##int##bits##x##size##_or(a, b); \\\n"
-	"	}\n"
-	"\n"
-	"#define VEC_GENERIC_MAX(sign, bits, size) \\\n"
-	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_max(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
-	"	{ \\\n"
-	"		v##sign##int##bits##x##size cmplt = v##sign##int##bits##x##size##_cmpgt(vec1, vec2); \\\n"
-	"	\\\n"
-	"		v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \\\n"
-	"		v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \\\n"
-	"	\\\n"
-	"		return v##sign##int##bits##x##size##_or(a, b); \\\n"
-	"	}\n"
-	"\n"
-	"/* ------------------------------------------------------------------------ */\n"
-	"/* PREPROCESSOR HELL INCOMING */\n";
-
-static const char *footer = 
-	"#endif /* VEC_IMPL_GENERIC_H_ */\n";
-
-/* ------------------------------------------------------------------------ */
-
-static void print_generic_op(const char *op, int is_signed, int bits, int size)
-{
-	printf(
-		"#ifndef V%sINT%dx%d_%s_DEFINED\n"
-		"VEC_GENERIC_%s(%s, %d, %d)\n"
-		"# define V%sINT%dx%d_%s_DEFINED\n"
-		"#endif\n",
-	(is_signed ? "" : "U"), bits, size, op, op, (is_signed ? "/* nothing */" : "u"), bits, size, (is_signed ? "" : "U"), bits, size, op);
-}
-
-typedef void (*print_op_spec)(const char *op, int is_signed, int bits, int size);
-
-static inline void print_ops(int is_signed, int bits, int size, print_op_spec print_op)
-{
-	/* all supported operations here */
-	static const char *ops[] = {
-		"SPLAT",
-		"LOAD_ALIGNED",
-		"LOAD",
-		"STORE_ALIGNED",
-		"STORE",
-		"ADD",
-		"SUB",
-		"MUL",
-		"DIV",
-		"MOD",
-		"AVG",
-		"AND",
-		"OR",
-		"XOR",
-		"NOT",
-		"CMPLT",
-		"CMPEQ",
-		"CMPGT",
-		"CMPLE", /* these two must come after CMPLT and CMPGT, */
-		"CMPGE", /* because their definitions call those functions */
-		"MIN",
-		"MAX",
-		"RSHIFT",
-		"LRSHIFT",
-		"LSHIFT",
-		NULL,
-	};
-	int i;
-
-	printf("\n\n/* v%sint%dx%d */\n\n", (is_signed ? "" : "u"), bits, size);
-
-	for (i = 0; ops[i]; i++)
-		print_op(ops[i], is_signed, bits, size);
-}
-
-int main(void)
-{
-	static struct {
-		int bits, size;
-		print_op_spec print_op;
-	} defs[] = {
-		/* -- 8-bit */
-		{8, 2, print_generic_op},
-
-		/* -- 16-bit */
-		{16, 2, print_generic_op},
-
-		/* -- 32-bit */
-		{32, 2, print_generic_op},
-
-		/* -- 64-bit */
-		{64, 2, print_generic_op},
-	};
-	int i;
-
-	puts(header);
-
-	for (i = 0; i < ARRAY_SIZE(defs); i++) {
-		print_ops(1, defs[i].bits, defs[i].size, defs[i].print_op);
-		print_ops(0, defs[i].bits, defs[i].size, defs[i].print_op);
-	}
-
-	puts(footer);
-}
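
Finally, to show how the generic fallback composes its ops, VEC_GENERIC_MIN
builds min out of the cmplt/and/not/or primitives rather than branching;
expanded by hand for vint32x2 it is approximately:

	VEC_FUNC_IMPL vint32x2 vint32x2_min(vint32x2 vec1, vint32x2 vec2)
	{
		/* all-ones mask where vec1 < vec2, all-zeroes elsewhere */
		vint32x2 cmplt = vint32x2_cmplt(vec1, vec2);

		/* select vec1 lanes where the mask is set, vec2 lanes otherwise */
		vint32x2 a = vint32x2_and(vec1, cmplt);
		vint32x2 b = vint32x2_and(vec2, vint32x2_not(cmplt));

		return vint32x2_or(a, b);
	}
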