changeset 40:55cadb1fac4b

*: add mod operation, add GCC vector backend; need to test it with old gcc though. :)
author Paper <paper@tflc.us>
date Sun, 27 Apr 2025 02:49:53 -0400
parents f9ca85d2f14c
children c6e0df09b86f
files README include/vec/impl/gcc.h include/vec/impl/generic.h include/vec/vec.h test/Makefile.template test/test_arith.h utils/gengcc.c utils/gengeneric.c
diffstat 8 files changed, 7809 insertions(+), 39 deletions(-)
--- a/README	Sat Apr 26 15:31:39 2025 -0400
+++ b/README	Sun Apr 27 02:49:53 2025 -0400
@@ -63,6 +63,10 @@
 		considered defined behavior and should result in a zero;
 		if this doesn't happen it's considered a bug
 
+	v[u]intAxB mod(v[u]intAxB vec1, v[u]intAxB vec2)
+		gives the remainder of a division operation. as with div,
+		divide-by-zero is defined behavior.
+
 	v[u]intAxB and(v[u]intAxB vec1, v[u]intAxB vec2)
 		bitwise AND (&) of the values in both vectors
 
@@ -87,7 +91,9 @@
 	v[u]intAxB avg(v[u]intAxB vec1, v[u]intAxB vec2)
 		returns the average of the values in both vectors
 		i.e., div(add(vec1, vec2), splat(2)), without
-		the possibility of overflow.
+		the possibility of overflow. If you are familiar
+		with AltiVec, this operation exactly mimics
+		vec_avg.
 
 	v[u]intAxB min(v[u]intAxB vec1, v[u]intAxB vec2)
 		returns the minimum of the values in both vectors
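
Illustration only, not part of the changeset: a minimal sketch of calling the new mod operation documented in the hunk above. It assumes include/ is on the include path, that no extra runtime setup is needed, and that the generated backends expose a vint8x16_mod following the same v<type>x<count>_<op> naming and signature pattern as the load/store functions shown later in this changeset (the name is an assumption; only the generic README entry appears here).

	#include <stdio.h>
	#include "vec/vec.h"

	int main(void)
	{
		vec_int8 a[16], b[16], r[16];
		int i;

		for (i = 0; i < 16; i++) {
			a[i] = (vec_int8)(i * 7);
			b[i] = (vec_int8)(i % 4); /* lanes 0, 4, 8, 12 have a zero divisor (defined behavior per the README) */
		}

		vint8x16 v1 = vint8x16_load(a);
		vint8x16 v2 = vint8x16_load(b);
		vint8x16_store(vint8x16_mod(v1, v2), r); /* assumed name, per the README entry above */

		for (i = 0; i < 16; i++)
			printf("%d %% %d -> %d\n", a[i], b[i], r[i]);
		return 0;
	}
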
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/include/vec/impl/gcc.h	Sun Apr 27 02:49:53 2025 -0400
@@ -0,0 +1,7089 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+/* This file is automatically generated! Do not edit it directly!
+ * Edit the code that generates it in utils/gengcc.c  --paper */
+
+#ifndef VEC_IMPL_GCC_H_
+#define VEC_IMPL_GCC_H_
+
+
+
+
+/* vint8x2 */
+
+#ifndef VINT8x2_SPLAT_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_splat(vec_int8 x)
+{
+	vint8x2 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	return vec;
+}
+# define VINT8x2_SPLAT_DEFINED
+#endif
+#ifndef VINT8x2_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_load_aligned(const vec_int8 x[2])
+{
+	vint8x2 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT8x2_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT8x2_LOAD_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_load(const vec_int8 x[2])
+{
+	vint8x2 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT8x2_LOAD_DEFINED
+#endif
+#ifndef VINT8x2_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint8x2_store_aligned(vint8x2 vec, vec_int8 arr[2])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT8x2_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT8x2_STORE_DEFINED
+VEC_FUNC_IMPL void vint8x2_store(vint8x2 vec, vec_int8 arr[2])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT8x2_STORE_DEFINED
+#endif
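
A note on the two load/store flavors above: the _aligned variants dereference the array through a pointer to the underlying GCC vector type, so the storage must satisfy that type's alignment, while the plain variants go through memcpy and accept any address. A hedged calling sketch (assumes include/ on the include path; buffer names are illustrative):

	#include <stdio.h>
	#include "vec/vec.h"

	int main(void)
	{
		vec_int8 data[2] = {1, 2};                        /* arbitrary, possibly unaligned source */
		vec_int8 out[2] __attribute__((__aligned__(16))); /* deliberately over-aligned for the sketch */

		vint8x2 v = vint8x2_load(data);   /* memcpy path: any alignment is accepted */
		vint8x2_store_aligned(v, out);    /* dereferences a vector-typed pointer: needs suitable alignment */

		printf("%d %d\n", out[0], out[1]);
		return 0;
	}
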
+#ifndef VINT8x2_ADD_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_add(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_ADD_DEFINED
+#endif
+#ifndef VINT8x2_SUB_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_sub(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_SUB_DEFINED
+#endif
+#ifndef VINT8x2_MUL_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_mul(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_MUL_DEFINED
+#endif
+#ifndef VINT8x2_AND_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_and(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_AND_DEFINED
+#endif
+#ifndef VINT8x2_OR_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_or(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_OR_DEFINED
+#endif
+#ifndef VINT8x2_XOR_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_xor(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_XOR_DEFINED
+#endif
+#ifndef VINT8x2_CMPLT_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_cmplt(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_CMPLT_DEFINED
+#endif
+#ifndef VINT8x2_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpeq(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_CMPEQ_DEFINED
+#endif
+#ifndef VINT8x2_CMPGT_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpgt(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_CMPGT_DEFINED
+#endif
+#ifndef VINT8x2_CMPLE_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_cmple(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_CMPLE_DEFINED
+#endif
+#ifndef VINT8x2_CMPGE_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_cmpge(vint8x2 vec1, vint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_CMPGE_DEFINED
+#endif
+#ifndef VINT8x2_MIN_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_min(vint8x2 vec1, vint8x2 vec2)
+{
+	vint8x2 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x2_MIN_DEFINED
+#endif
+#ifndef VINT8x2_MAX_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_max(vint8x2 vec1, vint8x2 vec2)
+{
+	vint8x2 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x2_MAX_DEFINED
+#endif
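
The min/max implementations above rely on GCC vector comparisons yielding per-lane masks of all ones (true) or all zeros (false), so `(a & mask) | (b & ~mask)` selects the right lane from each operand. A scalar C99 sketch of the same idea, for illustration only:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int8_t a = 3, b = -7;
		/* comparison result widened to all-ones / all-zeros, like a vector lane mask */
		int8_t mask = (a < b) ? (int8_t)-1 : 0;
		int8_t min = (int8_t)((a & mask) | (b & ~mask));
		printf("min(%d, %d) = %d\n", a, b, min); /* prints -7 */
		return 0;
	}
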
+#ifndef VINT8x2_AVG_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_avg(vint8x2 vec1, vint8x2 vec2)
+{
+	vint8x2 ones = vint8x2_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT8x2_AVG_DEFINED
+#endif
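
The signed avg above avoids widening by splitting each lane into its quotient and remainder by two. A quick exhaustive scalar check, as a sketch, assuming the intended semantics are the AltiVec-style `(a + b + 1) >> 1` rounding that the README hunk refers to:

	#include <stdint.h>
	#include <stdio.h>

	/* reference: floor((a + b + 1) / 2), computed in a wide type */
	static int8_t avg_ref(int8_t a, int8_t b)
	{
		int s = a + b + 1;
		int q = s / 2;
		if (s % 2 != 0 && s < 0)
			q--; /* turn C's truncating division into a floor, matching an arithmetic shift */
		return (int8_t)q;
	}

	/* scalar transcription of the vector expression used above */
	static int8_t avg_vec(int8_t a, int8_t b)
	{
		int xr = a % 2, yr = b % 2;
		return (int8_t)((a / 2) + (b / 2) + ((xr + yr) / 2) + (((xr + yr) % 2 == 1) ? 1 : 0));
	}

	int main(void)
	{
		for (int a = -128; a <= 127; a++)
			for (int b = -128; b <= 127; b++)
				if (avg_ref((int8_t)a, (int8_t)b) != avg_vec((int8_t)a, (int8_t)b))
					printf("mismatch: %d, %d\n", a, b);
		return 0;
	}
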
+#ifndef VINT8x2_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_lshift(vint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_LSHIFT_DEFINED
+#endif
+#ifndef VINT8x2_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_rshift(vint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_RSHIFT_DEFINED
+#endif
+#ifndef VINT8x2_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_lrshift(vint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(2))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT8x2_LRSHIFT_DEFINED
+#endif
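
lrshift above is the logical (zero-filling) right shift: the lanes are reinterpreted as the unsigned element type before shifting, because `>>` on a signed GCC vector shifts arithmetically. A scalar C99 sketch of the same trick:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		int8_t x = (int8_t)-4;                     /* bit pattern 0xFC */
		int8_t arith = (int8_t)(x >> 1);           /* sign-filling: -2 on the usual targets */
		int8_t logic = (int8_t)((uint8_t)x >> 1);  /* zero-filling: 0x7E == 126 */
		printf("%d %d\n", arith, logic);
		return 0;
	}
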
+#ifndef VINT8x2_NOT_DEFINED
+VEC_FUNC_IMPL vint8x2 vint8x2_not(vint8x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT8x2_NOT_DEFINED
+#endif
+
+
+/* vuint8x2 */
+
+#ifndef VUINT8x2_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_splat(vec_uint8 x)
+{
+	vuint8x2 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	return vec;
+}
+# define VUINT8x2_SPLAT_DEFINED
+#endif
+#ifndef VUINT8x2_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_load_aligned(const vec_uint8 x[2])
+{
+	vuint8x2 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT8x2_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT8x2_LOAD_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_load(const vec_uint8 x[2])
+{
+	vuint8x2 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT8x2_LOAD_DEFINED
+#endif
+#ifndef VUINT8x2_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint8x2_store_aligned(vuint8x2 vec, vec_uint8 arr[2])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT8x2_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT8x2_STORE_DEFINED
+VEC_FUNC_IMPL void vuint8x2_store(vuint8x2 vec, vec_uint8 arr[2])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT8x2_STORE_DEFINED
+#endif
+#ifndef VUINT8x2_ADD_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_add(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_ADD_DEFINED
+#endif
+#ifndef VUINT8x2_SUB_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_sub(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_SUB_DEFINED
+#endif
+#ifndef VUINT8x2_MUL_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_mul(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_MUL_DEFINED
+#endif
+#ifndef VUINT8x2_AND_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_and(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_AND_DEFINED
+#endif
+#ifndef VUINT8x2_OR_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_or(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_OR_DEFINED
+#endif
+#ifndef VUINT8x2_XOR_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_xor(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_XOR_DEFINED
+#endif
+#ifndef VUINT8x2_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmplt(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_CMPLT_DEFINED
+#endif
+#ifndef VUINT8x2_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpeq(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_CMPEQ_DEFINED
+#endif
+#ifndef VUINT8x2_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpgt(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_CMPGT_DEFINED
+#endif
+#ifndef VUINT8x2_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmple(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_CMPLE_DEFINED
+#endif
+#ifndef VUINT8x2_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpge(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_CMPGE_DEFINED
+#endif
+#ifndef VUINT8x2_MIN_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_min(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vuint8x2 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT8x2_MIN_DEFINED
+#endif
+#ifndef VUINT8x2_MAX_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_max(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vuint8x2 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT8x2_MAX_DEFINED
+#endif
+#ifndef VUINT8x2_AVG_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_avg(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT8x2_AVG_DEFINED
+#endif
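
The unsigned avg above uses the halving identity `(a >> 1) + (b >> 1) + ((a | b) & 1)`, which should equal the vec_avg-style rounded-up mean `(a + b + 1) >> 1` without ever forming the full sum. A quick exhaustive scalar check, as a sketch:

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		for (int a = 0; a <= 255; a++)
			for (int b = 0; b <= 255; b++) {
				uint8_t lhs = (uint8_t)(((uint8_t)a >> 1) + ((uint8_t)b >> 1) + ((a | b) & 1));
				uint8_t rhs = (uint8_t)((a + b + 1) >> 1); /* reference, computed in int so it cannot overflow */
				if (lhs != rhs)
					printf("mismatch: %d, %d\n", a, b);
			}
		return 0;
	}
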
+#ifndef VUINT8x2_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_lshift(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_LSHIFT_DEFINED
+#endif
+#ifndef VUINT8x2_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_rshift(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_RSHIFT_DEFINED
+#endif
+#ifndef VUINT8x2_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_lrshift(vuint8x2 vec1, vuint8x2 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(2))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT8x2_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT8x2_NOT_DEFINED
+VEC_FUNC_IMPL vuint8x2 vuint8x2_not(vuint8x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT8x2_NOT_DEFINED
+#endif
+
+
+/* vint8x4 */
+
+#ifndef VINT8x4_SPLAT_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_splat(vec_int8 x)
+{
+	vint8x4 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	return vec;
+}
+# define VINT8x4_SPLAT_DEFINED
+#endif
+#ifndef VINT8x4_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_load_aligned(const vec_int8 x[4])
+{
+	vint8x4 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT8x4_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT8x4_LOAD_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_load(const vec_int8 x[4])
+{
+	vint8x4 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT8x4_LOAD_DEFINED
+#endif
+#ifndef VINT8x4_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint8x4_store_aligned(vint8x4 vec, vec_int8 arr[4])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT8x4_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT8x4_STORE_DEFINED
+VEC_FUNC_IMPL void vint8x4_store(vint8x4 vec, vec_int8 arr[4])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT8x4_STORE_DEFINED
+#endif
+#ifndef VINT8x4_ADD_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_add(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_ADD_DEFINED
+#endif
+#ifndef VINT8x4_SUB_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_sub(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_SUB_DEFINED
+#endif
+#ifndef VINT8x4_MUL_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_mul(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_MUL_DEFINED
+#endif
+#ifndef VINT8x4_AND_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_and(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_AND_DEFINED
+#endif
+#ifndef VINT8x4_OR_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_or(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_OR_DEFINED
+#endif
+#ifndef VINT8x4_XOR_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_xor(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_XOR_DEFINED
+#endif
+#ifndef VINT8x4_CMPLT_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_cmplt(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_CMPLT_DEFINED
+#endif
+#ifndef VINT8x4_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_cmpeq(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_CMPEQ_DEFINED
+#endif
+#ifndef VINT8x4_CMPGT_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_cmpgt(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_CMPGT_DEFINED
+#endif
+#ifndef VINT8x4_CMPLE_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_cmple(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_CMPLE_DEFINED
+#endif
+#ifndef VINT8x4_CMPGE_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_cmpge(vint8x4 vec1, vint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_CMPGE_DEFINED
+#endif
+#ifndef VINT8x4_MIN_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_min(vint8x4 vec1, vint8x4 vec2)
+{
+	vint8x4 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x4_MIN_DEFINED
+#endif
+#ifndef VINT8x4_MAX_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_max(vint8x4 vec1, vint8x4 vec2)
+{
+	vint8x4 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x4_MAX_DEFINED
+#endif
+#ifndef VINT8x4_AVG_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_avg(vint8x4 vec1, vint8x4 vec2)
+{
+	vint8x4 ones = vint8x4_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT8x4_AVG_DEFINED
+#endif
+#ifndef VINT8x4_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_lshift(vint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_LSHIFT_DEFINED
+#endif
+#ifndef VINT8x4_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_rshift(vint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_RSHIFT_DEFINED
+#endif
+#ifndef VINT8x4_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_lrshift(vint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT8x4_LRSHIFT_DEFINED
+#endif
+#ifndef VINT8x4_NOT_DEFINED
+VEC_FUNC_IMPL vint8x4 vint8x4_not(vint8x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT8x4_NOT_DEFINED
+#endif
+
+
+/* vuint8x4 */
+
+#ifndef VUINT8x4_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_splat(vec_uint8 x)
+{
+	vuint8x4 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	return vec;
+}
+# define VUINT8x4_SPLAT_DEFINED
+#endif
+#ifndef VUINT8x4_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_load_aligned(const vec_uint8 x[4])
+{
+	vuint8x4 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT8x4_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT8x4_LOAD_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_load(const vec_uint8 x[4])
+{
+	vuint8x4 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT8x4_LOAD_DEFINED
+#endif
+#ifndef VUINT8x4_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint8x4_store_aligned(vuint8x4 vec, vec_uint8 arr[4])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT8x4_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT8x4_STORE_DEFINED
+VEC_FUNC_IMPL void vuint8x4_store(vuint8x4 vec, vec_uint8 arr[4])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT8x4_STORE_DEFINED
+#endif
+#ifndef VUINT8x4_ADD_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_add(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_ADD_DEFINED
+#endif
+#ifndef VUINT8x4_SUB_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_sub(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_SUB_DEFINED
+#endif
+#ifndef VUINT8x4_MUL_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_mul(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_MUL_DEFINED
+#endif
+#ifndef VUINT8x4_AND_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_and(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_AND_DEFINED
+#endif
+#ifndef VUINT8x4_OR_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_or(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_OR_DEFINED
+#endif
+#ifndef VUINT8x4_XOR_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_xor(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_XOR_DEFINED
+#endif
+#ifndef VUINT8x4_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmplt(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_CMPLT_DEFINED
+#endif
+#ifndef VUINT8x4_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpeq(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_CMPEQ_DEFINED
+#endif
+#ifndef VUINT8x4_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpgt(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_CMPGT_DEFINED
+#endif
+#ifndef VUINT8x4_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmple(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_CMPLE_DEFINED
+#endif
+#ifndef VUINT8x4_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpge(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_CMPGE_DEFINED
+#endif
+#ifndef VUINT8x4_MIN_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_min(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vuint8x4 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT8x4_MIN_DEFINED
+#endif
+#ifndef VUINT8x4_MAX_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_max(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vuint8x4 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT8x4_MAX_DEFINED
+#endif
+#ifndef VUINT8x4_AVG_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_avg(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT8x4_AVG_DEFINED
+#endif
+#ifndef VUINT8x4_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_lshift(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_LSHIFT_DEFINED
+#endif
+#ifndef VUINT8x4_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_rshift(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_RSHIFT_DEFINED
+#endif
+#ifndef VUINT8x4_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_lrshift(vuint8x4 vec1, vuint8x4 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT8x4_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT8x4_NOT_DEFINED
+VEC_FUNC_IMPL vuint8x4 vuint8x4_not(vuint8x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT8x4_NOT_DEFINED
+#endif
+
+
+/* vint8x8 */
+
+#ifndef VINT8x8_SPLAT_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_splat(vec_int8 x)
+{
+	vint8x8 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VINT8x8_SPLAT_DEFINED
+#endif
+#ifndef VINT8x8_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_load_aligned(const vec_int8 x[8])
+{
+	vint8x8 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT8x8_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT8x8_LOAD_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_load(const vec_int8 x[8])
+{
+	vint8x8 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT8x8_LOAD_DEFINED
+#endif
+#ifndef VINT8x8_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint8x8_store_aligned(vint8x8 vec, vec_int8 arr[8])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT8x8_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT8x8_STORE_DEFINED
+VEC_FUNC_IMPL void vint8x8_store(vint8x8 vec, vec_int8 arr[8])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT8x8_STORE_DEFINED
+#endif
+#ifndef VINT8x8_ADD_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_add(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_ADD_DEFINED
+#endif
+#ifndef VINT8x8_SUB_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_sub(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_SUB_DEFINED
+#endif
+#ifndef VINT8x8_MUL_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_mul(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_MUL_DEFINED
+#endif
+#ifndef VINT8x8_AND_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_and(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_AND_DEFINED
+#endif
+#ifndef VINT8x8_OR_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_or(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_OR_DEFINED
+#endif
+#ifndef VINT8x8_XOR_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_xor(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_XOR_DEFINED
+#endif
+#ifndef VINT8x8_CMPLT_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_cmplt(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_CMPLT_DEFINED
+#endif
+#ifndef VINT8x8_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_cmpeq(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_CMPEQ_DEFINED
+#endif
+#ifndef VINT8x8_CMPGT_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_cmpgt(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_CMPGT_DEFINED
+#endif
+#ifndef VINT8x8_CMPLE_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_cmple(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_CMPLE_DEFINED
+#endif
+#ifndef VINT8x8_CMPGE_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_cmpge(vint8x8 vec1, vint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_CMPGE_DEFINED
+#endif
+#ifndef VINT8x8_MIN_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_min(vint8x8 vec1, vint8x8 vec2)
+{
+	vint8x8 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x8_MIN_DEFINED
+#endif
+#ifndef VINT8x8_MAX_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_max(vint8x8 vec1, vint8x8 vec2)
+{
+	vint8x8 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x8_MAX_DEFINED
+#endif
+#ifndef VINT8x8_AVG_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_avg(vint8x8 vec1, vint8x8 vec2)
+{
+	vint8x8 ones = vint8x8_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT8x8_AVG_DEFINED
+#endif
+#ifndef VINT8x8_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_lshift(vint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_LSHIFT_DEFINED
+#endif
+#ifndef VINT8x8_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_rshift(vint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_RSHIFT_DEFINED
+#endif
+#ifndef VINT8x8_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_lrshift(vint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT8x8_LRSHIFT_DEFINED
+#endif
+#ifndef VINT8x8_NOT_DEFINED
+VEC_FUNC_IMPL vint8x8 vint8x8_not(vint8x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT8x8_NOT_DEFINED
+#endif
+
+
+/* vuint8x8 */
+
+#ifndef VUINT8x8_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_splat(vec_uint8 x)
+{
+	vuint8x8 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VUINT8x8_SPLAT_DEFINED
+#endif
+#ifndef VUINT8x8_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_load_aligned(const vec_uint8 x[8])
+{
+	vuint8x8 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT8x8_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT8x8_LOAD_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_load(const vec_uint8 x[8])
+{
+	vuint8x8 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT8x8_LOAD_DEFINED
+#endif
+#ifndef VUINT8x8_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint8x8_store_aligned(vuint8x8 vec, vec_uint8 arr[8])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT8x8_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT8x8_STORE_DEFINED
+VEC_FUNC_IMPL void vuint8x8_store(vuint8x8 vec, vec_uint8 arr[8])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT8x8_STORE_DEFINED
+#endif
+#ifndef VUINT8x8_ADD_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_add(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_ADD_DEFINED
+#endif
+#ifndef VUINT8x8_SUB_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_sub(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_SUB_DEFINED
+#endif
+#ifndef VUINT8x8_MUL_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_mul(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_MUL_DEFINED
+#endif
+#ifndef VUINT8x8_AND_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_and(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_AND_DEFINED
+#endif
+#ifndef VUINT8x8_OR_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_or(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_OR_DEFINED
+#endif
+#ifndef VUINT8x8_XOR_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_xor(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_XOR_DEFINED
+#endif
+#ifndef VUINT8x8_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmplt(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_CMPLT_DEFINED
+#endif
+#ifndef VUINT8x8_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpeq(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_CMPEQ_DEFINED
+#endif
+#ifndef VUINT8x8_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpgt(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_CMPGT_DEFINED
+#endif
+#ifndef VUINT8x8_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmple(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_CMPLE_DEFINED
+#endif
+#ifndef VUINT8x8_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpge(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_CMPGE_DEFINED
+#endif
+#ifndef VUINT8x8_MIN_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_min(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vuint8x8 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT8x8_MIN_DEFINED
+#endif
+#ifndef VUINT8x8_MAX_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_max(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vuint8x8 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT8x8_MAX_DEFINED
+#endif
+#ifndef VUINT8x8_AVG_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_avg(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT8x8_AVG_DEFINED
+#endif
+#ifndef VUINT8x8_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_lshift(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_LSHIFT_DEFINED
+#endif
+#ifndef VUINT8x8_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_rshift(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_RSHIFT_DEFINED
+#endif
+#ifndef VUINT8x8_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_lrshift(vuint8x8 vec1, vuint8x8 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT8x8_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT8x8_NOT_DEFINED
+VEC_FUNC_IMPL vuint8x8 vuint8x8_not(vuint8x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT8x8_NOT_DEFINED
+#endif
+
+
+/* vint8x16 */
+
+#ifndef VINT8x16_SPLAT_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_splat(vec_int8 x)
+{
+	vint8x16 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VINT8x16_SPLAT_DEFINED
+#endif
+#ifndef VINT8x16_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_load_aligned(const vec_int8 x[16])
+{
+	vint8x16 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT8x16_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT8x16_LOAD_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_load(const vec_int8 x[16])
+{
+	vint8x16 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT8x16_LOAD_DEFINED
+#endif
+#ifndef VINT8x16_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 arr[16])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT8x16_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT8x16_STORE_DEFINED
+VEC_FUNC_IMPL void vint8x16_store(vint8x16 vec, vec_int8 arr[16])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT8x16_STORE_DEFINED
+#endif
+#ifndef VINT8x16_ADD_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_add(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_ADD_DEFINED
+#endif
+#ifndef VINT8x16_SUB_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_sub(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_SUB_DEFINED
+#endif
+#ifndef VINT8x16_MUL_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_mul(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_MUL_DEFINED
+#endif
+#ifndef VINT8x16_AND_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_AND_DEFINED
+#endif
+#ifndef VINT8x16_OR_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_OR_DEFINED
+#endif
+#ifndef VINT8x16_XOR_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_XOR_DEFINED
+#endif
+#ifndef VINT8x16_CMPLT_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_CMPLT_DEFINED
+#endif
+#ifndef VINT8x16_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_CMPEQ_DEFINED
+#endif
+#ifndef VINT8x16_CMPGT_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_CMPGT_DEFINED
+#endif
+#ifndef VINT8x16_CMPLE_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_cmple(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_CMPLE_DEFINED
+#endif
+#ifndef VINT8x16_CMPGE_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_cmpge(vint8x16 vec1, vint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_CMPGE_DEFINED
+#endif
+#ifndef VINT8x16_MIN_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x16_MIN_DEFINED
+#endif
+#ifndef VINT8x16_MAX_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x16_MAX_DEFINED
+#endif
+#ifndef VINT8x16_AVG_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2)
+{
+	vint8x16 ones = vint8x16_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT8x16_AVG_DEFINED
+#endif
+#ifndef VINT8x16_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_LSHIFT_DEFINED
+#endif
+#ifndef VINT8x16_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_rshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_RSHIFT_DEFINED
+#endif
+#ifndef VINT8x16_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT8x16_LRSHIFT_DEFINED
+#endif
+#ifndef VINT8x16_NOT_DEFINED
+VEC_FUNC_IMPL vint8x16 vint8x16_not(vint8x16 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT8x16_NOT_DEFINED
+#endif
+
+
+/* vuint8x16 */
+
+#ifndef VUINT8x16_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_splat(vec_uint8 x)
+{
+	vuint8x16 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VUINT8x16_SPLAT_DEFINED
+#endif
+#ifndef VUINT8x16_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_load_aligned(const vec_uint8 x[16])
+{
+	vuint8x16 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT8x16_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT8x16_LOAD_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 x[16])
+{
+	vuint8x16 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT8x16_LOAD_DEFINED
+#endif
+#ifndef VUINT8x16_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 arr[16])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT8x16_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT8x16_STORE_DEFINED
+VEC_FUNC_IMPL void vuint8x16_store(vuint8x16 vec, vec_uint8 arr[16])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT8x16_STORE_DEFINED
+#endif
+#ifndef VUINT8x16_ADD_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_add(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_ADD_DEFINED
+#endif
+#ifndef VUINT8x16_SUB_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_sub(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_SUB_DEFINED
+#endif
+#ifndef VUINT8x16_MUL_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_mul(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_MUL_DEFINED
+#endif
+#ifndef VUINT8x16_AND_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_and(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_AND_DEFINED
+#endif
+#ifndef VUINT8x16_OR_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_or(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_OR_DEFINED
+#endif
+#ifndef VUINT8x16_XOR_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_xor(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_XOR_DEFINED
+#endif
+#ifndef VUINT8x16_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmplt(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_CMPLT_DEFINED
+#endif
+#ifndef VUINT8x16_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpeq(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_CMPEQ_DEFINED
+#endif
+#ifndef VUINT8x16_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpgt(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_CMPGT_DEFINED
+#endif
+#ifndef VUINT8x16_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmple(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_CMPLE_DEFINED
+#endif
+#ifndef VUINT8x16_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpge(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_CMPGE_DEFINED
+#endif
+#ifndef VUINT8x16_MIN_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_min(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT8x16_MIN_DEFINED
+#endif
+#ifndef VUINT8x16_MAX_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_max(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vuint8x16 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT8x16_MAX_DEFINED
+#endif
+#ifndef VUINT8x16_AVG_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT8x16_AVG_DEFINED
+#endif
+#ifndef VUINT8x16_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_LSHIFT_DEFINED
+#endif
+#ifndef VUINT8x16_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_RSHIFT_DEFINED
+#endif
+#ifndef VUINT8x16_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT8x16_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT8x16_NOT_DEFINED
+VEC_FUNC_IMPL vuint8x16 vuint8x16_not(vuint8x16 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT8x16_NOT_DEFINED
+#endif
+
+
+/* vint8x32 */
+
+#ifndef VINT8x32_SPLAT_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_splat(vec_int8 x)
+{
+	vint8x32 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VINT8x32_SPLAT_DEFINED
+#endif
+#ifndef VINT8x32_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_load_aligned(const vec_int8 x[32])
+{
+	vint8x32 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT8x32_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT8x32_LOAD_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_load(const vec_int8 x[32])
+{
+	vint8x32 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT8x32_LOAD_DEFINED
+#endif
+#ifndef VINT8x32_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint8x32_store_aligned(vint8x32 vec, vec_int8 arr[32])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT8x32_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT8x32_STORE_DEFINED
+VEC_FUNC_IMPL void vint8x32_store(vint8x32 vec, vec_int8 arr[32])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT8x32_STORE_DEFINED
+#endif
+#ifndef VINT8x32_ADD_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_add(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_ADD_DEFINED
+#endif
+#ifndef VINT8x32_SUB_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_sub(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_SUB_DEFINED
+#endif
+#ifndef VINT8x32_MUL_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_mul(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_MUL_DEFINED
+#endif
+#ifndef VINT8x32_AND_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_and(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_AND_DEFINED
+#endif
+#ifndef VINT8x32_OR_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_or(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_OR_DEFINED
+#endif
+#ifndef VINT8x32_XOR_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_xor(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_XOR_DEFINED
+#endif
+#ifndef VINT8x32_CMPLT_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_cmplt(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_CMPLT_DEFINED
+#endif
+#ifndef VINT8x32_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_cmpeq(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_CMPEQ_DEFINED
+#endif
+#ifndef VINT8x32_CMPGT_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_cmpgt(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_CMPGT_DEFINED
+#endif
+#ifndef VINT8x32_CMPLE_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_cmple(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_CMPLE_DEFINED
+#endif
+#ifndef VINT8x32_CMPGE_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_cmpge(vint8x32 vec1, vint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_CMPGE_DEFINED
+#endif
+#ifndef VINT8x32_MIN_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_min(vint8x32 vec1, vint8x32 vec2)
+{
+	vint8x32 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x32_MIN_DEFINED
+#endif
+#ifndef VINT8x32_MAX_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_max(vint8x32 vec1, vint8x32 vec2)
+{
+	vint8x32 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x32_MAX_DEFINED
+#endif
+#ifndef VINT8x32_AVG_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_avg(vint8x32 vec1, vint8x32 vec2)
+{
+	vint8x32 ones = vint8x32_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT8x32_AVG_DEFINED
+#endif
+#ifndef VINT8x32_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_lshift(vint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_LSHIFT_DEFINED
+#endif
+#ifndef VINT8x32_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_rshift(vint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_RSHIFT_DEFINED
+#endif
+#ifndef VINT8x32_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_lrshift(vint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT8x32_LRSHIFT_DEFINED
+#endif
+#ifndef VINT8x32_NOT_DEFINED
+VEC_FUNC_IMPL vint8x32 vint8x32_not(vint8x32 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT8x32_NOT_DEFINED
+#endif
+
+
+/* vuint8x32 */
+
+#ifndef VUINT8x32_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_splat(vec_uint8 x)
+{
+	vuint8x32 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VUINT8x32_SPLAT_DEFINED
+#endif
+#ifndef VUINT8x32_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_load_aligned(const vec_uint8 x[32])
+{
+	vuint8x32 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT8x32_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT8x32_LOAD_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_load(const vec_uint8 x[32])
+{
+	vuint8x32 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT8x32_LOAD_DEFINED
+#endif
+#ifndef VUINT8x32_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint8x32_store_aligned(vuint8x32 vec, vec_uint8 arr[32])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT8x32_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT8x32_STORE_DEFINED
+VEC_FUNC_IMPL void vuint8x32_store(vuint8x32 vec, vec_uint8 arr[32])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT8x32_STORE_DEFINED
+#endif
+#ifndef VUINT8x32_ADD_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_add(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_ADD_DEFINED
+#endif
+#ifndef VUINT8x32_SUB_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_sub(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_SUB_DEFINED
+#endif
+#ifndef VUINT8x32_MUL_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_mul(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_MUL_DEFINED
+#endif
+#ifndef VUINT8x32_AND_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_and(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_AND_DEFINED
+#endif
+#ifndef VUINT8x32_OR_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_or(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_OR_DEFINED
+#endif
+#ifndef VUINT8x32_XOR_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_xor(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_XOR_DEFINED
+#endif
+#ifndef VUINT8x32_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmplt(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_CMPLT_DEFINED
+#endif
+#ifndef VUINT8x32_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpeq(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_CMPEQ_DEFINED
+#endif
+#ifndef VUINT8x32_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpgt(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_CMPGT_DEFINED
+#endif
+#ifndef VUINT8x32_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmple(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_CMPLE_DEFINED
+#endif
+#ifndef VUINT8x32_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpge(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_CMPGE_DEFINED
+#endif
+#ifndef VUINT8x32_MIN_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_min(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vuint8x32 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT8x32_MIN_DEFINED
+#endif
+#ifndef VUINT8x32_MAX_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_max(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vuint8x32 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT8x32_MAX_DEFINED
+#endif
+#ifndef VUINT8x32_AVG_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_avg(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT8x32_AVG_DEFINED
+#endif
+#ifndef VUINT8x32_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_lshift(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_LSHIFT_DEFINED
+#endif
+#ifndef VUINT8x32_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_rshift(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_RSHIFT_DEFINED
+#endif
+#ifndef VUINT8x32_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_lrshift(vuint8x32 vec1, vuint8x32 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT8x32_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT8x32_NOT_DEFINED
+VEC_FUNC_IMPL vuint8x32 vuint8x32_not(vuint8x32 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT8x32_NOT_DEFINED
+#endif
+
+
+/* vint8x64 */
+
+#ifndef VINT8x64_SPLAT_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_splat(vec_int8 x)
+{
+	vint8x64 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VINT8x64_SPLAT_DEFINED
+#endif
+#ifndef VINT8x64_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_load_aligned(const vec_int8 x[64])
+{
+	vint8x64 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT8x64_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT8x64_LOAD_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_load(const vec_int8 x[64])
+{
+	vint8x64 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT8x64_LOAD_DEFINED
+#endif
+#ifndef VINT8x64_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint8x64_store_aligned(vint8x64 vec, vec_int8 arr[64])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT8x64_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT8x64_STORE_DEFINED
+VEC_FUNC_IMPL void vint8x64_store(vint8x64 vec, vec_int8 arr[64])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT8x64_STORE_DEFINED
+#endif
+#ifndef VINT8x64_ADD_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_add(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_ADD_DEFINED
+#endif
+#ifndef VINT8x64_SUB_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_sub(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_SUB_DEFINED
+#endif
+#ifndef VINT8x64_MUL_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_mul(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_MUL_DEFINED
+#endif
+#ifndef VINT8x64_AND_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_and(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_AND_DEFINED
+#endif
+#ifndef VINT8x64_OR_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_or(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_OR_DEFINED
+#endif
+#ifndef VINT8x64_XOR_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_xor(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_XOR_DEFINED
+#endif
+#ifndef VINT8x64_CMPLT_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_cmplt(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_CMPLT_DEFINED
+#endif
+#ifndef VINT8x64_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_cmpeq(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_CMPEQ_DEFINED
+#endif
+#ifndef VINT8x64_CMPGT_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_cmpgt(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_CMPGT_DEFINED
+#endif
+#ifndef VINT8x64_CMPLE_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_cmple(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_CMPLE_DEFINED
+#endif
+#ifndef VINT8x64_CMPGE_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_cmpge(vint8x64 vec1, vint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_CMPGE_DEFINED
+#endif
+#ifndef VINT8x64_MIN_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_min(vint8x64 vec1, vint8x64 vec2)
+{
+	vint8x64 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x64_MIN_DEFINED
+#endif
+#ifndef VINT8x64_MAX_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_max(vint8x64 vec1, vint8x64 vec2)
+{
+	vint8x64 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT8x64_MAX_DEFINED
+#endif
+#ifndef VINT8x64_AVG_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_avg(vint8x64 vec1, vint8x64 vec2)
+{
+	vint8x64 ones = vint8x64_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT8x64_AVG_DEFINED
+#endif
+#ifndef VINT8x64_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_lshift(vint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_LSHIFT_DEFINED
+#endif
+#ifndef VINT8x64_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_rshift(vint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_RSHIFT_DEFINED
+#endif
+#ifndef VINT8x64_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_lrshift(vint8x64 vec1, vuint8x64 vec2)
+{
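+	/* Logical right shift: reinterpret as an unsigned vector so the shift zero-fills, then cast back. */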
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT8x64_LRSHIFT_DEFINED
+#endif
+#ifndef VINT8x64_NOT_DEFINED
+VEC_FUNC_IMPL vint8x64 vint8x64_not(vint8x64 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT8x64_NOT_DEFINED
+#endif
+
+
+/* vuint8x64 */
+
+#ifndef VUINT8x64_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_splat(vec_uint8 x)
+{
+	vuint8x64 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VUINT8x64_SPLAT_DEFINED
+#endif
+#ifndef VUINT8x64_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_load_aligned(const vec_uint8 x[64])
+{
+	vuint8x64 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT8x64_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT8x64_LOAD_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_load(const vec_uint8 x[64])
+{
+	vuint8x64 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT8x64_LOAD_DEFINED
+#endif
+#ifndef VUINT8x64_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint8x64_store_aligned(vuint8x64 vec, vec_uint8 arr[64])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT8x64_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT8x64_STORE_DEFINED
+VEC_FUNC_IMPL void vuint8x64_store(vuint8x64 vec, vec_uint8 arr[64])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT8x64_STORE_DEFINED
+#endif
+#ifndef VUINT8x64_ADD_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_add(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_ADD_DEFINED
+#endif
+#ifndef VUINT8x64_SUB_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_sub(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_SUB_DEFINED
+#endif
+#ifndef VUINT8x64_MUL_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_mul(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_MUL_DEFINED
+#endif
+#ifndef VUINT8x64_AND_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_and(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_AND_DEFINED
+#endif
+#ifndef VUINT8x64_OR_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_or(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_OR_DEFINED
+#endif
+#ifndef VUINT8x64_XOR_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_xor(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_XOR_DEFINED
+#endif
+#ifndef VUINT8x64_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmplt(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_CMPLT_DEFINED
+#endif
+#ifndef VUINT8x64_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpeq(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_CMPEQ_DEFINED
+#endif
+#ifndef VUINT8x64_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpgt(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_CMPGT_DEFINED
+#endif
+#ifndef VUINT8x64_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmple(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_CMPLE_DEFINED
+#endif
+#ifndef VUINT8x64_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpge(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_CMPGE_DEFINED
+#endif
+#ifndef VUINT8x64_MIN_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_min(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vuint8x64 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT8x64_MIN_DEFINED
+#endif
+#ifndef VUINT8x64_MAX_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_max(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vuint8x64 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT8x64_MAX_DEFINED
+#endif
+#ifndef VUINT8x64_AVG_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_avg(vuint8x64 vec1, vuint8x64 vec2)
+{
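+	/* Ceiling average without widening: (a >> 1) + (b >> 1) plus one when either low bit is set, i.e. (a + b + 1) >> 1. */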
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT8x64_AVG_DEFINED
+#endif
+#ifndef VUINT8x64_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_lshift(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_LSHIFT_DEFINED
+#endif
+#ifndef VUINT8x64_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_rshift(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_RSHIFT_DEFINED
+#endif
+#ifndef VUINT8x64_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_lrshift(vuint8x64 vec1, vuint8x64 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT8x64_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT8x64_NOT_DEFINED
+VEC_FUNC_IMPL vuint8x64 vuint8x64_not(vuint8x64 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT8x64_NOT_DEFINED
+#endif
+
+
+/* vint16x2 */
+
+#ifndef VINT16x2_SPLAT_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_splat(vec_int16 x)
+{
+	vint16x2 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	return vec;
+}
+# define VINT16x2_SPLAT_DEFINED
+#endif
+#ifndef VINT16x2_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_load_aligned(const vec_int16 x[2])
+{
+	vint16x2 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT16x2_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT16x2_LOAD_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_load(const vec_int16 x[2])
+{
+	vint16x2 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT16x2_LOAD_DEFINED
+#endif
+#ifndef VINT16x2_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint16x2_store_aligned(vint16x2 vec, vec_int16 arr[2])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT16x2_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT16x2_STORE_DEFINED
+VEC_FUNC_IMPL void vint16x2_store(vint16x2 vec, vec_int16 arr[2])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT16x2_STORE_DEFINED
+#endif
+#ifndef VINT16x2_ADD_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_add(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_ADD_DEFINED
+#endif
+#ifndef VINT16x2_SUB_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_sub(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_SUB_DEFINED
+#endif
+#ifndef VINT16x2_MUL_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_mul(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_MUL_DEFINED
+#endif
+#ifndef VINT16x2_AND_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_and(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_AND_DEFINED
+#endif
+#ifndef VINT16x2_OR_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_or(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_OR_DEFINED
+#endif
+#ifndef VINT16x2_XOR_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_xor(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_XOR_DEFINED
+#endif
+#ifndef VINT16x2_CMPLT_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_cmplt(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_CMPLT_DEFINED
+#endif
+#ifndef VINT16x2_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_cmpeq(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_CMPEQ_DEFINED
+#endif
+#ifndef VINT16x2_CMPGT_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_cmpgt(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_CMPGT_DEFINED
+#endif
+#ifndef VINT16x2_CMPLE_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_cmple(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_CMPLE_DEFINED
+#endif
+#ifndef VINT16x2_CMPGE_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_cmpge(vint16x2 vec1, vint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_CMPGE_DEFINED
+#endif
+#ifndef VINT16x2_MIN_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_min(vint16x2 vec1, vint16x2 vec2)
+{
+	vint16x2 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x2_MIN_DEFINED
+#endif
+#ifndef VINT16x2_MAX_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_max(vint16x2 vec1, vint16x2 vec2)
+{
+	vint16x2 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x2_MAX_DEFINED
+#endif
+#ifndef VINT16x2_AVG_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_avg(vint16x2 vec1, vint16x2 vec2)
+{
+	vint16x2 ones = vint16x2_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT16x2_AVG_DEFINED
+#endif
+#ifndef VINT16x2_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_lshift(vint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_LSHIFT_DEFINED
+#endif
+#ifndef VINT16x2_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_rshift(vint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_RSHIFT_DEFINED
+#endif
+#ifndef VINT16x2_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_lrshift(vint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT16x2_LRSHIFT_DEFINED
+#endif
+#ifndef VINT16x2_NOT_DEFINED
+VEC_FUNC_IMPL vint16x2 vint16x2_not(vint16x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT16x2_NOT_DEFINED
+#endif
+
+
+/* vuint16x2 */
+
+#ifndef VUINT16x2_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_splat(vec_uint16 x)
+{
+	vuint16x2 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	return vec;
+}
+# define VUINT16x2_SPLAT_DEFINED
+#endif
+#ifndef VUINT16x2_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_load_aligned(const vec_uint16 x[2])
+{
+	vuint16x2 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT16x2_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT16x2_LOAD_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_load(const vec_uint16 x[2])
+{
+	vuint16x2 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT16x2_LOAD_DEFINED
+#endif
+#ifndef VUINT16x2_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint16x2_store_aligned(vuint16x2 vec, vec_uint16 arr[2])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT16x2_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT16x2_STORE_DEFINED
+VEC_FUNC_IMPL void vuint16x2_store(vuint16x2 vec, vec_uint16 arr[2])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT16x2_STORE_DEFINED
+#endif
+#ifndef VUINT16x2_ADD_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_add(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_ADD_DEFINED
+#endif
+#ifndef VUINT16x2_SUB_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_sub(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_SUB_DEFINED
+#endif
+#ifndef VUINT16x2_MUL_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_mul(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_MUL_DEFINED
+#endif
+#ifndef VUINT16x2_AND_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_and(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_AND_DEFINED
+#endif
+#ifndef VUINT16x2_OR_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_or(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_OR_DEFINED
+#endif
+#ifndef VUINT16x2_XOR_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_xor(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_XOR_DEFINED
+#endif
+#ifndef VUINT16x2_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmplt(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_CMPLT_DEFINED
+#endif
+#ifndef VUINT16x2_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpeq(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_CMPEQ_DEFINED
+#endif
+#ifndef VUINT16x2_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpgt(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_CMPGT_DEFINED
+#endif
+#ifndef VUINT16x2_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmple(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_CMPLE_DEFINED
+#endif
+#ifndef VUINT16x2_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpge(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_CMPGE_DEFINED
+#endif
+#ifndef VUINT16x2_MIN_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_min(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vuint16x2 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT16x2_MIN_DEFINED
+#endif
+#ifndef VUINT16x2_MAX_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_max(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vuint16x2 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT16x2_MAX_DEFINED
+#endif
+#ifndef VUINT16x2_AVG_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_avg(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT16x2_AVG_DEFINED
+#endif
+#ifndef VUINT16x2_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_lshift(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_LSHIFT_DEFINED
+#endif
+#ifndef VUINT16x2_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_rshift(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_RSHIFT_DEFINED
+#endif
+#ifndef VUINT16x2_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_lrshift(vuint16x2 vec1, vuint16x2 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT16x2_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT16x2_NOT_DEFINED
+VEC_FUNC_IMPL vuint16x2 vuint16x2_not(vuint16x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT16x2_NOT_DEFINED
+#endif
+
+
+/* vint16x4 */
+
+#ifndef VINT16x4_SPLAT_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_splat(vec_int16 x)
+{
+	vint16x4 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	return vec;
+}
+# define VINT16x4_SPLAT_DEFINED
+#endif
+#ifndef VINT16x4_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_load_aligned(const vec_int16 x[4])
+{
+	vint16x4 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT16x4_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT16x4_LOAD_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_load(const vec_int16 x[4])
+{
+	vint16x4 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT16x4_LOAD_DEFINED
+#endif
+#ifndef VINT16x4_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint16x4_store_aligned(vint16x4 vec, vec_int16 arr[4])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT16x4_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT16x4_STORE_DEFINED
+VEC_FUNC_IMPL void vint16x4_store(vint16x4 vec, vec_int16 arr[4])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT16x4_STORE_DEFINED
+#endif
+#ifndef VINT16x4_ADD_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_add(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_ADD_DEFINED
+#endif
+#ifndef VINT16x4_SUB_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_sub(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_SUB_DEFINED
+#endif
+#ifndef VINT16x4_MUL_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_mul(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_MUL_DEFINED
+#endif
+#ifndef VINT16x4_AND_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_and(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_AND_DEFINED
+#endif
+#ifndef VINT16x4_OR_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_or(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_OR_DEFINED
+#endif
+#ifndef VINT16x4_XOR_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_xor(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_XOR_DEFINED
+#endif
+#ifndef VINT16x4_CMPLT_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_cmplt(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_CMPLT_DEFINED
+#endif
+#ifndef VINT16x4_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_cmpeq(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_CMPEQ_DEFINED
+#endif
+#ifndef VINT16x4_CMPGT_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_cmpgt(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_CMPGT_DEFINED
+#endif
+#ifndef VINT16x4_CMPLE_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_cmple(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_CMPLE_DEFINED
+#endif
+#ifndef VINT16x4_CMPGE_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_cmpge(vint16x4 vec1, vint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_CMPGE_DEFINED
+#endif
+#ifndef VINT16x4_MIN_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_min(vint16x4 vec1, vint16x4 vec2)
+{
+	vint16x4 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x4_MIN_DEFINED
+#endif
+#ifndef VINT16x4_MAX_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_max(vint16x4 vec1, vint16x4 vec2)
+{
+	vint16x4 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x4_MAX_DEFINED
+#endif
+#ifndef VINT16x4_AVG_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_avg(vint16x4 vec1, vint16x4 vec2)
+{
+	vint16x4 ones = vint16x4_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT16x4_AVG_DEFINED
+#endif
+#ifndef VINT16x4_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_lshift(vint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_LSHIFT_DEFINED
+#endif
+#ifndef VINT16x4_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_rshift(vint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_RSHIFT_DEFINED
+#endif
+#ifndef VINT16x4_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_lrshift(vint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT16x4_LRSHIFT_DEFINED
+#endif
+#ifndef VINT16x4_NOT_DEFINED
+VEC_FUNC_IMPL vint16x4 vint16x4_not(vint16x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT16x4_NOT_DEFINED
+#endif
+
+
+/* vuint16x4 */
+
+#ifndef VUINT16x4_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_splat(vec_uint16 x)
+{
+	vuint16x4 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	return vec;
+}
+# define VUINT16x4_SPLAT_DEFINED
+#endif
+#ifndef VUINT16x4_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_load_aligned(const vec_uint16 x[4])
+{
+	vuint16x4 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT16x4_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT16x4_LOAD_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_load(const vec_uint16 x[4])
+{
+	vuint16x4 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT16x4_LOAD_DEFINED
+#endif
+#ifndef VUINT16x4_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint16x4_store_aligned(vuint16x4 vec, vec_uint16 arr[4])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT16x4_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT16x4_STORE_DEFINED
+VEC_FUNC_IMPL void vuint16x4_store(vuint16x4 vec, vec_uint16 arr[4])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT16x4_STORE_DEFINED
+#endif
+#ifndef VUINT16x4_ADD_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_add(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_ADD_DEFINED
+#endif
+#ifndef VUINT16x4_SUB_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_sub(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_SUB_DEFINED
+#endif
+#ifndef VUINT16x4_MUL_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_mul(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_MUL_DEFINED
+#endif
+#ifndef VUINT16x4_AND_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_and(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_AND_DEFINED
+#endif
+#ifndef VUINT16x4_OR_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_or(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_OR_DEFINED
+#endif
+#ifndef VUINT16x4_XOR_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_xor(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_XOR_DEFINED
+#endif
+#ifndef VUINT16x4_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmplt(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_CMPLT_DEFINED
+#endif
+#ifndef VUINT16x4_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpeq(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_CMPEQ_DEFINED
+#endif
+#ifndef VUINT16x4_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpgt(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_CMPGT_DEFINED
+#endif
+#ifndef VUINT16x4_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmple(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_CMPLE_DEFINED
+#endif
+#ifndef VUINT16x4_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpge(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_CMPGE_DEFINED
+#endif
+#ifndef VUINT16x4_MIN_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_min(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vuint16x4 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT16x4_MIN_DEFINED
+#endif
+#ifndef VUINT16x4_MAX_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_max(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vuint16x4 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT16x4_MAX_DEFINED
+#endif
+#ifndef VUINT16x4_AVG_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_avg(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT16x4_AVG_DEFINED
+#endif
+#ifndef VUINT16x4_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_lshift(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_LSHIFT_DEFINED
+#endif
+#ifndef VUINT16x4_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_rshift(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_RSHIFT_DEFINED
+#endif
+#ifndef VUINT16x4_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_lrshift(vuint16x4 vec1, vuint16x4 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT16x4_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT16x4_NOT_DEFINED
+VEC_FUNC_IMPL vuint16x4 vuint16x4_not(vuint16x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT16x4_NOT_DEFINED
+#endif
+
+
+/* vint16x8 */
+
+#ifndef VINT16x8_SPLAT_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_splat(vec_int16 x)
+{
+	vint16x8 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VINT16x8_SPLAT_DEFINED
+#endif
+#ifndef VINT16x8_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_load_aligned(const vec_int16 x[8])
+{
+	vint16x8 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT16x8_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT16x8_LOAD_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_load(const vec_int16 x[8])
+{
+	vint16x8 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT16x8_LOAD_DEFINED
+#endif
+#ifndef VINT16x8_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 arr[8])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT16x8_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT16x8_STORE_DEFINED
+VEC_FUNC_IMPL void vint16x8_store(vint16x8 vec, vec_int16 arr[8])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT16x8_STORE_DEFINED
+#endif
+#ifndef VINT16x8_ADD_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_add(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_ADD_DEFINED
+#endif
+#ifndef VINT16x8_SUB_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_sub(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_SUB_DEFINED
+#endif
+#ifndef VINT16x8_MUL_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_mul(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_MUL_DEFINED
+#endif
+#ifndef VINT16x8_AND_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_AND_DEFINED
+#endif
+#ifndef VINT16x8_OR_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_OR_DEFINED
+#endif
+#ifndef VINT16x8_XOR_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_XOR_DEFINED
+#endif
+#ifndef VINT16x8_CMPLT_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_CMPLT_DEFINED
+#endif
+#ifndef VINT16x8_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_CMPEQ_DEFINED
+#endif
+#ifndef VINT16x8_CMPGT_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_CMPGT_DEFINED
+#endif
+#ifndef VINT16x8_CMPLE_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_cmple(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_CMPLE_DEFINED
+#endif
+#ifndef VINT16x8_CMPGE_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_cmpge(vint16x8 vec1, vint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_CMPGE_DEFINED
+#endif
+#ifndef VINT16x8_MIN_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x8_MIN_DEFINED
+#endif
+#ifndef VINT16x8_MAX_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x8_MAX_DEFINED
+#endif
+#ifndef VINT16x8_AVG_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2)
+{
+	vint16x8 ones = vint16x8_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT16x8_AVG_DEFINED
+#endif
+#ifndef VINT16x8_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_LSHIFT_DEFINED
+#endif
+#ifndef VINT16x8_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_RSHIFT_DEFINED
+#endif
+#ifndef VINT16x8_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_lrshift(vint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT16x8_LRSHIFT_DEFINED
+#endif
+#ifndef VINT16x8_NOT_DEFINED
+VEC_FUNC_IMPL vint16x8 vint16x8_not(vint16x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT16x8_NOT_DEFINED
+#endif
+
+
+/* vuint16x8 */
+
+#ifndef VUINT16x8_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_splat(vec_uint16 x)
+{
+	vuint16x8 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VUINT16x8_SPLAT_DEFINED
+#endif
+#ifndef VUINT16x8_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_load_aligned(const vec_uint16 x[8])
+{
+	vuint16x8 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT16x8_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT16x8_LOAD_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_load(const vec_uint16 x[8])
+{
+	vuint16x8 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT16x8_LOAD_DEFINED
+#endif
+#ifndef VUINT16x8_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 arr[8])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT16x8_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT16x8_STORE_DEFINED
+VEC_FUNC_IMPL void vuint16x8_store(vuint16x8 vec, vec_uint16 arr[8])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT16x8_STORE_DEFINED
+#endif
+#ifndef VUINT16x8_ADD_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_add(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_ADD_DEFINED
+#endif
+#ifndef VUINT16x8_SUB_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_sub(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_SUB_DEFINED
+#endif
+#ifndef VUINT16x8_MUL_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_mul(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_MUL_DEFINED
+#endif
+#ifndef VUINT16x8_AND_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_and(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_AND_DEFINED
+#endif
+#ifndef VUINT16x8_OR_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_or(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_OR_DEFINED
+#endif
+#ifndef VUINT16x8_XOR_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_xor(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_XOR_DEFINED
+#endif
+#ifndef VUINT16x8_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmplt(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_CMPLT_DEFINED
+#endif
+#ifndef VUINT16x8_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpeq(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_CMPEQ_DEFINED
+#endif
+#ifndef VUINT16x8_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpgt(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_CMPGT_DEFINED
+#endif
+#ifndef VUINT16x8_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmple(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_CMPLE_DEFINED
+#endif
+#ifndef VUINT16x8_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpge(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_CMPGE_DEFINED
+#endif
+#ifndef VUINT16x8_MIN_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_min(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT16x8_MIN_DEFINED
+#endif
+#ifndef VUINT16x8_MAX_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_max(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vuint16x8 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT16x8_MAX_DEFINED
+#endif
+#ifndef VUINT16x8_AVG_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT16x8_AVG_DEFINED
+#endif
+#ifndef VUINT16x8_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_LSHIFT_DEFINED
+#endif
+#ifndef VUINT16x8_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_rshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_RSHIFT_DEFINED
+#endif
+#ifndef VUINT16x8_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT16x8_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT16x8_NOT_DEFINED
+VEC_FUNC_IMPL vuint16x8 vuint16x8_not(vuint16x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT16x8_NOT_DEFINED
+#endif
+
+
+/* vint16x16 */
+
+#ifndef VINT16x16_SPLAT_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_splat(vec_int16 x)
+{
+	vint16x16 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VINT16x16_SPLAT_DEFINED
+#endif
+#ifndef VINT16x16_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_load_aligned(const vec_int16 x[16])
+{
+	vint16x16 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT16x16_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT16x16_LOAD_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_load(const vec_int16 x[16])
+{
+	vint16x16 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT16x16_LOAD_DEFINED
+#endif
+#ifndef VINT16x16_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint16x16_store_aligned(vint16x16 vec, vec_int16 arr[16])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT16x16_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT16x16_STORE_DEFINED
+VEC_FUNC_IMPL void vint16x16_store(vint16x16 vec, vec_int16 arr[16])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT16x16_STORE_DEFINED
+#endif
+#ifndef VINT16x16_ADD_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_add(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_ADD_DEFINED
+#endif
+#ifndef VINT16x16_SUB_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_sub(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_SUB_DEFINED
+#endif
+#ifndef VINT16x16_MUL_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_mul(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_MUL_DEFINED
+#endif
+#ifndef VINT16x16_AND_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_and(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_AND_DEFINED
+#endif
+#ifndef VINT16x16_OR_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_or(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_OR_DEFINED
+#endif
+#ifndef VINT16x16_XOR_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_xor(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_XOR_DEFINED
+#endif
+#ifndef VINT16x16_CMPLT_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_cmplt(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_CMPLT_DEFINED
+#endif
+#ifndef VINT16x16_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_cmpeq(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_CMPEQ_DEFINED
+#endif
+#ifndef VINT16x16_CMPGT_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_cmpgt(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_CMPGT_DEFINED
+#endif
+#ifndef VINT16x16_CMPLE_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_cmple(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_CMPLE_DEFINED
+#endif
+#ifndef VINT16x16_CMPGE_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_cmpge(vint16x16 vec1, vint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_CMPGE_DEFINED
+#endif
+#ifndef VINT16x16_MIN_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_min(vint16x16 vec1, vint16x16 vec2)
+{
+	vint16x16 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x16_MIN_DEFINED
+#endif
+#ifndef VINT16x16_MAX_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_max(vint16x16 vec1, vint16x16 vec2)
+{
+	vint16x16 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x16_MAX_DEFINED
+#endif
+#ifndef VINT16x16_AVG_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_avg(vint16x16 vec1, vint16x16 vec2)
+{
+	vint16x16 ones = vint16x16_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT16x16_AVG_DEFINED
+#endif
+#ifndef VINT16x16_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_lshift(vint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_LSHIFT_DEFINED
+#endif
+#ifndef VINT16x16_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_rshift(vint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_RSHIFT_DEFINED
+#endif
+#ifndef VINT16x16_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_lrshift(vint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT16x16_LRSHIFT_DEFINED
+#endif
+#ifndef VINT16x16_NOT_DEFINED
+VEC_FUNC_IMPL vint16x16 vint16x16_not(vint16x16 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT16x16_NOT_DEFINED
+#endif
+
+
+/* vuint16x16 */
+
+#ifndef VUINT16x16_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_splat(vec_uint16 x)
+{
+	vuint16x16 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VUINT16x16_SPLAT_DEFINED
+#endif
+#ifndef VUINT16x16_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_load_aligned(const vec_uint16 x[16])
+{
+	vuint16x16 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT16x16_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT16x16_LOAD_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_load(const vec_uint16 x[16])
+{
+	vuint16x16 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT16x16_LOAD_DEFINED
+#endif
+#ifndef VUINT16x16_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint16x16_store_aligned(vuint16x16 vec, vec_uint16 arr[16])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT16x16_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT16x16_STORE_DEFINED
+VEC_FUNC_IMPL void vuint16x16_store(vuint16x16 vec, vec_uint16 arr[16])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT16x16_STORE_DEFINED
+#endif
+#ifndef VUINT16x16_ADD_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_add(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_ADD_DEFINED
+#endif
+#ifndef VUINT16x16_SUB_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_sub(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_SUB_DEFINED
+#endif
+#ifndef VUINT16x16_MUL_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_mul(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_MUL_DEFINED
+#endif
+#ifndef VUINT16x16_AND_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_and(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_AND_DEFINED
+#endif
+#ifndef VUINT16x16_OR_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_or(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_OR_DEFINED
+#endif
+#ifndef VUINT16x16_XOR_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_xor(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_XOR_DEFINED
+#endif
+#ifndef VUINT16x16_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmplt(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_CMPLT_DEFINED
+#endif
+#ifndef VUINT16x16_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpeq(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_CMPEQ_DEFINED
+#endif
+#ifndef VUINT16x16_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpgt(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_CMPGT_DEFINED
+#endif
+#ifndef VUINT16x16_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmple(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_CMPLE_DEFINED
+#endif
+#ifndef VUINT16x16_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpge(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_CMPGE_DEFINED
+#endif
+#ifndef VUINT16x16_MIN_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_min(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vuint16x16 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT16x16_MIN_DEFINED
+#endif
+#ifndef VUINT16x16_MAX_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_max(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vuint16x16 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT16x16_MAX_DEFINED
+#endif
+#ifndef VUINT16x16_AVG_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_avg(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT16x16_AVG_DEFINED
+#endif
+#ifndef VUINT16x16_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_lshift(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_LSHIFT_DEFINED
+#endif
+#ifndef VUINT16x16_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_rshift(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_RSHIFT_DEFINED
+#endif
+#ifndef VUINT16x16_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_lrshift(vuint16x16 vec1, vuint16x16 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT16x16_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT16x16_NOT_DEFINED
+VEC_FUNC_IMPL vuint16x16 vuint16x16_not(vuint16x16 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT16x16_NOT_DEFINED
+#endif
+
+
+/* vint16x32 */
+
+#ifndef VINT16x32_SPLAT_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_splat(vec_int16 x)
+{
+	vint16x32 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VINT16x32_SPLAT_DEFINED
+#endif
+#ifndef VINT16x32_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_load_aligned(const vec_int16 x[32])
+{
+	vint16x32 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT16x32_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT16x32_LOAD_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_load(const vec_int16 x[32])
+{
+	vint16x32 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT16x32_LOAD_DEFINED
+#endif
+#ifndef VINT16x32_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint16x32_store_aligned(vint16x32 vec, vec_int16 arr[32])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT16x32_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT16x32_STORE_DEFINED
+VEC_FUNC_IMPL void vint16x32_store(vint16x32 vec, vec_int16 arr[32])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT16x32_STORE_DEFINED
+#endif
+#ifndef VINT16x32_ADD_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_add(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_ADD_DEFINED
+#endif
+#ifndef VINT16x32_SUB_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_sub(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_SUB_DEFINED
+#endif
+#ifndef VINT16x32_MUL_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_mul(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_MUL_DEFINED
+#endif
+#ifndef VINT16x32_AND_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_and(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_AND_DEFINED
+#endif
+#ifndef VINT16x32_OR_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_or(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_OR_DEFINED
+#endif
+#ifndef VINT16x32_XOR_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_xor(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_XOR_DEFINED
+#endif
+#ifndef VINT16x32_CMPLT_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_cmplt(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_CMPLT_DEFINED
+#endif
+#ifndef VINT16x32_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_cmpeq(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_CMPEQ_DEFINED
+#endif
+#ifndef VINT16x32_CMPGT_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_cmpgt(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_CMPGT_DEFINED
+#endif
+#ifndef VINT16x32_CMPLE_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_cmple(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_CMPLE_DEFINED
+#endif
+#ifndef VINT16x32_CMPGE_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_cmpge(vint16x32 vec1, vint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_CMPGE_DEFINED
+#endif
+#ifndef VINT16x32_MIN_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_min(vint16x32 vec1, vint16x32 vec2)
+{
+	vint16x32 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x32_MIN_DEFINED
+#endif
+#ifndef VINT16x32_MAX_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_max(vint16x32 vec1, vint16x32 vec2)
+{
+	vint16x32 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT16x32_MAX_DEFINED
+#endif
+#ifndef VINT16x32_AVG_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_avg(vint16x32 vec1, vint16x32 vec2)
+{
+	vint16x32 ones = vint16x32_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT16x32_AVG_DEFINED
+#endif
+#ifndef VINT16x32_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_lshift(vint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_LSHIFT_DEFINED
+#endif
+#ifndef VINT16x32_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_rshift(vint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_RSHIFT_DEFINED
+#endif
+#ifndef VINT16x32_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_lrshift(vint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT16x32_LRSHIFT_DEFINED
+#endif
+#ifndef VINT16x32_NOT_DEFINED
+VEC_FUNC_IMPL vint16x32 vint16x32_not(vint16x32 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT16x32_NOT_DEFINED
+#endif
+
+
+/* vuint16x32 */
+
+#ifndef VUINT16x32_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_splat(vec_uint16 x)
+{
+	vuint16x32 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VUINT16x32_SPLAT_DEFINED
+#endif
+#ifndef VUINT16x32_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_load_aligned(const vec_uint16 x[32])
+{
+	vuint16x32 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT16x32_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT16x32_LOAD_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_load(const vec_uint16 x[32])
+{
+	vuint16x32 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT16x32_LOAD_DEFINED
+#endif
+#ifndef VUINT16x32_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint16x32_store_aligned(vuint16x32 vec, vec_uint16 arr[32])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT16x32_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT16x32_STORE_DEFINED
+VEC_FUNC_IMPL void vuint16x32_store(vuint16x32 vec, vec_uint16 arr[32])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT16x32_STORE_DEFINED
+#endif
+#ifndef VUINT16x32_ADD_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_add(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_ADD_DEFINED
+#endif
+#ifndef VUINT16x32_SUB_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_sub(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_SUB_DEFINED
+#endif
+#ifndef VUINT16x32_MUL_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_mul(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_MUL_DEFINED
+#endif
+#ifndef VUINT16x32_AND_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_and(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_AND_DEFINED
+#endif
+#ifndef VUINT16x32_OR_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_or(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_OR_DEFINED
+#endif
+#ifndef VUINT16x32_XOR_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_xor(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_XOR_DEFINED
+#endif
+#ifndef VUINT16x32_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmplt(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_CMPLT_DEFINED
+#endif
+#ifndef VUINT16x32_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpeq(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_CMPEQ_DEFINED
+#endif
+#ifndef VUINT16x32_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpgt(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_CMPGT_DEFINED
+#endif
+#ifndef VUINT16x32_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmple(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_CMPLE_DEFINED
+#endif
+#ifndef VUINT16x32_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpge(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_CMPGE_DEFINED
+#endif
+#ifndef VUINT16x32_MIN_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_min(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vuint16x32 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT16x32_MIN_DEFINED
+#endif
+#ifndef VUINT16x32_MAX_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_max(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vuint16x32 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT16x32_MAX_DEFINED
+#endif
+#ifndef VUINT16x32_AVG_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_avg(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT16x32_AVG_DEFINED
+#endif
+#ifndef VUINT16x32_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_lshift(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_LSHIFT_DEFINED
+#endif
+#ifndef VUINT16x32_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_rshift(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_RSHIFT_DEFINED
+#endif
+#ifndef VUINT16x32_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_lrshift(vuint16x32 vec1, vuint16x32 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT16x32_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT16x32_NOT_DEFINED
+VEC_FUNC_IMPL vuint16x32 vuint16x32_not(vuint16x32 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT16x32_NOT_DEFINED
+#endif
+
+
+/* vint32x2 */
+
+#ifndef VINT32x2_SPLAT_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_splat(vec_int32 x)
+{
+	vint32x2 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	return vec;
+}
+# define VINT32x2_SPLAT_DEFINED
+#endif
+#ifndef VINT32x2_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_load_aligned(const vec_int32 x[2])
+{
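+	/* aligned loads dereference the pointer as a vector directly;
+	 * the unaligned load below falls back to memcpy */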
+	vint32x2 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT32x2_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT32x2_LOAD_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_load(const vec_int32 x[2])
+{
+	vint32x2 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT32x2_LOAD_DEFINED
+#endif
+#ifndef VINT32x2_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint32x2_store_aligned(vint32x2 vec, vec_int32 arr[2])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT32x2_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT32x2_STORE_DEFINED
+VEC_FUNC_IMPL void vint32x2_store(vint32x2 vec, vec_int32 arr[2])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT32x2_STORE_DEFINED
+#endif
+#ifndef VINT32x2_ADD_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_add(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_ADD_DEFINED
+#endif
+#ifndef VINT32x2_SUB_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_sub(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_SUB_DEFINED
+#endif
+#ifndef VINT32x2_MUL_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_mul(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_MUL_DEFINED
+#endif
+#ifndef VINT32x2_AND_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_and(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_AND_DEFINED
+#endif
+#ifndef VINT32x2_OR_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_or(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_OR_DEFINED
+#endif
+#ifndef VINT32x2_XOR_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_xor(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_XOR_DEFINED
+#endif
+#ifndef VINT32x2_CMPLT_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_cmplt(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_CMPLT_DEFINED
+#endif
+#ifndef VINT32x2_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpeq(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_CMPEQ_DEFINED
+#endif
+#ifndef VINT32x2_CMPGT_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpgt(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_CMPGT_DEFINED
+#endif
+#ifndef VINT32x2_CMPLE_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_cmple(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_CMPLE_DEFINED
+#endif
+#ifndef VINT32x2_CMPGE_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_cmpge(vint32x2 vec1, vint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_CMPGE_DEFINED
+#endif
+#ifndef VINT32x2_MIN_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_min(vint32x2 vec1, vint32x2 vec2)
+{
+	vint32x2 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x2_MIN_DEFINED
+#endif
+#ifndef VINT32x2_MAX_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_max(vint32x2 vec1, vint32x2 vec2)
+{
+	vint32x2 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x2_MAX_DEFINED
+#endif
+#ifndef VINT32x2_AVG_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_avg(vint32x2 vec1, vint32x2 vec2)
+{
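+	/* average without intermediate overflow: add the truncated halves
+	 * of each lane, then fold the two remainders back in */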
+	vint32x2 ones = vint32x2_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT32x2_AVG_DEFINED
+#endif
+#ifndef VINT32x2_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_lshift(vint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_LSHIFT_DEFINED
+#endif
+#ifndef VINT32x2_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_rshift(vint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_RSHIFT_DEFINED
+#endif
+#ifndef VINT32x2_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_lrshift(vint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT32x2_LRSHIFT_DEFINED
+#endif
+#ifndef VINT32x2_NOT_DEFINED
+VEC_FUNC_IMPL vint32x2 vint32x2_not(vint32x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT32x2_NOT_DEFINED
+#endif
+
+
+/* vuint32x2 */
+
+#ifndef VUINT32x2_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_splat(vec_uint32 x)
+{
+	vuint32x2 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	return vec;
+}
+# define VUINT32x2_SPLAT_DEFINED
+#endif
+#ifndef VUINT32x2_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_load_aligned(const vec_uint32 x[2])
+{
+	vuint32x2 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT32x2_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT32x2_LOAD_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_load(const vec_uint32 x[2])
+{
+	vuint32x2 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT32x2_LOAD_DEFINED
+#endif
+#ifndef VUINT32x2_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint32x2_store_aligned(vuint32x2 vec, vec_uint32 arr[2])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT32x2_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT32x2_STORE_DEFINED
+VEC_FUNC_IMPL void vuint32x2_store(vuint32x2 vec, vec_uint32 arr[2])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT32x2_STORE_DEFINED
+#endif
+#ifndef VUINT32x2_ADD_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_add(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_ADD_DEFINED
+#endif
+#ifndef VUINT32x2_SUB_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_sub(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_SUB_DEFINED
+#endif
+#ifndef VUINT32x2_MUL_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_mul(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_MUL_DEFINED
+#endif
+#ifndef VUINT32x2_AND_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_and(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_AND_DEFINED
+#endif
+#ifndef VUINT32x2_OR_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_or(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_OR_DEFINED
+#endif
+#ifndef VUINT32x2_XOR_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_xor(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_XOR_DEFINED
+#endif
+#ifndef VUINT32x2_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmplt(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_CMPLT_DEFINED
+#endif
+#ifndef VUINT32x2_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpeq(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_CMPEQ_DEFINED
+#endif
+#ifndef VUINT32x2_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpgt(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_CMPGT_DEFINED
+#endif
+#ifndef VUINT32x2_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmple(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_CMPLE_DEFINED
+#endif
+#ifndef VUINT32x2_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpge(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_CMPGE_DEFINED
+#endif
+#ifndef VUINT32x2_MIN_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_min(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vuint32x2 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT32x2_MIN_DEFINED
+#endif
+#ifndef VUINT32x2_MAX_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_max(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vuint32x2 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT32x2_MAX_DEFINED
+#endif
+#ifndef VUINT32x2_AVG_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_avg(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT32x2_AVG_DEFINED
+#endif
+#ifndef VUINT32x2_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_lshift(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_LSHIFT_DEFINED
+#endif
+#ifndef VUINT32x2_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_rshift(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_RSHIFT_DEFINED
+#endif
+#ifndef VUINT32x2_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_lrshift(vuint32x2 vec1, vuint32x2 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT32x2_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT32x2_NOT_DEFINED
+VEC_FUNC_IMPL vuint32x2 vuint32x2_not(vuint32x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT32x2_NOT_DEFINED
+#endif
+
+
+/* vint32x4 */
+
+#ifndef VINT32x4_SPLAT_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_splat(vec_int32 x)
+{
+	vint32x4 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	return vec;
+}
+# define VINT32x4_SPLAT_DEFINED
+#endif
+#ifndef VINT32x4_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_load_aligned(const vec_int32 x[4])
+{
+	vint32x4 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT32x4_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT32x4_LOAD_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_load(const vec_int32 x[4])
+{
+	vint32x4 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT32x4_LOAD_DEFINED
+#endif
+#ifndef VINT32x4_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 arr[4])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT32x4_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT32x4_STORE_DEFINED
+VEC_FUNC_IMPL void vint32x4_store(vint32x4 vec, vec_int32 arr[4])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT32x4_STORE_DEFINED
+#endif
+#ifndef VINT32x4_ADD_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_ADD_DEFINED
+#endif
+#ifndef VINT32x4_SUB_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_sub(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_SUB_DEFINED
+#endif
+#ifndef VINT32x4_MUL_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_mul(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_MUL_DEFINED
+#endif
+#ifndef VINT32x4_AND_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_AND_DEFINED
+#endif
+#ifndef VINT32x4_OR_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_OR_DEFINED
+#endif
+#ifndef VINT32x4_XOR_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_XOR_DEFINED
+#endif
+#ifndef VINT32x4_CMPLT_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_CMPLT_DEFINED
+#endif
+#ifndef VINT32x4_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_CMPEQ_DEFINED
+#endif
+#ifndef VINT32x4_CMPGT_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_CMPGT_DEFINED
+#endif
+#ifndef VINT32x4_CMPLE_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_cmple(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_CMPLE_DEFINED
+#endif
+#ifndef VINT32x4_CMPGE_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpge(vint32x4 vec1, vint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_CMPGE_DEFINED
+#endif
+#ifndef VINT32x4_MIN_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x4_MIN_DEFINED
+#endif
+#ifndef VINT32x4_MAX_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x4_MAX_DEFINED
+#endif
+#ifndef VINT32x4_AVG_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2)
+{
+	vint32x4 ones = vint32x4_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT32x4_AVG_DEFINED
+#endif
+#ifndef VINT32x4_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_LSHIFT_DEFINED
+#endif
+#ifndef VINT32x4_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_RSHIFT_DEFINED
+#endif
+#ifndef VINT32x4_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT32x4_LRSHIFT_DEFINED
+#endif
+#ifndef VINT32x4_NOT_DEFINED
+VEC_FUNC_IMPL vint32x4 vint32x4_not(vint32x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT32x4_NOT_DEFINED
+#endif
+
+
+/* vuint32x4 */
+
+#ifndef VUINT32x4_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_splat(vec_uint32 x)
+{
+	vuint32x4 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	return vec;
+}
+# define VUINT32x4_SPLAT_DEFINED
+#endif
+#ifndef VUINT32x4_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_load_aligned(const vec_uint32 x[4])
+{
+	vuint32x4 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT32x4_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT32x4_LOAD_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_load(const vec_uint32 x[4])
+{
+	vuint32x4 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT32x4_LOAD_DEFINED
+#endif
+#ifndef VUINT32x4_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 arr[4])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT32x4_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT32x4_STORE_DEFINED
+VEC_FUNC_IMPL void vuint32x4_store(vuint32x4 vec, vec_uint32 arr[4])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT32x4_STORE_DEFINED
+#endif
+#ifndef VUINT32x4_ADD_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_add(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_ADD_DEFINED
+#endif
+#ifndef VUINT32x4_SUB_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_sub(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_SUB_DEFINED
+#endif
+#ifndef VUINT32x4_MUL_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_mul(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_MUL_DEFINED
+#endif
+#ifndef VUINT32x4_AND_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_and(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_AND_DEFINED
+#endif
+#ifndef VUINT32x4_OR_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_or(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_OR_DEFINED
+#endif
+#ifndef VUINT32x4_XOR_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_xor(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_XOR_DEFINED
+#endif
+#ifndef VUINT32x4_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmplt(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_CMPLT_DEFINED
+#endif
+#ifndef VUINT32x4_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpeq(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_CMPEQ_DEFINED
+#endif
+#ifndef VUINT32x4_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpgt(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_CMPGT_DEFINED
+#endif
+#ifndef VUINT32x4_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmple(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_CMPLE_DEFINED
+#endif
+#ifndef VUINT32x4_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpge(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_CMPGE_DEFINED
+#endif
+#ifndef VUINT32x4_MIN_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_min(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT32x4_MIN_DEFINED
+#endif
+#ifndef VUINT32x4_MAX_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_max(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vuint32x4 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT32x4_MAX_DEFINED
+#endif
+#ifndef VUINT32x4_AVG_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT32x4_AVG_DEFINED
+#endif
+#ifndef VUINT32x4_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_LSHIFT_DEFINED
+#endif
+#ifndef VUINT32x4_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_RSHIFT_DEFINED
+#endif
+#ifndef VUINT32x4_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT32x4_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT32x4_NOT_DEFINED
+VEC_FUNC_IMPL vuint32x4 vuint32x4_not(vuint32x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT32x4_NOT_DEFINED
+#endif
+
+
+/* vint32x8 */
+
+#ifndef VINT32x8_SPLAT_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_splat(vec_int32 x)
+{
+	vint32x8 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VINT32x8_SPLAT_DEFINED
+#endif
+#ifndef VINT32x8_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_load_aligned(const vec_int32 x[8])
+{
+	vint32x8 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT32x8_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT32x8_LOAD_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_load(const vec_int32 x[8])
+{
+	vint32x8 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT32x8_LOAD_DEFINED
+#endif
+#ifndef VINT32x8_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint32x8_store_aligned(vint32x8 vec, vec_int32 arr[8])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT32x8_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT32x8_STORE_DEFINED
+VEC_FUNC_IMPL void vint32x8_store(vint32x8 vec, vec_int32 arr[8])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT32x8_STORE_DEFINED
+#endif
+#ifndef VINT32x8_ADD_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_add(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_ADD_DEFINED
+#endif
+#ifndef VINT32x8_SUB_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_sub(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_SUB_DEFINED
+#endif
+#ifndef VINT32x8_MUL_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_mul(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_MUL_DEFINED
+#endif
+#ifndef VINT32x8_AND_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_and(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_AND_DEFINED
+#endif
+#ifndef VINT32x8_OR_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_or(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_OR_DEFINED
+#endif
+#ifndef VINT32x8_XOR_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_xor(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_XOR_DEFINED
+#endif
+#ifndef VINT32x8_CMPLT_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_cmplt(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_CMPLT_DEFINED
+#endif
+#ifndef VINT32x8_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_cmpeq(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_CMPEQ_DEFINED
+#endif
+#ifndef VINT32x8_CMPGT_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_cmpgt(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_CMPGT_DEFINED
+#endif
+#ifndef VINT32x8_CMPLE_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_cmple(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_CMPLE_DEFINED
+#endif
+#ifndef VINT32x8_CMPGE_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_cmpge(vint32x8 vec1, vint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_CMPGE_DEFINED
+#endif
+#ifndef VINT32x8_MIN_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_min(vint32x8 vec1, vint32x8 vec2)
+{
+	vint32x8 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x8_MIN_DEFINED
+#endif
+#ifndef VINT32x8_MAX_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_max(vint32x8 vec1, vint32x8 vec2)
+{
+	vint32x8 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x8_MAX_DEFINED
+#endif
+#ifndef VINT32x8_AVG_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_avg(vint32x8 vec1, vint32x8 vec2)
+{
+	vint32x8 ones = vint32x8_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT32x8_AVG_DEFINED
+#endif
+#ifndef VINT32x8_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_lshift(vint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_LSHIFT_DEFINED
+#endif
+#ifndef VINT32x8_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_rshift(vint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_RSHIFT_DEFINED
+#endif
+#ifndef VINT32x8_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_lrshift(vint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT32x8_LRSHIFT_DEFINED
+#endif
+#ifndef VINT32x8_NOT_DEFINED
+VEC_FUNC_IMPL vint32x8 vint32x8_not(vint32x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT32x8_NOT_DEFINED
+#endif
+
+
+/* vuint32x8 */
+
+#ifndef VUINT32x8_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_splat(vec_uint32 x)
+{
+	vuint32x8 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VUINT32x8_SPLAT_DEFINED
+#endif
+#ifndef VUINT32x8_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_load_aligned(const vec_uint32 x[8])
+{
+	vuint32x8 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT32x8_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT32x8_LOAD_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_load(const vec_uint32 x[8])
+{
+	vuint32x8 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT32x8_LOAD_DEFINED
+#endif
+#ifndef VUINT32x8_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint32x8_store_aligned(vuint32x8 vec, vec_uint32 arr[8])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT32x8_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT32x8_STORE_DEFINED
+VEC_FUNC_IMPL void vuint32x8_store(vuint32x8 vec, vec_uint32 arr[8])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT32x8_STORE_DEFINED
+#endif
+#ifndef VUINT32x8_ADD_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_add(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_ADD_DEFINED
+#endif
+#ifndef VUINT32x8_SUB_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_sub(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_SUB_DEFINED
+#endif
+#ifndef VUINT32x8_MUL_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_mul(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_MUL_DEFINED
+#endif
+#ifndef VUINT32x8_AND_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_and(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_AND_DEFINED
+#endif
+#ifndef VUINT32x8_OR_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_or(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_OR_DEFINED
+#endif
+#ifndef VUINT32x8_XOR_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_xor(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_XOR_DEFINED
+#endif
+#ifndef VUINT32x8_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmplt(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_CMPLT_DEFINED
+#endif
+#ifndef VUINT32x8_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpeq(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_CMPEQ_DEFINED
+#endif
+#ifndef VUINT32x8_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpgt(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_CMPGT_DEFINED
+#endif
+#ifndef VUINT32x8_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmple(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_CMPLE_DEFINED
+#endif
+#ifndef VUINT32x8_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpge(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_CMPGE_DEFINED
+#endif
+#ifndef VUINT32x8_MIN_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_min(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vuint32x8 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT32x8_MIN_DEFINED
+#endif
+#ifndef VUINT32x8_MAX_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_max(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vuint32x8 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT32x8_MAX_DEFINED
+#endif
+#ifndef VUINT32x8_AVG_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_avg(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT32x8_AVG_DEFINED
+#endif
+#ifndef VUINT32x8_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_lshift(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_LSHIFT_DEFINED
+#endif
+#ifndef VUINT32x8_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_rshift(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_RSHIFT_DEFINED
+#endif
+#ifndef VUINT32x8_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_lrshift(vuint32x8 vec1, vuint32x8 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT32x8_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT32x8_NOT_DEFINED
+VEC_FUNC_IMPL vuint32x8 vuint32x8_not(vuint32x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT32x8_NOT_DEFINED
+#endif
+
+
+/* vint32x16 */
+
+#ifndef VINT32x16_SPLAT_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_splat(vec_int32 x)
+{
+	vint32x16 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VINT32x16_SPLAT_DEFINED
+#endif
+#ifndef VINT32x16_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_load_aligned(const vec_int32 x[16])
+{
+	vint32x16 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT32x16_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT32x16_LOAD_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_load(const vec_int32 x[16])
+{
+	vint32x16 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT32x16_LOAD_DEFINED
+#endif
+#ifndef VINT32x16_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint32x16_store_aligned(vint32x16 vec, vec_int32 arr[16])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT32x16_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT32x16_STORE_DEFINED
+VEC_FUNC_IMPL void vint32x16_store(vint32x16 vec, vec_int32 arr[16])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT32x16_STORE_DEFINED
+#endif
+#ifndef VINT32x16_ADD_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_add(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_ADD_DEFINED
+#endif
+#ifndef VINT32x16_SUB_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_sub(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_SUB_DEFINED
+#endif
+#ifndef VINT32x16_MUL_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_mul(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_MUL_DEFINED
+#endif
+#ifndef VINT32x16_AND_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_and(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_AND_DEFINED
+#endif
+#ifndef VINT32x16_OR_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_or(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_OR_DEFINED
+#endif
+#ifndef VINT32x16_XOR_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_xor(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_XOR_DEFINED
+#endif
+#ifndef VINT32x16_CMPLT_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_cmplt(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_CMPLT_DEFINED
+#endif
+#ifndef VINT32x16_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_cmpeq(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_CMPEQ_DEFINED
+#endif
+#ifndef VINT32x16_CMPGT_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_cmpgt(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_CMPGT_DEFINED
+#endif
+#ifndef VINT32x16_CMPLE_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_cmple(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_CMPLE_DEFINED
+#endif
+#ifndef VINT32x16_CMPGE_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_cmpge(vint32x16 vec1, vint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_CMPGE_DEFINED
+#endif
+#ifndef VINT32x16_MIN_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_min(vint32x16 vec1, vint32x16 vec2)
+{
+	vint32x16 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x16_MIN_DEFINED
+#endif
+#ifndef VINT32x16_MAX_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_max(vint32x16 vec1, vint32x16 vec2)
+{
+	vint32x16 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT32x16_MAX_DEFINED
+#endif
+#ifndef VINT32x16_AVG_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_avg(vint32x16 vec1, vint32x16 vec2)
+{
+	vint32x16 ones = vint32x16_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT32x16_AVG_DEFINED
+#endif
+#ifndef VINT32x16_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_lshift(vint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_LSHIFT_DEFINED
+#endif
+#ifndef VINT32x16_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_rshift(vint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_RSHIFT_DEFINED
+#endif
+#ifndef VINT32x16_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_lrshift(vint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT32x16_LRSHIFT_DEFINED
+#endif
+#ifndef VINT32x16_NOT_DEFINED
+VEC_FUNC_IMPL vint32x16 vint32x16_not(vint32x16 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT32x16_NOT_DEFINED
+#endif
+
+
+/* vuint32x16 */
+
+#ifndef VUINT32x16_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_splat(vec_uint32 x)
+{
+	vuint32x16 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VUINT32x16_SPLAT_DEFINED
+#endif
+#ifndef VUINT32x16_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_load_aligned(const vec_uint32 x[16])
+{
+	vuint32x16 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT32x16_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT32x16_LOAD_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_load(const vec_uint32 x[16])
+{
+	vuint32x16 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT32x16_LOAD_DEFINED
+#endif
+#ifndef VUINT32x16_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint32x16_store_aligned(vuint32x16 vec, vec_uint32 arr[16])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT32x16_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT32x16_STORE_DEFINED
+VEC_FUNC_IMPL void vuint32x16_store(vuint32x16 vec, vec_uint32 arr[16])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT32x16_STORE_DEFINED
+#endif
+#ifndef VUINT32x16_ADD_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_add(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_ADD_DEFINED
+#endif
+#ifndef VUINT32x16_SUB_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_sub(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_SUB_DEFINED
+#endif
+#ifndef VUINT32x16_MUL_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_mul(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_MUL_DEFINED
+#endif
+#ifndef VUINT32x16_AND_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_and(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_AND_DEFINED
+#endif
+#ifndef VUINT32x16_OR_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_or(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_OR_DEFINED
+#endif
+#ifndef VUINT32x16_XOR_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_xor(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_XOR_DEFINED
+#endif
+#ifndef VUINT32x16_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmplt(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_CMPLT_DEFINED
+#endif
+#ifndef VUINT32x16_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpeq(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_CMPEQ_DEFINED
+#endif
+#ifndef VUINT32x16_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpgt(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_CMPGT_DEFINED
+#endif
+#ifndef VUINT32x16_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmple(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_CMPLE_DEFINED
+#endif
+#ifndef VUINT32x16_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpge(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_CMPGE_DEFINED
+#endif
+#ifndef VUINT32x16_MIN_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_min(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vuint32x16 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT32x16_MIN_DEFINED
+#endif
+#ifndef VUINT32x16_MAX_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_max(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vuint32x16 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT32x16_MAX_DEFINED
+#endif
+#ifndef VUINT32x16_AVG_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_avg(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT32x16_AVG_DEFINED
+#endif
+#ifndef VUINT32x16_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_lshift(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_LSHIFT_DEFINED
+#endif
+#ifndef VUINT32x16_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_rshift(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_RSHIFT_DEFINED
+#endif
+#ifndef VUINT32x16_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_lrshift(vuint32x16 vec1, vuint32x16 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT32x16_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT32x16_NOT_DEFINED
+VEC_FUNC_IMPL vuint32x16 vuint32x16_not(vuint32x16 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT32x16_NOT_DEFINED
+#endif
+
+
+/* vint64x2 */
+
+#ifndef VINT64x2_SPLAT_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_splat(vec_int64 x)
+{
+	vint64x2 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	return vec;
+}
+# define VINT64x2_SPLAT_DEFINED
+#endif
+#ifndef VINT64x2_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_load_aligned(const vec_int64 x[2])
+{
+	vint64x2 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT64x2_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT64x2_LOAD_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_load(const vec_int64 x[2])
+{
+	vint64x2 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT64x2_LOAD_DEFINED
+#endif
+#ifndef VINT64x2_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint64x2_store_aligned(vint64x2 vec, vec_int64 arr[2])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT64x2_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT64x2_STORE_DEFINED
+VEC_FUNC_IMPL void vint64x2_store(vint64x2 vec, vec_int64 arr[2])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT64x2_STORE_DEFINED
+#endif
+#ifndef VINT64x2_ADD_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_add(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_ADD_DEFINED
+#endif
+#ifndef VINT64x2_SUB_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_sub(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_SUB_DEFINED
+#endif
+#ifndef VINT64x2_MUL_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_mul(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_MUL_DEFINED
+#endif
+#ifndef VINT64x2_AND_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_and(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_AND_DEFINED
+#endif
+#ifndef VINT64x2_OR_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_or(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_OR_DEFINED
+#endif
+#ifndef VINT64x2_XOR_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_xor(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_XOR_DEFINED
+#endif
+#ifndef VINT64x2_CMPLT_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_cmplt(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_CMPLT_DEFINED
+#endif
+#ifndef VINT64x2_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_cmpeq(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_CMPEQ_DEFINED
+#endif
+#ifndef VINT64x2_CMPGT_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_cmpgt(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_CMPGT_DEFINED
+#endif
+#ifndef VINT64x2_CMPLE_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_cmple(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_CMPLE_DEFINED
+#endif
+#ifndef VINT64x2_CMPGE_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_cmpge(vint64x2 vec1, vint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_CMPGE_DEFINED
+#endif
+#ifndef VINT64x2_MIN_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_min(vint64x2 vec1, vint64x2 vec2)
+{
+	vint64x2 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT64x2_MIN_DEFINED
+#endif
+#ifndef VINT64x2_MAX_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_max(vint64x2 vec1, vint64x2 vec2)
+{
+	vint64x2 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT64x2_MAX_DEFINED
+#endif
+#ifndef VINT64x2_AVG_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_avg(vint64x2 vec1, vint64x2 vec2)
+{
+	vint64x2 ones = vint64x2_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT64x2_AVG_DEFINED
+#endif
+#ifndef VINT64x2_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_lshift(vint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_LSHIFT_DEFINED
+#endif
+#ifndef VINT64x2_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_rshift(vint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_RSHIFT_DEFINED
+#endif
+#ifndef VINT64x2_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_lrshift(vint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT64x2_LRSHIFT_DEFINED
+#endif
+#ifndef VINT64x2_NOT_DEFINED
+VEC_FUNC_IMPL vint64x2 vint64x2_not(vint64x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT64x2_NOT_DEFINED
+#endif
+
+
+/* vuint64x2 */
+
+#ifndef VUINT64x2_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_splat(vec_uint64 x)
+{
+	vuint64x2 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,};
+	return vec;
+}
+# define VUINT64x2_SPLAT_DEFINED
+#endif
+#ifndef VUINT64x2_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_load_aligned(const vec_uint64 x[2])
+{
+	vuint64x2 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT64x2_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT64x2_LOAD_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_load(const vec_uint64 x[2])
+{
+	vuint64x2 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT64x2_LOAD_DEFINED
+#endif
+#ifndef VUINT64x2_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint64x2_store_aligned(vuint64x2 vec, vec_uint64 arr[2])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT64x2_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT64x2_STORE_DEFINED
+VEC_FUNC_IMPL void vuint64x2_store(vuint64x2 vec, vec_uint64 arr[2])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT64x2_STORE_DEFINED
+#endif
+#ifndef VUINT64x2_ADD_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_add(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_ADD_DEFINED
+#endif
+#ifndef VUINT64x2_SUB_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_sub(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_SUB_DEFINED
+#endif
+#ifndef VUINT64x2_MUL_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_mul(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_MUL_DEFINED
+#endif
+#ifndef VUINT64x2_AND_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_and(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_AND_DEFINED
+#endif
+#ifndef VUINT64x2_OR_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_or(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_OR_DEFINED
+#endif
+#ifndef VUINT64x2_XOR_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_xor(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_XOR_DEFINED
+#endif
+#ifndef VUINT64x2_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmplt(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_CMPLT_DEFINED
+#endif
+#ifndef VUINT64x2_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpeq(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_CMPEQ_DEFINED
+#endif
+#ifndef VUINT64x2_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpgt(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_CMPGT_DEFINED
+#endif
+#ifndef VUINT64x2_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmple(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_CMPLE_DEFINED
+#endif
+#ifndef VUINT64x2_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpge(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_CMPGE_DEFINED
+#endif
+#ifndef VUINT64x2_MIN_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_min(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vuint64x2 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT64x2_MIN_DEFINED
+#endif
+#ifndef VUINT64x2_MAX_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_max(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vuint64x2 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT64x2_MAX_DEFINED
+#endif
+#ifndef VUINT64x2_AVG_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_avg(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT64x2_AVG_DEFINED
+#endif
+#ifndef VUINT64x2_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_lshift(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_LSHIFT_DEFINED
+#endif
+#ifndef VUINT64x2_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_rshift(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_RSHIFT_DEFINED
+#endif
+#ifndef VUINT64x2_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_lrshift(vuint64x2 vec1, vuint64x2 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT64x2_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT64x2_NOT_DEFINED
+VEC_FUNC_IMPL vuint64x2 vuint64x2_not(vuint64x2 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT64x2_NOT_DEFINED
+#endif
+
+
+/* vint64x4 */
+
+#ifndef VINT64x4_SPLAT_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_splat(vec_int64 x)
+{
+	vint64x4 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	return vec;
+}
+# define VINT64x4_SPLAT_DEFINED
+#endif
+#ifndef VINT64x4_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_load_aligned(const vec_int64 x[4])
+{
+	vint64x4 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT64x4_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT64x4_LOAD_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_load(const vec_int64 x[4])
+{
+	vint64x4 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT64x4_LOAD_DEFINED
+#endif
+#ifndef VINT64x4_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint64x4_store_aligned(vint64x4 vec, vec_int64 arr[4])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT64x4_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT64x4_STORE_DEFINED
+VEC_FUNC_IMPL void vint64x4_store(vint64x4 vec, vec_int64 arr[4])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT64x4_STORE_DEFINED
+#endif
+#ifndef VINT64x4_ADD_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_add(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_ADD_DEFINED
+#endif
+#ifndef VINT64x4_SUB_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_sub(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_SUB_DEFINED
+#endif
+#ifndef VINT64x4_MUL_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_mul(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_MUL_DEFINED
+#endif
+#ifndef VINT64x4_AND_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_and(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_AND_DEFINED
+#endif
+#ifndef VINT64x4_OR_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_or(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_OR_DEFINED
+#endif
+#ifndef VINT64x4_XOR_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_xor(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_XOR_DEFINED
+#endif
+#ifndef VINT64x4_CMPLT_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_cmplt(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_CMPLT_DEFINED
+#endif
+#ifndef VINT64x4_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_cmpeq(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_CMPEQ_DEFINED
+#endif
+#ifndef VINT64x4_CMPGT_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_cmpgt(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_CMPGT_DEFINED
+#endif
+#ifndef VINT64x4_CMPLE_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_cmple(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_CMPLE_DEFINED
+#endif
+#ifndef VINT64x4_CMPGE_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_cmpge(vint64x4 vec1, vint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_CMPGE_DEFINED
+#endif
+#ifndef VINT64x4_MIN_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_min(vint64x4 vec1, vint64x4 vec2)
+{
+	vint64x4 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT64x4_MIN_DEFINED
+#endif
+#ifndef VINT64x4_MAX_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_max(vint64x4 vec1, vint64x4 vec2)
+{
+	vint64x4 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT64x4_MAX_DEFINED
+#endif
+#ifndef VINT64x4_AVG_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_avg(vint64x4 vec1, vint64x4 vec2)
+{
+	vint64x4 ones = vint64x4_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT64x4_AVG_DEFINED
+#endif
+#ifndef VINT64x4_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_lshift(vint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_LSHIFT_DEFINED
+#endif
+#ifndef VINT64x4_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_rshift(vint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_RSHIFT_DEFINED
+#endif
+#ifndef VINT64x4_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_lrshift(vint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT64x4_LRSHIFT_DEFINED
+#endif
+#ifndef VINT64x4_NOT_DEFINED
+VEC_FUNC_IMPL vint64x4 vint64x4_not(vint64x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT64x4_NOT_DEFINED
+#endif
+
+
+/* vuint64x4 */
+
+#ifndef VUINT64x4_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_splat(vec_uint64 x)
+{
+	vuint64x4 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,};
+	return vec;
+}
+# define VUINT64x4_SPLAT_DEFINED
+#endif
+#ifndef VUINT64x4_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_load_aligned(const vec_uint64 x[4])
+{
+	vuint64x4 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT64x4_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT64x4_LOAD_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_load(const vec_uint64 x[4])
+{
+	vuint64x4 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT64x4_LOAD_DEFINED
+#endif
+#ifndef VUINT64x4_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint64x4_store_aligned(vuint64x4 vec, vec_uint64 arr[4])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT64x4_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT64x4_STORE_DEFINED
+VEC_FUNC_IMPL void vuint64x4_store(vuint64x4 vec, vec_uint64 arr[4])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT64x4_STORE_DEFINED
+#endif
+#ifndef VUINT64x4_ADD_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_add(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_ADD_DEFINED
+#endif
+#ifndef VUINT64x4_SUB_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_sub(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_SUB_DEFINED
+#endif
+#ifndef VUINT64x4_MUL_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_mul(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_MUL_DEFINED
+#endif
+#ifndef VUINT64x4_AND_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_and(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_AND_DEFINED
+#endif
+#ifndef VUINT64x4_OR_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_or(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_OR_DEFINED
+#endif
+#ifndef VUINT64x4_XOR_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_xor(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_XOR_DEFINED
+#endif
+#ifndef VUINT64x4_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmplt(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_CMPLT_DEFINED
+#endif
+#ifndef VUINT64x4_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpeq(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_CMPEQ_DEFINED
+#endif
+#ifndef VUINT64x4_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpgt(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_CMPGT_DEFINED
+#endif
+#ifndef VUINT64x4_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmple(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_CMPLE_DEFINED
+#endif
+#ifndef VUINT64x4_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpge(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_CMPGE_DEFINED
+#endif
+#ifndef VUINT64x4_MIN_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_min(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vuint64x4 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT64x4_MIN_DEFINED
+#endif
+#ifndef VUINT64x4_MAX_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_max(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vuint64x4 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT64x4_MAX_DEFINED
+#endif
+#ifndef VUINT64x4_AVG_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_avg(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT64x4_AVG_DEFINED
+#endif
+#ifndef VUINT64x4_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_lshift(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_LSHIFT_DEFINED
+#endif
+#ifndef VUINT64x4_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_rshift(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_RSHIFT_DEFINED
+#endif
+#ifndef VUINT64x4_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_lrshift(vuint64x4 vec1, vuint64x4 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT64x4_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT64x4_NOT_DEFINED
+VEC_FUNC_IMPL vuint64x4 vuint64x4_not(vuint64x4 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT64x4_NOT_DEFINED
+#endif
+
+
+/* vint64x8 */
+
+#ifndef VINT64x8_SPLAT_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_splat(vec_int64 x)
+{
+	vint64x8 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VINT64x8_SPLAT_DEFINED
+#endif
+#ifndef VINT64x8_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_load_aligned(const vec_int64 x[8])
+{
+	vint64x8 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VINT64x8_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VINT64x8_LOAD_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_load(const vec_int64 x[8])
+{
+	vint64x8 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VINT64x8_LOAD_DEFINED
+#endif
+#ifndef VINT64x8_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vint64x8_store_aligned(vint64x8 vec, vec_int64 arr[8])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VINT64x8_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VINT64x8_STORE_DEFINED
+VEC_FUNC_IMPL void vint64x8_store(vint64x8 vec, vec_int64 arr[8])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VINT64x8_STORE_DEFINED
+#endif
+#ifndef VINT64x8_ADD_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_add(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_ADD_DEFINED
+#endif
+#ifndef VINT64x8_SUB_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_sub(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_SUB_DEFINED
+#endif
+#ifndef VINT64x8_MUL_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_mul(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_MUL_DEFINED
+#endif
+#ifndef VINT64x8_AND_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_and(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_AND_DEFINED
+#endif
+#ifndef VINT64x8_OR_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_or(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_OR_DEFINED
+#endif
+#ifndef VINT64x8_XOR_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_xor(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_XOR_DEFINED
+#endif
+#ifndef VINT64x8_CMPLT_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_cmplt(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_CMPLT_DEFINED
+#endif
+#ifndef VINT64x8_CMPEQ_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_cmpeq(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_CMPEQ_DEFINED
+#endif
+#ifndef VINT64x8_CMPGT_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_cmpgt(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_CMPGT_DEFINED
+#endif
+#ifndef VINT64x8_CMPLE_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_cmple(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_CMPLE_DEFINED
+#endif
+#ifndef VINT64x8_CMPGE_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_cmpge(vint64x8 vec1, vint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_CMPGE_DEFINED
+#endif
+#ifndef VINT64x8_MIN_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_min(vint64x8 vec1, vint64x8 vec2)
+{
+	vint64x8 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT64x8_MIN_DEFINED
+#endif
+#ifndef VINT64x8_MAX_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_max(vint64x8 vec1, vint64x8 vec2)
+{
+	vint64x8 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VINT64x8_MAX_DEFINED
+#endif
+#ifndef VINT64x8_AVG_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_avg(vint64x8 vec1, vint64x8 vec2)
+{
+	vint64x8 ones = vint64x8_splat(1);
+	__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);
+	__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);
+	__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);
+	return vec1;
+}
+# define VINT64x8_AVG_DEFINED
+#endif
+#ifndef VINT64x8_LSHIFT_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_lshift(vint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_LSHIFT_DEFINED
+#endif
+#ifndef VINT64x8_RSHIFT_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_rshift(vint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_RSHIFT_DEFINED
+#endif
+#ifndef VINT64x8_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_lrshift(vint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VINT64x8_LRSHIFT_DEFINED
+#endif
+#ifndef VINT64x8_NOT_DEFINED
+VEC_FUNC_IMPL vint64x8 vint64x8_not(vint64x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VINT64x8_NOT_DEFINED
+#endif
+
+
+/* vuint64x8 */
+
+#ifndef VUINT64x8_SPLAT_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_splat(vec_uint64 x)
+{
+	vuint64x8 vec;
+	vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,};
+	return vec;
+}
+# define VUINT64x8_SPLAT_DEFINED
+#endif
+#ifndef VUINT64x8_LOAD_ALIGNED_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_load_aligned(const vec_uint64 x[8])
+{
+	vuint64x8 vec;
+	vec.gcc = *(__typeof__(vec.gcc) *)x;
+	return vec;
+}
+# define VUINT64x8_LOAD_ALIGNED_DEFINED
+#endif
+#ifndef VUINT64x8_LOAD_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_load(const vec_uint64 x[8])
+{
+	vuint64x8 vec;
+	memcpy(&vec, x, sizeof(vec));
+	return vec;
+}
+# define VUINT64x8_LOAD_DEFINED
+#endif
+#ifndef VUINT64x8_STORE_ALIGNED_DEFINED
+VEC_FUNC_IMPL void vuint64x8_store_aligned(vuint64x8 vec, vec_uint64 arr[8])
+{
+	*(__typeof__(vec.gcc) *)arr = vec.gcc;
+}
+# define VUINT64x8_STORE_ALIGNED_DEFINED
+#endif
+#ifndef VUINT64x8_STORE_DEFINED
+VEC_FUNC_IMPL void vuint64x8_store(vuint64x8 vec, vec_uint64 arr[8])
+{
+	memcpy(arr, &vec, sizeof(vec));
+}
+# define VUINT64x8_STORE_DEFINED
+#endif
+#ifndef VUINT64x8_ADD_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_add(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc + vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_ADD_DEFINED
+#endif
+#ifndef VUINT64x8_SUB_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_sub(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc - vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_SUB_DEFINED
+#endif
+#ifndef VUINT64x8_MUL_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_mul(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc * vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_MUL_DEFINED
+#endif
+#ifndef VUINT64x8_AND_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_and(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc & vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_AND_DEFINED
+#endif
+#ifndef VUINT64x8_OR_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_or(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc | vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_OR_DEFINED
+#endif
+#ifndef VUINT64x8_XOR_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_xor(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc ^ vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_XOR_DEFINED
+#endif
+#ifndef VUINT64x8_CMPLT_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmplt(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc < vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_CMPLT_DEFINED
+#endif
+#ifndef VUINT64x8_CMPEQ_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpeq(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc == vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_CMPEQ_DEFINED
+#endif
+#ifndef VUINT64x8_CMPGT_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpgt(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc > vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_CMPGT_DEFINED
+#endif
+#ifndef VUINT64x8_CMPLE_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmple(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc <= vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_CMPLE_DEFINED
+#endif
+#ifndef VUINT64x8_CMPGE_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpge(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >= vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_CMPGE_DEFINED
+#endif
+#ifndef VUINT64x8_MIN_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_min(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vuint64x8 mask;
+	mask.gcc = (vec1.gcc < vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT64x8_MIN_DEFINED
+#endif
+#ifndef VUINT64x8_MAX_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_max(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vuint64x8 mask;
+	mask.gcc = (vec1.gcc > vec2.gcc);
+	vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);
+	return vec1;
+}
+# define VUINT64x8_MAX_DEFINED
+#endif
+#ifndef VUINT64x8_AVG_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_avg(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);
+	return vec1;
+}
+# define VUINT64x8_AVG_DEFINED
+#endif
+#ifndef VUINT64x8_LSHIFT_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_lshift(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc << vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_LSHIFT_DEFINED
+#endif
+#ifndef VUINT64x8_RSHIFT_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_rshift(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_RSHIFT_DEFINED
+#endif
+#ifndef VUINT64x8_LRSHIFT_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_lrshift(vuint64x8 vec1, vuint64x8 vec2)
+{
+	vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc);
+	return vec1;
+}
+# define VUINT64x8_LRSHIFT_DEFINED
+#endif
+#ifndef VUINT64x8_NOT_DEFINED
+VEC_FUNC_IMPL vuint64x8 vuint64x8_not(vuint64x8 vec)
+{
+	vec.gcc = ~vec.gcc;
+	return vec;
+}
+# define VUINT64x8_NOT_DEFINED
+#endif
+#endif /* VEC_IMPL_GCC_H_ */
+
--- a/include/vec/impl/generic.h	Sat Apr 26 15:31:39 2025 -0400
+++ b/include/vec/impl/generic.h	Sun Apr 27 02:49:53 2025 -0400
@@ -99,6 +99,12 @@
 		VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] / vec2.generic[i]) : 0, sign, bits, size); \
 	}
 
+#define VEC_GENERIC_MOD(sign, bits, size) \
+	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_mod(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
+	{ \
+		VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] % vec2.generic[i]) : 0, sign, bits, size); \
+	}
+
 #define VEC_GENERIC_AVG(sign, bits, size) \
 	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
 	{ \
@@ -250,6 +256,7 @@
 #define VEC_GENERIC_DBL_SUB(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(sub, sign, bits, size, halfsize, sign)
 #define VEC_GENERIC_DBL_MUL(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mul, sign, bits, size, halfsize, sign)
 #define VEC_GENERIC_DBL_DIV(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(div, sign, bits, size, halfsize, sign)
+#define VEC_GENERIC_DBL_MOD(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mod, sign, bits, size, halfsize, sign)
 #define VEC_GENERIC_DBL_AVG(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(avg, sign, bits, size, halfsize, sign)
 #define VEC_GENERIC_DBL_LSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(lshift, sign, bits, size, halfsize, u)
 #define VEC_GENERIC_DBL_RSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(rshift, sign, bits, size, halfsize, u)
@@ -317,6 +324,10 @@
 VEC_GENERIC_DIV(/* nothing */, 8, 2)
 # define VINT8x2_DIV_DEFINED
 #endif
+#ifndef VINT8x2_MOD_DEFINED
+VEC_GENERIC_MOD(/* nothing */, 8, 2)
+# define VINT8x2_MOD_DEFINED
+#endif
 #ifndef VINT8x2_AVG_DEFINED
 VEC_GENERIC_AVG(/* nothing */, 8, 2)
 # define VINT8x2_AVG_DEFINED
@@ -417,6 +428,10 @@
 VEC_GENERIC_DIV(u, 8, 2)
 # define VUINT8x2_DIV_DEFINED
 #endif
+#ifndef VUINT8x2_MOD_DEFINED
+VEC_GENERIC_MOD(u, 8, 2)
+# define VUINT8x2_MOD_DEFINED
+#endif
 #ifndef VUINT8x2_AVG_DEFINED
 VEC_GENERIC_AVG(u, 8, 2)
 # define VUINT8x2_AVG_DEFINED
@@ -526,6 +541,11 @@
 # define VINT8x4_DIV_DEFINED
 #endif
 
+#ifndef VINT8x4_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 8, 4, 2)
+# define VINT8x4_MOD_DEFINED
+#endif
+
 #ifndef VINT8x4_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 8, 4, 2)
 # define VINT8x4_AVG_DEFINED
@@ -650,6 +670,11 @@
 # define VUINT8x4_DIV_DEFINED
 #endif
 
+#ifndef VUINT8x4_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 8, 4, 2)
+# define VUINT8x4_MOD_DEFINED
+#endif
+
 #ifndef VUINT8x4_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 8, 4, 2)
 # define VUINT8x4_AVG_DEFINED
@@ -774,6 +799,11 @@
 # define VINT8x8_DIV_DEFINED
 #endif
 
+#ifndef VINT8x8_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 8, 8, 4)
+# define VINT8x8_MOD_DEFINED
+#endif
+
 #ifndef VINT8x8_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 8, 8, 4)
 # define VINT8x8_AVG_DEFINED
@@ -898,6 +928,11 @@
 # define VUINT8x8_DIV_DEFINED
 #endif
 
+#ifndef VUINT8x8_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 8, 8, 4)
+# define VUINT8x8_MOD_DEFINED
+#endif
+
 #ifndef VUINT8x8_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 8, 8, 4)
 # define VUINT8x8_AVG_DEFINED
@@ -1022,6 +1057,11 @@
 # define VINT8x16_DIV_DEFINED
 #endif
 
+#ifndef VINT8x16_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 8, 16, 8)
+# define VINT8x16_MOD_DEFINED
+#endif
+
 #ifndef VINT8x16_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 8, 16, 8)
 # define VINT8x16_AVG_DEFINED
@@ -1146,6 +1186,11 @@
 # define VUINT8x16_DIV_DEFINED
 #endif
 
+#ifndef VUINT8x16_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 8, 16, 8)
+# define VUINT8x16_MOD_DEFINED
+#endif
+
 #ifndef VUINT8x16_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 8, 16, 8)
 # define VUINT8x16_AVG_DEFINED
@@ -1270,6 +1315,11 @@
 # define VINT8x32_DIV_DEFINED
 #endif
 
+#ifndef VINT8x32_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 8, 32, 16)
+# define VINT8x32_MOD_DEFINED
+#endif
+
 #ifndef VINT8x32_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 8, 32, 16)
 # define VINT8x32_AVG_DEFINED
@@ -1394,6 +1444,11 @@
 # define VUINT8x32_DIV_DEFINED
 #endif
 
+#ifndef VUINT8x32_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 8, 32, 16)
+# define VUINT8x32_MOD_DEFINED
+#endif
+
 #ifndef VUINT8x32_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 8, 32, 16)
 # define VUINT8x32_AVG_DEFINED
@@ -1518,6 +1573,11 @@
 # define VINT8x64_DIV_DEFINED
 #endif
 
+#ifndef VINT8x64_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 8, 64, 32)
+# define VINT8x64_MOD_DEFINED
+#endif
+
 #ifndef VINT8x64_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 8, 64, 32)
 # define VINT8x64_AVG_DEFINED
@@ -1642,6 +1702,11 @@
 # define VUINT8x64_DIV_DEFINED
 #endif
 
+#ifndef VUINT8x64_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 8, 64, 32)
+# define VUINT8x64_MOD_DEFINED
+#endif
+
 #ifndef VUINT8x64_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 8, 64, 32)
 # define VUINT8x64_AVG_DEFINED
@@ -1757,6 +1822,10 @@
 VEC_GENERIC_DIV(/* nothing */, 16, 2)
 # define VINT16x2_DIV_DEFINED
 #endif
+#ifndef VINT16x2_MOD_DEFINED
+VEC_GENERIC_MOD(/* nothing */, 16, 2)
+# define VINT16x2_MOD_DEFINED
+#endif
 #ifndef VINT16x2_AVG_DEFINED
 VEC_GENERIC_AVG(/* nothing */, 16, 2)
 # define VINT16x2_AVG_DEFINED
@@ -1857,6 +1926,10 @@
 VEC_GENERIC_DIV(u, 16, 2)
 # define VUINT16x2_DIV_DEFINED
 #endif
+#ifndef VUINT16x2_MOD_DEFINED
+VEC_GENERIC_MOD(u, 16, 2)
+# define VUINT16x2_MOD_DEFINED
+#endif
 #ifndef VUINT16x2_AVG_DEFINED
 VEC_GENERIC_AVG(u, 16, 2)
 # define VUINT16x2_AVG_DEFINED
@@ -1966,6 +2039,11 @@
 # define VINT16x4_DIV_DEFINED
 #endif
 
+#ifndef VINT16x4_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 16, 4, 2)
+# define VINT16x4_MOD_DEFINED
+#endif
+
 #ifndef VINT16x4_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 16, 4, 2)
 # define VINT16x4_AVG_DEFINED
@@ -2090,6 +2168,11 @@
 # define VUINT16x4_DIV_DEFINED
 #endif
 
+#ifndef VUINT16x4_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 16, 4, 2)
+# define VUINT16x4_MOD_DEFINED
+#endif
+
 #ifndef VUINT16x4_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 16, 4, 2)
 # define VUINT16x4_AVG_DEFINED
@@ -2214,6 +2297,11 @@
 # define VINT16x8_DIV_DEFINED
 #endif
 
+#ifndef VINT16x8_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 16, 8, 4)
+# define VINT16x8_MOD_DEFINED
+#endif
+
 #ifndef VINT16x8_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 16, 8, 4)
 # define VINT16x8_AVG_DEFINED
@@ -2338,6 +2426,11 @@
 # define VUINT16x8_DIV_DEFINED
 #endif
 
+#ifndef VUINT16x8_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 16, 8, 4)
+# define VUINT16x8_MOD_DEFINED
+#endif
+
 #ifndef VUINT16x8_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 16, 8, 4)
 # define VUINT16x8_AVG_DEFINED
@@ -2462,6 +2555,11 @@
 # define VINT16x16_DIV_DEFINED
 #endif
 
+#ifndef VINT16x16_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 16, 16, 8)
+# define VINT16x16_MOD_DEFINED
+#endif
+
 #ifndef VINT16x16_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 16, 16, 8)
 # define VINT16x16_AVG_DEFINED
@@ -2586,6 +2684,11 @@
 # define VUINT16x16_DIV_DEFINED
 #endif
 
+#ifndef VUINT16x16_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 16, 16, 8)
+# define VUINT16x16_MOD_DEFINED
+#endif
+
 #ifndef VUINT16x16_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 16, 16, 8)
 # define VUINT16x16_AVG_DEFINED
@@ -2710,6 +2813,11 @@
 # define VINT16x32_DIV_DEFINED
 #endif
 
+#ifndef VINT16x32_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 16, 32, 16)
+# define VINT16x32_MOD_DEFINED
+#endif
+
 #ifndef VINT16x32_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 16, 32, 16)
 # define VINT16x32_AVG_DEFINED
@@ -2834,6 +2942,11 @@
 # define VUINT16x32_DIV_DEFINED
 #endif
 
+#ifndef VUINT16x32_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 16, 32, 16)
+# define VUINT16x32_MOD_DEFINED
+#endif
+
 #ifndef VUINT16x32_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 16, 32, 16)
 # define VUINT16x32_AVG_DEFINED
@@ -2949,6 +3062,10 @@
 VEC_GENERIC_DIV(/* nothing */, 32, 2)
 # define VINT32x2_DIV_DEFINED
 #endif
+#ifndef VINT32x2_MOD_DEFINED
+VEC_GENERIC_MOD(/* nothing */, 32, 2)
+# define VINT32x2_MOD_DEFINED
+#endif
 #ifndef VINT32x2_AVG_DEFINED
 VEC_GENERIC_AVG(/* nothing */, 32, 2)
 # define VINT32x2_AVG_DEFINED
@@ -3049,6 +3166,10 @@
 VEC_GENERIC_DIV(u, 32, 2)
 # define VUINT32x2_DIV_DEFINED
 #endif
+#ifndef VUINT32x2_MOD_DEFINED
+VEC_GENERIC_MOD(u, 32, 2)
+# define VUINT32x2_MOD_DEFINED
+#endif
 #ifndef VUINT32x2_AVG_DEFINED
 VEC_GENERIC_AVG(u, 32, 2)
 # define VUINT32x2_AVG_DEFINED
@@ -3158,6 +3279,11 @@
 # define VINT32x4_DIV_DEFINED
 #endif
 
+#ifndef VINT32x4_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 32, 4, 2)
+# define VINT32x4_MOD_DEFINED
+#endif
+
 #ifndef VINT32x4_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 32, 4, 2)
 # define VINT32x4_AVG_DEFINED
@@ -3282,6 +3408,11 @@
 # define VUINT32x4_DIV_DEFINED
 #endif
 
+#ifndef VUINT32x4_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 32, 4, 2)
+# define VUINT32x4_MOD_DEFINED
+#endif
+
 #ifndef VUINT32x4_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 32, 4, 2)
 # define VUINT32x4_AVG_DEFINED
@@ -3406,6 +3537,11 @@
 # define VINT32x8_DIV_DEFINED
 #endif
 
+#ifndef VINT32x8_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 32, 8, 4)
+# define VINT32x8_MOD_DEFINED
+#endif
+
 #ifndef VINT32x8_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 32, 8, 4)
 # define VINT32x8_AVG_DEFINED
@@ -3530,6 +3666,11 @@
 # define VUINT32x8_DIV_DEFINED
 #endif
 
+#ifndef VUINT32x8_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 32, 8, 4)
+# define VUINT32x8_MOD_DEFINED
+#endif
+
 #ifndef VUINT32x8_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 32, 8, 4)
 # define VUINT32x8_AVG_DEFINED
@@ -3654,6 +3795,11 @@
 # define VINT32x16_DIV_DEFINED
 #endif
 
+#ifndef VINT32x16_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 32, 16, 8)
+# define VINT32x16_MOD_DEFINED
+#endif
+
 #ifndef VINT32x16_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 32, 16, 8)
 # define VINT32x16_AVG_DEFINED
@@ -3778,6 +3924,11 @@
 # define VUINT32x16_DIV_DEFINED
 #endif
 
+#ifndef VUINT32x16_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 32, 16, 8)
+# define VUINT32x16_MOD_DEFINED
+#endif
+
 #ifndef VUINT32x16_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 32, 16, 8)
 # define VUINT32x16_AVG_DEFINED
@@ -3893,6 +4044,10 @@
 VEC_GENERIC_DIV(/* nothing */, 64, 2)
 # define VINT64x2_DIV_DEFINED
 #endif
+#ifndef VINT64x2_MOD_DEFINED
+VEC_GENERIC_MOD(/* nothing */, 64, 2)
+# define VINT64x2_MOD_DEFINED
+#endif
 #ifndef VINT64x2_AVG_DEFINED
 VEC_GENERIC_AVG(/* nothing */, 64, 2)
 # define VINT64x2_AVG_DEFINED
@@ -3993,6 +4148,10 @@
 VEC_GENERIC_DIV(u, 64, 2)
 # define VUINT64x2_DIV_DEFINED
 #endif
+#ifndef VUINT64x2_MOD_DEFINED
+VEC_GENERIC_MOD(u, 64, 2)
+# define VUINT64x2_MOD_DEFINED
+#endif
 #ifndef VUINT64x2_AVG_DEFINED
 VEC_GENERIC_AVG(u, 64, 2)
 # define VUINT64x2_AVG_DEFINED
@@ -4102,6 +4261,11 @@
 # define VINT64x4_DIV_DEFINED
 #endif
 
+#ifndef VINT64x4_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 64, 4, 2)
+# define VINT64x4_MOD_DEFINED
+#endif
+
 #ifndef VINT64x4_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 64, 4, 2)
 # define VINT64x4_AVG_DEFINED
@@ -4226,6 +4390,11 @@
 # define VUINT64x4_DIV_DEFINED
 #endif
 
+#ifndef VUINT64x4_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 64, 4, 2)
+# define VUINT64x4_MOD_DEFINED
+#endif
+
 #ifndef VUINT64x4_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 64, 4, 2)
 # define VUINT64x4_AVG_DEFINED
@@ -4350,6 +4519,11 @@
 # define VINT64x8_DIV_DEFINED
 #endif
 
+#ifndef VINT64x8_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(/* nothing */, 64, 8, 4)
+# define VINT64x8_MOD_DEFINED
+#endif
+
 #ifndef VINT64x8_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(/* nothing */, 64, 8, 4)
 # define VINT64x8_AVG_DEFINED
@@ -4474,6 +4648,11 @@
 # define VUINT64x8_DIV_DEFINED
 #endif
 
+#ifndef VUINT64x8_MOD_DEFINED
+VEC_GENERIC_DBL_MOD(u, 64, 8, 4)
+# define VUINT64x8_MOD_DEFINED
+#endif
+
 #ifndef VUINT64x8_AVG_DEFINED
 VEC_GENERIC_DBL_AVG(u, 64, 8, 4)
 # define VUINT64x8_AVG_DEFINED
--- a/include/vec/vec.h	Sat Apr 26 15:31:39 2025 -0400
+++ b/include/vec/vec.h	Sun Apr 27 02:49:53 2025 -0400
@@ -315,6 +315,10 @@
 # endif
 #endif
 
+#endif /* defined(VEC_SUPPRESS_HW) */
+
+#if VEC_GNUC_ATLEAST(4, 0, 0)
+# define VEC_COMPILER_HAS_GCC_VECTORS
 #endif
 
 #ifdef __cplusplus
@@ -353,17 +357,17 @@
 
 VEC_FUNC_IMPL vec_intmax vec_avg(vec_intmax x, vec_intmax y)
 {
-    vec_intmax x_d_rem    = (x % 2);
-    vec_intmax y_d_rem    = (y % 2);
-    vec_intmax rem_d_quot = ((x_d_rem + y_d_rem) / 2);
-    vec_intmax rem_d_rem  = ((x_d_rem + y_d_rem) % 2);
+	vec_intmax x_d_rem    = (x % 2);
+	vec_intmax y_d_rem    = (y % 2);
+	vec_intmax rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	vec_intmax rem_d_rem  = ((x_d_rem + y_d_rem) % 2);
 
-    return ((x / 2) + (y / 2)) + (rem_d_quot) + (rem_d_rem == 1);
+	return ((x / 2) + (y / 2)) + (rem_d_quot) + (rem_d_rem == 1);
 }
 
 VEC_FUNC_IMPL vec_uintmax vec_uavg(vec_uintmax x, vec_uintmax y)
 {
-    return (x >> 1) + (y >> 1) + ((x | y) & 1);
+	return (x >> 1) + (y >> 1) + ((x | y) & 1);
 }
 
 /* --------------------------------------------------------------- */
@@ -608,27 +612,45 @@
 
 // 16-bit
 typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint8 __attribute__((__vector_size__(2))) gcc;
+#endif
 	vec_uint8 generic[2];
 } vuint8x2;
 
 typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int8 __attribute__((__vector_size__(2))) gcc;
+#endif
 	vec_int8 generic[2];
 } vint8x2;
 
 // 32-bit
 typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint8 __attribute__((__vector_size__(4))) gcc;
+#endif
 	vuint8x2 generic[2];
 } vuint8x4;
 
 typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint16 __attribute__((__vector_size__(4))) gcc;
+#endif
 	vec_uint16 generic[2];
 } vuint16x2;
 
 typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int8 __attribute__((__vector_size__(4))) gcc;
+#endif
 	vint8x2 generic[2];
 } vint8x4;
 
 typedef union {
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int16 __attribute__((__vector_size__(4))) gcc;
+#endif
 	vec_int16 generic[2];
 } vint16x2;
 
@@ -640,6 +662,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	uint8x8_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint8 __attribute__((__vector_size__(8))) gcc;
+#endif
 
 	vuint8x4 generic[2];
 } vuint8x8;
@@ -651,6 +676,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	uint16x4_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint16 __attribute__((__vector_size__(8))) gcc;
+#endif
 
 	vuint16x2 generic[2];
 } vuint16x4;
@@ -662,6 +690,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	uint32x2_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint32 __attribute__((__vector_size__(8))) gcc;
+#endif
 
 	vec_uint32 generic[2];
 } vuint32x2;
@@ -673,6 +704,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	int8x8_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int8 __attribute__((__vector_size__(8))) gcc;
+#endif
 
 	vint8x4 generic[2];
 } vint8x8;
@@ -684,6 +718,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	int16x4_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int16 __attribute__((__vector_size__(8))) gcc;
+#endif
 
 	vint16x2 generic[2];
 } vint16x4;
@@ -695,6 +732,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	int32x2_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int32 __attribute__((__vector_size__(8))) gcc;
+#endif
 
 	vec_int32 generic[2];
 } vint32x2;
@@ -710,6 +750,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	uint8x16_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint8 __attribute__((__vector_size__(16))) gcc;
+#endif
 	vuint8x8 generic[2];
 } vuint8x16;
 
@@ -723,6 +766,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	uint16x8_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint16 __attribute__((__vector_size__(16))) gcc;
+#endif
 	vuint16x4 generic[2];
 } vuint16x8;
 
@@ -736,6 +782,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	uint32x4_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint32 __attribute__((__vector_size__(16))) gcc;
+#endif
 	vuint32x2 generic[2];
 } vuint32x4;
 
@@ -749,6 +798,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	uint64x2_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint64 __attribute__((__vector_size__(16))) gcc;
+#endif
 	vec_uint64 generic[2];
 } vuint64x2;
 
@@ -762,6 +814,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	int8x16_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int8 __attribute__((__vector_size__(16))) gcc;
+#endif
 	vint8x8 generic[2];
 } vint8x16;
 
@@ -775,6 +830,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	int16x8_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int16 __attribute__((__vector_size__(16))) gcc;
+#endif
 	vint16x4 generic[2];
 } vint16x8;
 
@@ -788,6 +846,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	int32x4_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int32 __attribute__((__vector_size__(16))) gcc;
+#endif
 	vint32x2 generic[2];
 } vint32x4;
 
@@ -801,6 +862,9 @@
 #ifdef VEC_COMPILER_HAS_NEON
 	int64x2_t neon;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int64 __attribute__((__vector_size__(16))) gcc;
+#endif
 	vec_int64 generic[2];
 } vint64x2;
 
@@ -809,6 +873,9 @@
 #ifdef VEC_COMPILER_HAS_AVX2
 	__m256i avx2;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint8 __attribute__((__vector_size__(32))) gcc;
+#endif
 	vuint8x16 generic[2];
 } vuint8x32;
 
@@ -816,6 +883,9 @@
 #ifdef VEC_COMPILER_HAS_AVX2
 	__m256i avx2;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint16 __attribute__((__vector_size__(32))) gcc;
+#endif
 	vuint16x8 generic[2];
 } vuint16x16;
 
@@ -823,6 +893,9 @@
 #ifdef VEC_COMPILER_HAS_AVX2
 	__m256i avx2;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint32 __attribute__((__vector_size__(32))) gcc;
+#endif
 	vuint32x4 generic[2];
 } vuint32x8;
 
@@ -830,6 +903,9 @@
 #ifdef VEC_COMPILER_HAS_AVX2
 	__m256i avx2;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint64 __attribute__((__vector_size__(32))) gcc;
+#endif
 	vuint64x2 generic[2];
 } vuint64x4;
 
@@ -837,6 +913,9 @@
 #ifdef VEC_COMPILER_HAS_AVX2
 	__m256i avx2;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int8 __attribute__((__vector_size__(32))) gcc;
+#endif
 	vint8x16 generic[2];
 } vint8x32;
 
@@ -844,6 +923,9 @@
 #ifdef VEC_COMPILER_HAS_AVX2
 	__m256i avx2;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int16 __attribute__((__vector_size__(32))) gcc;
+#endif
 	vint16x8 generic[2];
 } vint16x16;
 
@@ -851,6 +933,9 @@
 #ifdef VEC_COMPILER_HAS_AVX2
 	__m256i avx2;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int32 __attribute__((__vector_size__(32))) gcc;
+#endif
 	vint32x4 generic[2];
 } vint32x8;
 
@@ -858,6 +943,9 @@
 #ifdef VEC_COMPILER_HAS_AVX2
 	__m256i avx2;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int64 __attribute__((__vector_size__(32))) gcc;
+#endif
 	vint64x2 generic[2];
 } vint64x4;
 
@@ -866,6 +954,9 @@
 #ifdef VEC_COMPILER_HAS_AVX512F
 	__m512i avx512f;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint8 __attribute__((__vector_size__(64))) gcc;
+#endif
 	vuint8x32 generic[2];
 } vuint8x64;
 
@@ -873,6 +964,9 @@
 #ifdef VEC_COMPILER_HAS_AVX512F
 	__m512i avx512f;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint16 __attribute__((__vector_size__(64))) gcc;
+#endif
 	vuint16x16 generic[2];
 } vuint16x32;
 
@@ -880,6 +974,9 @@
 #ifdef VEC_COMPILER_HAS_AVX512F
 	__m512i avx512f;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint32 __attribute__((__vector_size__(64))) gcc;
+#endif
 	vuint32x8 generic[2];
 } vuint32x16;
 
@@ -887,6 +984,9 @@
 #ifdef VEC_COMPILER_HAS_AVX512F
 	__m512i avx512f;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_uint64 __attribute__((__vector_size__(64))) gcc;
+#endif
 	vuint64x4 generic[2];
 } vuint64x8;
 
@@ -894,6 +994,9 @@
 #ifdef VEC_COMPILER_HAS_AVX512F
 	__m512i avx512f;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int8 __attribute__((__vector_size__(64))) gcc;
+#endif
 	vint8x32 generic[2];
 } vint8x64;
 
@@ -901,6 +1004,9 @@
 #ifdef VEC_COMPILER_HAS_AVX512F
 	__m512i avx512f;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int16 __attribute__((__vector_size__(64))) gcc;
+#endif
 	vint16x16 generic[2];
 } vint16x32;
 
@@ -908,6 +1014,9 @@
 #ifdef VEC_COMPILER_HAS_AVX512F
 	__m512i avx512f;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int32 __attribute__((__vector_size__(64))) gcc;
+#endif
 	vint32x8 generic[2];
 } vint32x16;
 
@@ -915,6 +1024,9 @@
 #ifdef VEC_COMPILER_HAS_AVX512F
 	__m512i avx512f;
 #endif
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+	vec_int64 __attribute__((__vector_size__(64))) gcc;
+#endif
 	vint64x4 generic[2];
 } vint64x8;
 
@@ -949,6 +1061,10 @@
 # include "impl/x86/mmx.h"
 #endif
 
+#ifdef VEC_COMPILER_HAS_GCC_VECTORS
+# include "impl/gcc.h"
+#endif
+
 #include "impl/generic.h"
 
 /* ------------------------------------------------------------------------ */
--- a/test/Makefile.template	Sat Apr 26 15:31:39 2025 -0400
+++ b/test/Makefile.template	Sun Apr 27 02:49:53 2025 -0400
@@ -1,4 +1,4 @@
-CPPFLAGS += -O2 -I../include -Wall -Wpedantic -Werror=strict-aliasing
+CPPFLAGS += -g -O2 -I../include -Wall -Wpedantic -Werror=strict-aliasing
 CFLAGS += $(CPPFLAGS) -std=c99
 CXXFLAGS += $(CPPFLAGS) -std=c++11
 
@@ -16,6 +16,7 @@
 	../include/vec/impl/x86/sse41.h \
 	../include/vec/impl/x86/sse42.h \
 	../include/vec/impl/generic.h \
+	../include/vec/impl/gcc.h \
 	test_align.h \
 	test_arith.h \
 	test_compare.h \
--- a/test/test_arith.h	Sat Apr 26 15:31:39 2025 -0400
+++ b/test/test_arith.h	Sun Apr 27 02:49:53 2025 -0400
@@ -1,21 +1,21 @@
-#define CREATE_TEST(sign, psign, csign, bits, size, op, equiv) \
-	static int test_arith_v##sign##int##bits##x##size##_##op(v##sign##int##bits##x##size a, v##sign##int##bits##x##size b) \
+#define CREATE_TEST_EX(sign, psign, csign, bits, size, op, equiv, secondsign, secondcsign) \
+	static int test_arith_v##sign##int##bits##x##size##_##op(v##sign##int##bits##x##size a, v##secondsign##int##bits##x##size b) \
 	{ \
 		V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_a); \
-		V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_b); \
+		V##secondcsign##INT##bits##x##size##_ALIGNED_ARRAY(orig_b); \
 		V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_c); \
 	\
 		v##sign##int##bits##x##size c = v##sign##int##bits##x##size##_##op(a, b); \
 	\
 		v##sign##int##bits##x##size##_store_aligned(a, orig_a); \
-		v##sign##int##bits##x##size##_store_aligned(b, orig_b); \
+		v##secondsign##int##bits##x##size##_store_aligned(b, orig_b); \
 		v##sign##int##bits##x##size##_store_aligned(c, orig_c); \
 	\
 		for (int i = 0; i < size; i++) { \
 			if ((sign##int##bits##_t)(equiv) != orig_c[i]) { \
-				fprintf(stderr, "v" #sign "int" #bits "x" #size "_" #op " test FAILED at index %d: (" #equiv ") [%" PRI ## psign ## bits "] does not equal result [%" PRI ## psign ## bits "]!\n", i, equiv, orig_c[i]); \
+				fprintf(stderr, "v" #sign "int" #bits "x" #size "_" #op " test FAILED at index %d: (%s) [%" PRI ## psign ## bits "] does not equal result [%" PRI ## psign ## bits "]!\n", i, #equiv, (vec_##sign##int##bits)(equiv), orig_c[i]); \
 				print_v##sign##int##bits##x##size(stderr,a); \
-				print_v##sign##int##bits##x##size(stderr,b); \
+				print_v##secondsign##int##bits##x##size(stderr,b); \
 				print_v##sign##int##bits##x##size(stderr,c); \
 				fprintf(stderr, "\n"); \
 				return 1; \
@@ -25,38 +25,18 @@
 		return 0; \
 	}
 
+#define CREATE_TEST(sign, psign, csign, bits, size, op, equiv) \
+	CREATE_TEST_EX(sign, psign, csign, bits, size, op, equiv, sign, csign)
+
 #define CREATE_TEST_SHIFT(sign, psign, csign, bits, size, op, equiv) \
-	static int test_arith_v##sign##int##bits##x##size##_##op(v##sign##int##bits##x##size a, vuint##bits##x##size b) \
-	{ \
-		V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_a); \
-		VUINT##bits##x##size##_ALIGNED_ARRAY(orig_b); \
-		V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_c); \
-	\
-		v##sign##int##bits##x##size c = v##sign##int##bits##x##size##_##op(a, b); \
-	\
-		v##sign##int##bits##x##size##_store_aligned(a, orig_a); \
-		vuint##bits##x##size##_store_aligned(b, orig_b); \
-		v##sign##int##bits##x##size##_store_aligned(c, orig_c); \
-	\
-		for (int i = 0; i < size; i++) { \
-			if ((sign##int##bits##_t)(equiv) != orig_c[i]) { \
-				fprintf(stderr, "v" #sign "int" #bits "x" #size "_" #op " test FAILED at index %d: (" #equiv ") [%" PRI ## psign ## bits "] does not equal result [%" PRI ## psign ## bits "]!\n", i, (sign##int##bits##_t)(equiv), orig_c[i]); \
-				print_v##sign##int##bits##x##size(stderr,a); \
-				print_vuint##bits##x##size(stderr,b); \
-				print_v##sign##int##bits##x##size(stderr,c); \
-				fprintf(stderr, "\n"); \
-				return 1; \
-			} \
-		} \
-	\
-		return 0; \
-	}
+	CREATE_TEST_EX(sign, psign, csign, bits, size, op, equiv, u, U)
 
 #define CREATE_TESTS_SIGN(sign, psign, csign, bits, size) \
 	CREATE_TEST(sign, psign, csign, bits, size, add, orig_a[i] + orig_b[i]) \
 	CREATE_TEST(sign, psign, csign, bits, size, sub, orig_a[i] - orig_b[i]) \
 	CREATE_TEST(sign, psign, csign, bits, size, mul, orig_a[i] * orig_b[i]) \
 	CREATE_TEST(sign, psign, csign, bits, size, div, (orig_b[i]) ? (orig_a[i] / orig_b[i]) : 0) \
+	CREATE_TEST(sign, psign, csign, bits, size, mod, (orig_b[i]) ? (orig_a[i] % orig_b[i]) : 0) \
 	CREATE_TEST(sign, psign, csign, bits, size, and, orig_a[i] & orig_b[i]) \
 	CREATE_TEST(sign, psign, csign, bits, size, or,  orig_a[i] | orig_b[i]) \
 	CREATE_TEST(sign, psign, csign, bits, size, xor, orig_a[i] ^ orig_b[i]) \
@@ -113,6 +93,7 @@
 			ret |= test_arith_v##sign##int##bits##x##size##_sub(a, b); \
 			ret |= test_arith_v##sign##int##bits##x##size##_mul(a, b); \
 			ret |= test_arith_v##sign##int##bits##x##size##_div(a, b); \
+			ret |= test_arith_v##sign##int##bits##x##size##_mod(a, b); \
 			ret |= test_arith_v##sign##int##bits##x##size##_and(a, b); \
 			ret |= test_arith_v##sign##int##bits##x##size##_or(a, b); \
 			ret |= test_arith_v##sign##int##bits##x##size##_xor(a, b); \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils/gengcc.c	Sun Apr 27 02:49:53 2025 -0400
@@ -0,0 +1,390 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+/* Use this file to generate include/vec/impl/gcc.h !!
+ *
+ * `gcc -o gengcc gengcc.c` */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+
+/* ------------------------------------------------------------------------ */
+
+enum op {
+	/* return vector, takes in an integer */
+	OP_SPLAT = 0,
+
+	/* return vector, takes in an array */
+	OP_LOAD_ALIGNED,
+	OP_LOAD,
+
+	/* void, takes in a vector and an array */
+	OP_STORE_ALIGNED,
+	OP_STORE,
+
+	/* return vector, takes in two vectors */
+	OP_ADD,
+	OP_SUB,
+	OP_MUL,
+	OP_AND,
+	OP_OR,
+	OP_XOR,
+	OP_CMPLT,
+	OP_CMPEQ,
+	OP_CMPGT,
+	OP_CMPLE,
+	OP_CMPGE,
+	OP_MIN,
+	OP_MAX,
+	OP_AVG,
+
+	/* return vector, takes in a vector and an explicitly unsigned vector */
+	OP_LSHIFT,
+	OP_RSHIFT,
+	OP_LRSHIFT,
+
+	/* return vector, takes in a vector */
+	OP_NOT,
+
+	OP_FINAL_,
+
+	/* operations that have some sort of "caveat" should go here, until
+	 * they are fixed or removed */
+
+	OP_DIV, /* this one causes a floating point exception on my machine.
+	         * possibly we could change the behavior of divide-by-zero
+	         * with some gcc pragma ?  --paper */
+	OP_MOD, /* ditto with the above */
+};
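+
+/* If OP_DIV/OP_MOD are ever re-enabled, one untested possibility (a sketch
+ * only, not what the generator currently emits) is to guard the divisor
+ * before dividing so that divide-by-zero lanes come out as 0, as the README
+ * requires.  The emitted body could look something like this (`zmask' is a
+ * hypothetical name; use `%' instead of `/' for OP_MOD):
+ *
+ *	__typeof__(vec1.gcc) zmask = (__typeof__(vec1.gcc))(vec2.gcc == 0);
+ *	vec1.gcc = (vec1.gcc / (vec2.gcc | (zmask & 1))) & ~zmask;
+ *	return vec1;
+ *
+ * i.e. substitute 1 for zero divisors, divide, then clear those lanes.
+ * Signed INT_MIN / -1 would still trap, so this is only a partial fix. */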
+
+/* convert op -> string */
+static struct {
+	const char *u;
+	const char *l;
+} op_names[] = {
+	[OP_SPLAT] = {"SPLAT", "splat"},
+	[OP_LOAD_ALIGNED] = {"LOAD_ALIGNED", "load_aligned"},
+	[OP_LOAD] = {"LOAD", "load"},
+	[OP_STORE_ALIGNED] = {"STORE_ALIGNED", "store_aligned"},
+	[OP_STORE] = {"STORE", "store"},
+	[OP_ADD] = {"ADD", "add"},
+	[OP_SUB] = {"SUB", "sub"},
+	[OP_MUL] = {"MUL", "mul"},
+	/*[OP_DIV] = {"DIV", "div"},*/
+	[OP_AVG] = {"AVG", "avg"},
+	[OP_AND] = {"AND", "and"},
+	[OP_OR] = {"OR", "or"},
+	[OP_XOR] = {"XOR", "xor"},
+	[OP_NOT] = {"NOT", "not"},
+	[OP_CMPLT] = {"CMPLT", "cmplt"},
+	[OP_CMPEQ] = {"CMPEQ", "cmpeq"},
+	[OP_CMPGT] = {"CMPGT", "cmpgt"},
+	[OP_CMPLE] = {"CMPLE", "cmple"},
+	[OP_CMPGE] = {"CMPGE", "cmpge"},
+	[OP_MIN] = {"MIN", "min"},
+	[OP_MAX] = {"MAX", "max"},
+	[OP_RSHIFT] = {"RSHIFT", "rshift"},
+	[OP_LRSHIFT] = {"LRSHIFT", "lrshift"},
+	[OP_LSHIFT] = {"LSHIFT", "lshift"},
+};
+
+#define UPSIGN(x) ((x) ? "" : "U")
+#define LOSIGN(x) ((x) ? "" : "u")
+
+static void print_gcc_op(enum op op, int is_signed, int bits, int size)
+{
+	int i;
+
+	printf("#ifndef V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u);
+
+	printf("VEC_FUNC_IMPL ");
+
+	/* first; the return value */
+	switch (op) {
+	case OP_SPLAT:
+	case OP_LOAD_ALIGNED:
+	case OP_LOAD:
+	case OP_ADD:
+	case OP_SUB:
+	case OP_MUL:
+	case OP_DIV:
+	case OP_AND:
+	case OP_OR:
+	case OP_XOR:
+	case OP_CMPLT:
+	case OP_CMPEQ:
+	case OP_CMPGT:
+	case OP_CMPLE:
+	case OP_CMPGE:
+	case OP_MIN:
+	case OP_MAX:
+	case OP_AVG:
+	case OP_RSHIFT:
+	case OP_LRSHIFT:
+	case OP_LSHIFT:
+	case OP_NOT:
+		printf("v%sint%dx%d", LOSIGN(is_signed), bits, size);
+		break;
+	case OP_STORE_ALIGNED:
+	case OP_STORE:
+		printf("void");
+		break;
+	}
+
+	/* whitespace and function name */
+	printf(" v%sint%dx%d_%s(", LOSIGN(is_signed), bits, size, op_names[op].l);
+
+	/* parameters */
+	switch (op) {
+	case OP_SPLAT:
+		printf("vec_%sint%d x", LOSIGN(is_signed), bits);
+		break;
+	case OP_LOAD_ALIGNED:
+	case OP_LOAD:
+		printf("const vec_%sint%d x[%d]", LOSIGN(is_signed), bits, size);
+		break;
+	case OP_STORE_ALIGNED:
+	case OP_STORE:
+		printf("v%sint%dx%d vec, vec_%sint%d arr[%d]", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size);
+		break;
+	case OP_ADD:
+	case OP_SUB:
+	case OP_MUL:
+	case OP_DIV:
+	case OP_AND:
+	case OP_OR:
+	case OP_XOR:
+	case OP_CMPLT:
+	case OP_CMPEQ:
+	case OP_CMPGT:
+	case OP_CMPLE:
+	case OP_CMPGE:
+	case OP_MIN:
+	case OP_MAX:
+	case OP_AVG:
+		printf("v%sint%dx%d vec1, v%sint%dx%d vec2", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size);
+		break;
+	case OP_RSHIFT:
+	case OP_LRSHIFT:
+	case OP_LSHIFT:
+		printf("v%sint%dx%d vec1, vuint%dx%d vec2", LOSIGN(is_signed), bits, size, bits, size);
+		break;
+	case OP_NOT:
+		printf("v%sint%dx%d vec", LOSIGN(is_signed), bits, size);
+		break;
+	}
+
+	puts(")\n{");
+
+	switch (op) {
+	case OP_SPLAT:
+		printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
+		printf("\tvec.gcc = (__typeof__(vec.gcc)){");
+		for (i = 0; i < size; i++)
+			printf("x,");
+		printf("};\n");
+		printf("\treturn vec;\n");
+		break;
+	case OP_LOAD_ALIGNED:
+		printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
+		puts("\tvec.gcc = *(__typeof__(vec.gcc) *)x;");
+		printf("\treturn vec;\n");
+		break;
+	case OP_LOAD:
+		printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
+		puts("\tmemcpy(&vec, x, sizeof(vec));");
+		printf("\treturn vec;\n");
+		break;
+	case OP_STORE_ALIGNED:
+		puts("\t*(__typeof__(vec.gcc) *)arr = vec.gcc;");
+		break;
+	case OP_STORE:
+		puts("\tmemcpy(arr, &vec, sizeof(vec));");
+		break;
+	case OP_ADD:
+	case OP_SUB:
+	case OP_MUL:
+	case OP_DIV:
+	case OP_AND:
+	case OP_OR:
+	case OP_XOR:
+	case OP_CMPLT:
+	case OP_CMPEQ:
+	case OP_CMPGT:
+	case OP_CMPLE:
+	case OP_CMPGE: {
+		const char *op_builtins[OP_CMPGE - OP_ADD + 1] = {"+", "-", "*", /*"/", */"&", "|", "^", "<", "==", ">", "<=", ">="};
+
+		printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_ADD]);
+		printf("\treturn vec1;\n");
+		break;
+	}
+
+	case OP_LSHIFT:
+	case OP_RSHIFT: {
+		const char *op_builtins[OP_RSHIFT - OP_LSHIFT + 1] = {"<<", ">>"};
+
+		printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_LSHIFT]);
+		printf("\treturn vec1;\n");
+		break;
+	}
+
+	case OP_LRSHIFT: {
+		/* sigh. >> on a signed vector is arithmetic, so round-trip
+		 * through an unsigned vector of the same size to get a
+		 * logical shift */
+		printf("\tvec1.gcc = (__typeof__(vec1.gcc))((vec_uint%d __attribute__((__vector_size__(%d))))vec1.gcc >> vec2.gcc);\n", bits, bits * size / 8);
+		printf("\treturn vec1;\n");
+		break;
+	}
+	case OP_MIN:
+	case OP_MAX: {
+		const char *op_builtins[OP_MAX - OP_MIN + 1] = {"<", ">"};
+
+		printf("\tv%sint%dx%d mask;\n", LOSIGN(is_signed), bits, size);
+		printf("\tmask.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_MIN]);
+		printf("\tvec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);\n");
+		printf("\treturn vec1;\n");
+		break;
+	}
+	case OP_AVG:
+		if (is_signed) {
+			printf("\tvint%dx%d ones = vint%dx%d_splat(1);\n", bits, size, bits, size);
+			printf("\t__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);\n");
+			printf("\t__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);\n");
+			printf("\t__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);\n");
+			printf("\t__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);\n");
+			puts("");
+			printf("\tvec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);\n");
+		} else {
+			printf("\tvec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);\n");
+		}
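+		/* Worked example for the signed branch above (an illustrative
+		 * check, not generated): avg(3, 4) expands to
+		 * (3/2 + 4/2) + ((1 + 0) / 2) + ((1 + 0) % 2 == 1)
+		 * = 1 + 2 + 0 + 1 = 4, so halves round upward, matching the
+		 * vec_avg behavior the README describes; the unsigned branch
+		 * is the usual overflow-free
+		 * (x >> 1) + (y >> 1) + ((x | y) & 1) form. */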
+
+		printf("\treturn vec1;\n");
+		break;
+	case OP_NOT:
+		printf("\tvec.gcc = ~vec.gcc;\n");
+		printf("\treturn vec;\n");
+		break;
+	default:
+		printf("#error implement this operation");
+		break;
+	}
+
+	/* end function definition */
+	puts("}");
+
+	printf("# define V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u);
+	puts("#endif");
+}
+
+static inline void print_ops(int is_signed, int bits, int size)
+{
+	int i;
+
+	printf("\n\n/* v%sint%dx%d */\n\n", (is_signed ? "u" : ""), bits, size);
+
+	for (i = 0; i < OP_FINAL_; i++)
+		print_gcc_op(i, is_signed, bits, size);
+}
+
+static const char *header =
+	"/**\n"
+	" * vec - a tiny SIMD vector library in C99\n"
+	" * \n"
+	" * Copyright (c) 2024-2025 Paper\n"
+	" * \n"
+	" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
+	" * of this software and associated documentation files (the \"Software\"), to deal\n"
+	" * in the Software without restriction, including without limitation the rights\n"
+	" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
+	" * copies of the Software, and to permit persons to whom the Software is\n"
+	" * furnished to do so, subject to the following conditions:\n"
+	" * \n"
+	" * The above copyright notice and this permission notice shall be included in all\n"
+	" * copies or substantial portions of the Software.\n"
+	" * \n"
+	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
+	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
+	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
+	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
+	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
+	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
+	" * SOFTWARE.\n"
+	"**/\n"
+	"\n"
+	"/* This file is automatically generated! Do not edit it directly!\n"
+	" * Edit the code that generates it in utils/gengcc.c  --paper */\n"
+	"\n"
+	"#ifndef VEC_IMPL_GCC_H_\n"
+	"#define VEC_IMPL_GCC_H_\n"
+	"\n";
+
+static const char *footer = 
+	"#endif /* VEC_IMPL_GCC_H_ */\n";
+
+int main(void)
+{
+	static struct {
+		int bits, size;
+	} defs[] = {
+		/* -- 8-bit */
+		{8, 2},
+		{8, 4},
+		{8, 8},
+		{8, 16},
+		{8, 32},
+		{8, 64},
+
+		/* -- 16-bit */
+		{16, 2},
+		{16, 4},
+		{16, 8},
+		{16, 16},
+		{16, 32},
+
+		/* -- 32-bit */
+		{32, 2},
+		{32, 4},
+		{32, 8},
+		{32, 16},
+
+		/* -- 64-bit */
+		{64, 2},
+		{64, 4},
+		{64, 8},
+	};
+	int i;
+
+	puts(header);
+
+	for (i = 0; i < ARRAY_SIZE(defs); i++) {
+		print_ops(1, defs[i].bits, defs[i].size);
+		print_ops(0, defs[i].bits, defs[i].size);
+	}
+
+	puts(footer);
+}
--- a/utils/gengeneric.c	Sat Apr 26 15:31:39 2025 -0400
+++ b/utils/gengeneric.c	Sun Apr 27 02:49:53 2025 -0400
@@ -137,6 +137,12 @@
 	"		VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] / vec2.generic[i]) : 0, sign, bits, size); \\\n"
 	"	}\n"
 	"\n"
+	"#define VEC_GENERIC_MOD(sign, bits, size) \\\n"
+	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_mod(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
+	"	{ \\\n"
+	"		VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] % vec2.generic[i]) : 0, sign, bits, size); \\\n"
+	"	}\n"
+	"\n"
 	"#define VEC_GENERIC_AVG(sign, bits, size) \\\n"
 	"	VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
 	"	{ \\\n"
@@ -288,6 +294,7 @@
 	"#define VEC_GENERIC_DBL_SUB(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(sub, sign, bits, size, halfsize, sign)\n"
 	"#define VEC_GENERIC_DBL_MUL(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mul, sign, bits, size, halfsize, sign)\n"
 	"#define VEC_GENERIC_DBL_DIV(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(div, sign, bits, size, halfsize, sign)\n"
+	"#define VEC_GENERIC_DBL_MOD(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mod, sign, bits, size, halfsize, sign)\n"
 	"#define VEC_GENERIC_DBL_AVG(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(avg, sign, bits, size, halfsize, sign)\n"
 	"#define VEC_GENERIC_DBL_LSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(lshift, sign, bits, size, halfsize, u)\n"
 	"#define VEC_GENERIC_DBL_RSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(rshift, sign, bits, size, halfsize, u)\n"
@@ -355,6 +362,7 @@
 		"SUB",
 		"MUL",
 		"DIV",
+		"MOD",
 		"AVG",
 		"AND",
 		"OR",