# HG changeset patch
# User Paper
# Date 1745736593 14400
# Node ID 55cadb1fac4b902f407f672c843bc2bfb71ab6e6
# Parent f9ca85d2f14c7ef86ee7bdfafc856d83c9edec32
*: add mod operation, add GCC vector backend

need to test it with old gcc though. :)

diff -r f9ca85d2f14c -r 55cadb1fac4b README
--- a/README    Sat Apr 26 15:31:39 2025 -0400
+++ b/README    Sun Apr 27 02:49:53 2025 -0400
@@ -63,6 +63,10 @@
     considered defined behavior and should result in a zero;
     if this doesn't happen it's considered a bug

+    v[u]intAxB mod(v[u]intAxB vec1, v[u]intAxB vec2)
+        gives the remainder of a division operation. As with div,
+        divide-by-zero is defined behavior.
+
     v[u]intAxB and(v[u]intAxB vec1, v[u]intAxB vec2)
         bitwise AND (&) of the values in both vectors

@@ -87,7 +91,9 @@
     v[u]intAxB avg(v[u]intAxB vec1, v[u]intAxB vec2)
         returns the average of the values in both vectors
         i.e., div(add(vec1, vec2), splat(2)), without
-        the possibility of overflow.
+        the possibility of overflow. If you are familiar
+        with AltiVec, this operation exactly mimics
+        vec_avg.

     v[u]intAxB min(v[u]intAxB vec1, v[u]intAxB vec2)
         returns the minimum of the values in both vectors
diff -r f9ca85d2f14c -r 55cadb1fac4b include/vec/impl/gcc.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/include/vec/impl/gcc.h	Sun Apr 27 02:49:53 2025 -0400
@@ -0,0 +1,7089 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ *
+ * Copyright (c) 2024-2025 Paper
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+/* This file is automatically generated! Do not edit it directly!
+ * Edit the code that generates it in utils/gengcc.c --paper */ + +#ifndef VEC_IMPL_GCC_H_ +#define VEC_IMPL_GCC_H_ + + + + +/* vuint8x2 */ + +#ifndef VINT8x2_SPLAT_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_splat(vec_int8 x) +{ + vint8x2 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,}; + return vec; +} +# define VINT8x2_SPLAT_DEFINED +#endif +#ifndef VINT8x2_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_load_aligned(const vec_int8 x[2]) +{ + vint8x2 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT8x2_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT8x2_LOAD_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_load(const vec_int8 x[2]) +{ + vint8x2 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT8x2_LOAD_DEFINED +#endif +#ifndef VINT8x2_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint8x2_store_aligned(vint8x2 vec, vec_int8 arr[2]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT8x2_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT8x2_STORE_DEFINED +VEC_FUNC_IMPL void vint8x2_store(vint8x2 vec, vec_int8 arr[2]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT8x2_STORE_DEFINED +#endif +#ifndef VINT8x2_ADD_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_add(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT8x2_ADD_DEFINED +#endif +#ifndef VINT8x2_SUB_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_sub(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT8x2_SUB_DEFINED +#endif +#ifndef VINT8x2_MUL_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_mul(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT8x2_MUL_DEFINED +#endif +#ifndef VINT8x2_AND_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_and(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT8x2_AND_DEFINED +#endif +#ifndef VINT8x2_OR_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_or(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT8x2_OR_DEFINED +#endif +#ifndef VINT8x2_XOR_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_xor(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT8x2_XOR_DEFINED +#endif +#ifndef VINT8x2_CMPLT_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_cmplt(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT8x2_CMPLT_DEFINED +#endif +#ifndef VINT8x2_CMPEQ_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_cmpeq(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT8x2_CMPEQ_DEFINED +#endif +#ifndef VINT8x2_CMPGT_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_cmpgt(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT8x2_CMPGT_DEFINED +#endif +#ifndef VINT8x2_CMPLE_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_cmple(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT8x2_CMPLE_DEFINED +#endif +#ifndef VINT8x2_CMPGE_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_cmpge(vint8x2 vec1, vint8x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT8x2_CMPGE_DEFINED +#endif +#ifndef VINT8x2_MIN_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_min(vint8x2 vec1, vint8x2 vec2) +{ + vint8x2 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x2_MIN_DEFINED +#endif +#ifndef VINT8x2_MAX_DEFINED +VEC_FUNC_IMPL vint8x2 
vint8x2_max(vint8x2 vec1, vint8x2 vec2) +{ + vint8x2 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x2_MAX_DEFINED +#endif +#ifndef VINT8x2_AVG_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_avg(vint8x2 vec1, vint8x2 vec2) +{ + vint8x2 ones = vint8x2_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT8x2_AVG_DEFINED +#endif +#ifndef VINT8x2_LSHIFT_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_lshift(vint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT8x2_LSHIFT_DEFINED +#endif +#ifndef VINT8x2_RSHIFT_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_rshift(vint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT8x2_RSHIFT_DEFINED +#endif +#ifndef VINT8x2_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_lrshift(vint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(2))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT8x2_LRSHIFT_DEFINED +#endif +#ifndef VINT8x2_NOT_DEFINED +VEC_FUNC_IMPL vint8x2 vint8x2_not(vint8x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT8x2_NOT_DEFINED +#endif + + +/* vint8x2 */ + +#ifndef VUINT8x2_SPLAT_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_splat(vec_uint8 x) +{ + vuint8x2 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,}; + return vec; +} +# define VUINT8x2_SPLAT_DEFINED +#endif +#ifndef VUINT8x2_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_load_aligned(const vec_uint8 x[2]) +{ + vuint8x2 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT8x2_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT8x2_LOAD_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_load(const vec_uint8 x[2]) +{ + vuint8x2 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT8x2_LOAD_DEFINED +#endif +#ifndef VUINT8x2_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint8x2_store_aligned(vuint8x2 vec, vec_uint8 arr[2]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT8x2_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT8x2_STORE_DEFINED +VEC_FUNC_IMPL void vuint8x2_store(vuint8x2 vec, vec_uint8 arr[2]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT8x2_STORE_DEFINED +#endif +#ifndef VUINT8x2_ADD_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_add(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT8x2_ADD_DEFINED +#endif +#ifndef VUINT8x2_SUB_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_sub(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT8x2_SUB_DEFINED +#endif +#ifndef VUINT8x2_MUL_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_mul(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT8x2_MUL_DEFINED +#endif +#ifndef VUINT8x2_AND_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_and(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT8x2_AND_DEFINED +#endif +#ifndef VUINT8x2_OR_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_or(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT8x2_OR_DEFINED +#endif +#ifndef 
VUINT8x2_XOR_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_xor(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT8x2_XOR_DEFINED +#endif +#ifndef VUINT8x2_CMPLT_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmplt(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT8x2_CMPLT_DEFINED +#endif +#ifndef VUINT8x2_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpeq(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT8x2_CMPEQ_DEFINED +#endif +#ifndef VUINT8x2_CMPGT_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpgt(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT8x2_CMPGT_DEFINED +#endif +#ifndef VUINT8x2_CMPLE_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmple(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT8x2_CMPLE_DEFINED +#endif +#ifndef VUINT8x2_CMPGE_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_cmpge(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT8x2_CMPGE_DEFINED +#endif +#ifndef VUINT8x2_MIN_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_min(vuint8x2 vec1, vuint8x2 vec2) +{ + vuint8x2 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT8x2_MIN_DEFINED +#endif +#ifndef VUINT8x2_MAX_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_max(vuint8x2 vec1, vuint8x2 vec2) +{ + vuint8x2 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT8x2_MAX_DEFINED +#endif +#ifndef VUINT8x2_AVG_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_avg(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT8x2_AVG_DEFINED +#endif +#ifndef VUINT8x2_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_lshift(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT8x2_LSHIFT_DEFINED +#endif +#ifndef VUINT8x2_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_rshift(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT8x2_RSHIFT_DEFINED +#endif +#ifndef VUINT8x2_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_lrshift(vuint8x2 vec1, vuint8x2 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(2))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT8x2_LRSHIFT_DEFINED +#endif +#ifndef VUINT8x2_NOT_DEFINED +VEC_FUNC_IMPL vuint8x2 vuint8x2_not(vuint8x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT8x2_NOT_DEFINED +#endif + + +/* vuint8x4 */ + +#ifndef VINT8x4_SPLAT_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_splat(vec_int8 x) +{ + vint8x4 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + return vec; +} +# define VINT8x4_SPLAT_DEFINED +#endif +#ifndef VINT8x4_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_load_aligned(const vec_int8 x[4]) +{ + vint8x4 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT8x4_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT8x4_LOAD_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_load(const vec_int8 x[4]) +{ + vint8x4 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT8x4_LOAD_DEFINED +#endif +#ifndef VINT8x4_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint8x4_store_aligned(vint8x4 vec, vec_int8 arr[4]) +{ 
+ *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT8x4_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT8x4_STORE_DEFINED +VEC_FUNC_IMPL void vint8x4_store(vint8x4 vec, vec_int8 arr[4]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT8x4_STORE_DEFINED +#endif +#ifndef VINT8x4_ADD_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_add(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT8x4_ADD_DEFINED +#endif +#ifndef VINT8x4_SUB_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_sub(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT8x4_SUB_DEFINED +#endif +#ifndef VINT8x4_MUL_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_mul(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT8x4_MUL_DEFINED +#endif +#ifndef VINT8x4_AND_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_and(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT8x4_AND_DEFINED +#endif +#ifndef VINT8x4_OR_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_or(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT8x4_OR_DEFINED +#endif +#ifndef VINT8x4_XOR_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_xor(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT8x4_XOR_DEFINED +#endif +#ifndef VINT8x4_CMPLT_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_cmplt(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT8x4_CMPLT_DEFINED +#endif +#ifndef VINT8x4_CMPEQ_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_cmpeq(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT8x4_CMPEQ_DEFINED +#endif +#ifndef VINT8x4_CMPGT_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_cmpgt(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT8x4_CMPGT_DEFINED +#endif +#ifndef VINT8x4_CMPLE_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_cmple(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT8x4_CMPLE_DEFINED +#endif +#ifndef VINT8x4_CMPGE_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_cmpge(vint8x4 vec1, vint8x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT8x4_CMPGE_DEFINED +#endif +#ifndef VINT8x4_MIN_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_min(vint8x4 vec1, vint8x4 vec2) +{ + vint8x4 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x4_MIN_DEFINED +#endif +#ifndef VINT8x4_MAX_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_max(vint8x4 vec1, vint8x4 vec2) +{ + vint8x4 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x4_MAX_DEFINED +#endif +#ifndef VINT8x4_AVG_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_avg(vint8x4 vec1, vint8x4 vec2) +{ + vint8x4 ones = vint8x4_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT8x4_AVG_DEFINED +#endif +#ifndef VINT8x4_LSHIFT_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_lshift(vint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define 
VINT8x4_LSHIFT_DEFINED +#endif +#ifndef VINT8x4_RSHIFT_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_rshift(vint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT8x4_RSHIFT_DEFINED +#endif +#ifndef VINT8x4_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_lrshift(vint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT8x4_LRSHIFT_DEFINED +#endif +#ifndef VINT8x4_NOT_DEFINED +VEC_FUNC_IMPL vint8x4 vint8x4_not(vint8x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT8x4_NOT_DEFINED +#endif + + +/* vint8x4 */ + +#ifndef VUINT8x4_SPLAT_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_splat(vec_uint8 x) +{ + vuint8x4 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + return vec; +} +# define VUINT8x4_SPLAT_DEFINED +#endif +#ifndef VUINT8x4_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_load_aligned(const vec_uint8 x[4]) +{ + vuint8x4 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT8x4_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT8x4_LOAD_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_load(const vec_uint8 x[4]) +{ + vuint8x4 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT8x4_LOAD_DEFINED +#endif +#ifndef VUINT8x4_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint8x4_store_aligned(vuint8x4 vec, vec_uint8 arr[4]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT8x4_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT8x4_STORE_DEFINED +VEC_FUNC_IMPL void vuint8x4_store(vuint8x4 vec, vec_uint8 arr[4]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT8x4_STORE_DEFINED +#endif +#ifndef VUINT8x4_ADD_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_add(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT8x4_ADD_DEFINED +#endif +#ifndef VUINT8x4_SUB_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_sub(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT8x4_SUB_DEFINED +#endif +#ifndef VUINT8x4_MUL_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_mul(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT8x4_MUL_DEFINED +#endif +#ifndef VUINT8x4_AND_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_and(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT8x4_AND_DEFINED +#endif +#ifndef VUINT8x4_OR_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_or(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT8x4_OR_DEFINED +#endif +#ifndef VUINT8x4_XOR_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_xor(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT8x4_XOR_DEFINED +#endif +#ifndef VUINT8x4_CMPLT_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmplt(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT8x4_CMPLT_DEFINED +#endif +#ifndef VUINT8x4_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpeq(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT8x4_CMPEQ_DEFINED +#endif +#ifndef VUINT8x4_CMPGT_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpgt(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT8x4_CMPGT_DEFINED +#endif +#ifndef VUINT8x4_CMPLE_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmple(vuint8x4 vec1, vuint8x4 
vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT8x4_CMPLE_DEFINED +#endif +#ifndef VUINT8x4_CMPGE_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_cmpge(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT8x4_CMPGE_DEFINED +#endif +#ifndef VUINT8x4_MIN_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_min(vuint8x4 vec1, vuint8x4 vec2) +{ + vuint8x4 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT8x4_MIN_DEFINED +#endif +#ifndef VUINT8x4_MAX_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_max(vuint8x4 vec1, vuint8x4 vec2) +{ + vuint8x4 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT8x4_MAX_DEFINED +#endif +#ifndef VUINT8x4_AVG_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_avg(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT8x4_AVG_DEFINED +#endif +#ifndef VUINT8x4_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_lshift(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT8x4_LSHIFT_DEFINED +#endif +#ifndef VUINT8x4_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_rshift(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT8x4_RSHIFT_DEFINED +#endif +#ifndef VUINT8x4_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_lrshift(vuint8x4 vec1, vuint8x4 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT8x4_LRSHIFT_DEFINED +#endif +#ifndef VUINT8x4_NOT_DEFINED +VEC_FUNC_IMPL vuint8x4 vuint8x4_not(vuint8x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT8x4_NOT_DEFINED +#endif + + +/* vuint8x8 */ + +#ifndef VINT8x8_SPLAT_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_splat(vec_int8 x) +{ + vint8x8 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + return vec; +} +# define VINT8x8_SPLAT_DEFINED +#endif +#ifndef VINT8x8_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_load_aligned(const vec_int8 x[8]) +{ + vint8x8 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT8x8_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT8x8_LOAD_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_load(const vec_int8 x[8]) +{ + vint8x8 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT8x8_LOAD_DEFINED +#endif +#ifndef VINT8x8_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint8x8_store_aligned(vint8x8 vec, vec_int8 arr[8]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT8x8_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT8x8_STORE_DEFINED +VEC_FUNC_IMPL void vint8x8_store(vint8x8 vec, vec_int8 arr[8]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT8x8_STORE_DEFINED +#endif +#ifndef VINT8x8_ADD_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_add(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT8x8_ADD_DEFINED +#endif +#ifndef VINT8x8_SUB_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_sub(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT8x8_SUB_DEFINED +#endif +#ifndef VINT8x8_MUL_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_mul(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT8x8_MUL_DEFINED +#endif +#ifndef VINT8x8_AND_DEFINED 
+VEC_FUNC_IMPL vint8x8 vint8x8_and(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT8x8_AND_DEFINED +#endif +#ifndef VINT8x8_OR_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_or(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT8x8_OR_DEFINED +#endif +#ifndef VINT8x8_XOR_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_xor(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT8x8_XOR_DEFINED +#endif +#ifndef VINT8x8_CMPLT_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_cmplt(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT8x8_CMPLT_DEFINED +#endif +#ifndef VINT8x8_CMPEQ_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_cmpeq(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT8x8_CMPEQ_DEFINED +#endif +#ifndef VINT8x8_CMPGT_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_cmpgt(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT8x8_CMPGT_DEFINED +#endif +#ifndef VINT8x8_CMPLE_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_cmple(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT8x8_CMPLE_DEFINED +#endif +#ifndef VINT8x8_CMPGE_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_cmpge(vint8x8 vec1, vint8x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT8x8_CMPGE_DEFINED +#endif +#ifndef VINT8x8_MIN_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_min(vint8x8 vec1, vint8x8 vec2) +{ + vint8x8 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x8_MIN_DEFINED +#endif +#ifndef VINT8x8_MAX_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_max(vint8x8 vec1, vint8x8 vec2) +{ + vint8x8 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x8_MAX_DEFINED +#endif +#ifndef VINT8x8_AVG_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_avg(vint8x8 vec1, vint8x8 vec2) +{ + vint8x8 ones = vint8x8_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT8x8_AVG_DEFINED +#endif +#ifndef VINT8x8_LSHIFT_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_lshift(vint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT8x8_LSHIFT_DEFINED +#endif +#ifndef VINT8x8_RSHIFT_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_rshift(vint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT8x8_RSHIFT_DEFINED +#endif +#ifndef VINT8x8_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_lrshift(vint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT8x8_LRSHIFT_DEFINED +#endif +#ifndef VINT8x8_NOT_DEFINED +VEC_FUNC_IMPL vint8x8 vint8x8_not(vint8x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT8x8_NOT_DEFINED +#endif + + +/* vint8x8 */ + +#ifndef VUINT8x8_SPLAT_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_splat(vec_uint8 x) +{ + vuint8x8 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + return vec; +} +# 
define VUINT8x8_SPLAT_DEFINED +#endif +#ifndef VUINT8x8_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_load_aligned(const vec_uint8 x[8]) +{ + vuint8x8 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT8x8_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT8x8_LOAD_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_load(const vec_uint8 x[8]) +{ + vuint8x8 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT8x8_LOAD_DEFINED +#endif +#ifndef VUINT8x8_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint8x8_store_aligned(vuint8x8 vec, vec_uint8 arr[8]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT8x8_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT8x8_STORE_DEFINED +VEC_FUNC_IMPL void vuint8x8_store(vuint8x8 vec, vec_uint8 arr[8]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT8x8_STORE_DEFINED +#endif +#ifndef VUINT8x8_ADD_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_add(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT8x8_ADD_DEFINED +#endif +#ifndef VUINT8x8_SUB_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_sub(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT8x8_SUB_DEFINED +#endif +#ifndef VUINT8x8_MUL_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_mul(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT8x8_MUL_DEFINED +#endif +#ifndef VUINT8x8_AND_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_and(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT8x8_AND_DEFINED +#endif +#ifndef VUINT8x8_OR_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_or(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT8x8_OR_DEFINED +#endif +#ifndef VUINT8x8_XOR_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_xor(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT8x8_XOR_DEFINED +#endif +#ifndef VUINT8x8_CMPLT_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmplt(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT8x8_CMPLT_DEFINED +#endif +#ifndef VUINT8x8_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpeq(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT8x8_CMPEQ_DEFINED +#endif +#ifndef VUINT8x8_CMPGT_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpgt(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT8x8_CMPGT_DEFINED +#endif +#ifndef VUINT8x8_CMPLE_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmple(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT8x8_CMPLE_DEFINED +#endif +#ifndef VUINT8x8_CMPGE_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_cmpge(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT8x8_CMPGE_DEFINED +#endif +#ifndef VUINT8x8_MIN_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_min(vuint8x8 vec1, vuint8x8 vec2) +{ + vuint8x8 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT8x8_MIN_DEFINED +#endif +#ifndef VUINT8x8_MAX_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_max(vuint8x8 vec1, vuint8x8 vec2) +{ + vuint8x8 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT8x8_MAX_DEFINED +#endif 
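+/* Editorial note, not part of the generated output: the unsigned avg
+ * implementations in this backend rely on the identity
+ *     (a >> 1) + (b >> 1) + ((a | b) & 1) == (a + b + 1) >> 1
+ * for unsigned a and b, i.e. a rounded-up average computed without an
+ * overflowing intermediate sum, matching the rounding of AltiVec's vec_avg
+ * that the README references. The signed variants instead split each operand
+ * into a quotient and remainder modulo 2 and recombine the pieces, again so
+ * that the intermediate sum cannot overflow. */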
+#ifndef VUINT8x8_AVG_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_avg(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT8x8_AVG_DEFINED +#endif +#ifndef VUINT8x8_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_lshift(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT8x8_LSHIFT_DEFINED +#endif +#ifndef VUINT8x8_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_rshift(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT8x8_RSHIFT_DEFINED +#endif +#ifndef VUINT8x8_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_lrshift(vuint8x8 vec1, vuint8x8 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT8x8_LRSHIFT_DEFINED +#endif +#ifndef VUINT8x8_NOT_DEFINED +VEC_FUNC_IMPL vuint8x8 vuint8x8_not(vuint8x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT8x8_NOT_DEFINED +#endif + + +/* vuint8x16 */ + +#ifndef VINT8x16_SPLAT_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_splat(vec_int8 x) +{ + vint8x16 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + return vec; +} +# define VINT8x16_SPLAT_DEFINED +#endif +#ifndef VINT8x16_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_load_aligned(const vec_int8 x[16]) +{ + vint8x16 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT8x16_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT8x16_LOAD_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_load(const vec_int8 x[16]) +{ + vint8x16 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT8x16_LOAD_DEFINED +#endif +#ifndef VINT8x16_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint8x16_store_aligned(vint8x16 vec, vec_int8 arr[16]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT8x16_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT8x16_STORE_DEFINED +VEC_FUNC_IMPL void vint8x16_store(vint8x16 vec, vec_int8 arr[16]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT8x16_STORE_DEFINED +#endif +#ifndef VINT8x16_ADD_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_add(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT8x16_ADD_DEFINED +#endif +#ifndef VINT8x16_SUB_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_sub(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT8x16_SUB_DEFINED +#endif +#ifndef VINT8x16_MUL_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_mul(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT8x16_MUL_DEFINED +#endif +#ifndef VINT8x16_AND_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_and(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT8x16_AND_DEFINED +#endif +#ifndef VINT8x16_OR_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_or(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT8x16_OR_DEFINED +#endif +#ifndef VINT8x16_XOR_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_xor(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT8x16_XOR_DEFINED +#endif +#ifndef VINT8x16_CMPLT_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_cmplt(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT8x16_CMPLT_DEFINED +#endif +#ifndef VINT8x16_CMPEQ_DEFINED +VEC_FUNC_IMPL 
vint8x16 vint8x16_cmpeq(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT8x16_CMPEQ_DEFINED +#endif +#ifndef VINT8x16_CMPGT_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_cmpgt(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT8x16_CMPGT_DEFINED +#endif +#ifndef VINT8x16_CMPLE_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_cmple(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT8x16_CMPLE_DEFINED +#endif +#ifndef VINT8x16_CMPGE_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_cmpge(vint8x16 vec1, vint8x16 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT8x16_CMPGE_DEFINED +#endif +#ifndef VINT8x16_MIN_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_min(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x16_MIN_DEFINED +#endif +#ifndef VINT8x16_MAX_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_max(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x16_MAX_DEFINED +#endif +#ifndef VINT8x16_AVG_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_avg(vint8x16 vec1, vint8x16 vec2) +{ + vint8x16 ones = vint8x16_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT8x16_AVG_DEFINED +#endif +#ifndef VINT8x16_LSHIFT_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_lshift(vint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT8x16_LSHIFT_DEFINED +#endif +#ifndef VINT8x16_RSHIFT_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_rshift(vint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT8x16_RSHIFT_DEFINED +#endif +#ifndef VINT8x16_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_lrshift(vint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT8x16_LRSHIFT_DEFINED +#endif +#ifndef VINT8x16_NOT_DEFINED +VEC_FUNC_IMPL vint8x16 vint8x16_not(vint8x16 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT8x16_NOT_DEFINED +#endif + + +/* vint8x16 */ + +#ifndef VUINT8x16_SPLAT_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_splat(vec_uint8 x) +{ + vuint8x16 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + return vec; +} +# define VUINT8x16_SPLAT_DEFINED +#endif +#ifndef VUINT8x16_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_load_aligned(const vec_uint8 x[16]) +{ + vuint8x16 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT8x16_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT8x16_LOAD_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_load(const vec_uint8 x[16]) +{ + vuint8x16 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT8x16_LOAD_DEFINED +#endif +#ifndef VUINT8x16_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint8x16_store_aligned(vuint8x16 vec, vec_uint8 arr[16]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define 
VUINT8x16_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT8x16_STORE_DEFINED +VEC_FUNC_IMPL void vuint8x16_store(vuint8x16 vec, vec_uint8 arr[16]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT8x16_STORE_DEFINED +#endif +#ifndef VUINT8x16_ADD_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_add(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT8x16_ADD_DEFINED +#endif +#ifndef VUINT8x16_SUB_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_sub(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT8x16_SUB_DEFINED +#endif +#ifndef VUINT8x16_MUL_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_mul(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT8x16_MUL_DEFINED +#endif +#ifndef VUINT8x16_AND_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_and(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT8x16_AND_DEFINED +#endif +#ifndef VUINT8x16_OR_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_or(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT8x16_OR_DEFINED +#endif +#ifndef VUINT8x16_XOR_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_xor(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT8x16_XOR_DEFINED +#endif +#ifndef VUINT8x16_CMPLT_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmplt(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT8x16_CMPLT_DEFINED +#endif +#ifndef VUINT8x16_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpeq(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT8x16_CMPEQ_DEFINED +#endif +#ifndef VUINT8x16_CMPGT_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpgt(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT8x16_CMPGT_DEFINED +#endif +#ifndef VUINT8x16_CMPLE_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmple(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT8x16_CMPLE_DEFINED +#endif +#ifndef VUINT8x16_CMPGE_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_cmpge(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT8x16_CMPGE_DEFINED +#endif +#ifndef VUINT8x16_MIN_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_min(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT8x16_MIN_DEFINED +#endif +#ifndef VUINT8x16_MAX_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_max(vuint8x16 vec1, vuint8x16 vec2) +{ + vuint8x16 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT8x16_MAX_DEFINED +#endif +#ifndef VUINT8x16_AVG_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_avg(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT8x16_AVG_DEFINED +#endif +#ifndef VUINT8x16_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_lshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT8x16_LSHIFT_DEFINED +#endif +#ifndef VUINT8x16_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_rshift(vuint8x16 vec1, vuint8x16 
vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT8x16_RSHIFT_DEFINED +#endif +#ifndef VUINT8x16_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_lrshift(vuint8x16 vec1, vuint8x16 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT8x16_LRSHIFT_DEFINED +#endif +#ifndef VUINT8x16_NOT_DEFINED +VEC_FUNC_IMPL vuint8x16 vuint8x16_not(vuint8x16 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT8x16_NOT_DEFINED +#endif + + +/* vuint8x32 */ + +#ifndef VINT8x32_SPLAT_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_splat(vec_int8 x) +{ + vint8x32 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + return vec; +} +# define VINT8x32_SPLAT_DEFINED +#endif +#ifndef VINT8x32_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_load_aligned(const vec_int8 x[32]) +{ + vint8x32 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT8x32_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT8x32_LOAD_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_load(const vec_int8 x[32]) +{ + vint8x32 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT8x32_LOAD_DEFINED +#endif +#ifndef VINT8x32_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint8x32_store_aligned(vint8x32 vec, vec_int8 arr[32]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT8x32_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT8x32_STORE_DEFINED +VEC_FUNC_IMPL void vint8x32_store(vint8x32 vec, vec_int8 arr[32]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT8x32_STORE_DEFINED +#endif +#ifndef VINT8x32_ADD_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_add(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT8x32_ADD_DEFINED +#endif +#ifndef VINT8x32_SUB_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_sub(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT8x32_SUB_DEFINED +#endif +#ifndef VINT8x32_MUL_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_mul(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT8x32_MUL_DEFINED +#endif +#ifndef VINT8x32_AND_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_and(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT8x32_AND_DEFINED +#endif +#ifndef VINT8x32_OR_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_or(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT8x32_OR_DEFINED +#endif +#ifndef VINT8x32_XOR_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_xor(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT8x32_XOR_DEFINED +#endif +#ifndef VINT8x32_CMPLT_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_cmplt(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT8x32_CMPLT_DEFINED +#endif +#ifndef VINT8x32_CMPEQ_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_cmpeq(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT8x32_CMPEQ_DEFINED +#endif +#ifndef VINT8x32_CMPGT_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_cmpgt(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT8x32_CMPGT_DEFINED +#endif +#ifndef VINT8x32_CMPLE_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_cmple(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); 
+ return vec1; +} +# define VINT8x32_CMPLE_DEFINED +#endif +#ifndef VINT8x32_CMPGE_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_cmpge(vint8x32 vec1, vint8x32 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT8x32_CMPGE_DEFINED +#endif +#ifndef VINT8x32_MIN_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_min(vint8x32 vec1, vint8x32 vec2) +{ + vint8x32 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x32_MIN_DEFINED +#endif +#ifndef VINT8x32_MAX_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_max(vint8x32 vec1, vint8x32 vec2) +{ + vint8x32 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x32_MAX_DEFINED +#endif +#ifndef VINT8x32_AVG_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_avg(vint8x32 vec1, vint8x32 vec2) +{ + vint8x32 ones = vint8x32_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT8x32_AVG_DEFINED +#endif +#ifndef VINT8x32_LSHIFT_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_lshift(vint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT8x32_LSHIFT_DEFINED +#endif +#ifndef VINT8x32_RSHIFT_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_rshift(vint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT8x32_RSHIFT_DEFINED +#endif +#ifndef VINT8x32_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_lrshift(vint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT8x32_LRSHIFT_DEFINED +#endif +#ifndef VINT8x32_NOT_DEFINED +VEC_FUNC_IMPL vint8x32 vint8x32_not(vint8x32 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT8x32_NOT_DEFINED +#endif + + +/* vint8x32 */ + +#ifndef VUINT8x32_SPLAT_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_splat(vec_uint8 x) +{ + vuint8x32 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + return vec; +} +# define VUINT8x32_SPLAT_DEFINED +#endif +#ifndef VUINT8x32_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_load_aligned(const vec_uint8 x[32]) +{ + vuint8x32 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT8x32_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT8x32_LOAD_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_load(const vec_uint8 x[32]) +{ + vuint8x32 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT8x32_LOAD_DEFINED +#endif +#ifndef VUINT8x32_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint8x32_store_aligned(vuint8x32 vec, vec_uint8 arr[32]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT8x32_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT8x32_STORE_DEFINED +VEC_FUNC_IMPL void vuint8x32_store(vuint8x32 vec, vec_uint8 arr[32]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT8x32_STORE_DEFINED +#endif +#ifndef VUINT8x32_ADD_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_add(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT8x32_ADD_DEFINED +#endif +#ifndef VUINT8x32_SUB_DEFINED +VEC_FUNC_IMPL vuint8x32 
vuint8x32_sub(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT8x32_SUB_DEFINED +#endif +#ifndef VUINT8x32_MUL_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_mul(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT8x32_MUL_DEFINED +#endif +#ifndef VUINT8x32_AND_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_and(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT8x32_AND_DEFINED +#endif +#ifndef VUINT8x32_OR_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_or(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT8x32_OR_DEFINED +#endif +#ifndef VUINT8x32_XOR_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_xor(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT8x32_XOR_DEFINED +#endif +#ifndef VUINT8x32_CMPLT_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmplt(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT8x32_CMPLT_DEFINED +#endif +#ifndef VUINT8x32_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpeq(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT8x32_CMPEQ_DEFINED +#endif +#ifndef VUINT8x32_CMPGT_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpgt(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT8x32_CMPGT_DEFINED +#endif +#ifndef VUINT8x32_CMPLE_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmple(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT8x32_CMPLE_DEFINED +#endif +#ifndef VUINT8x32_CMPGE_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_cmpge(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT8x32_CMPGE_DEFINED +#endif +#ifndef VUINT8x32_MIN_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_min(vuint8x32 vec1, vuint8x32 vec2) +{ + vuint8x32 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT8x32_MIN_DEFINED +#endif +#ifndef VUINT8x32_MAX_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_max(vuint8x32 vec1, vuint8x32 vec2) +{ + vuint8x32 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT8x32_MAX_DEFINED +#endif +#ifndef VUINT8x32_AVG_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_avg(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT8x32_AVG_DEFINED +#endif +#ifndef VUINT8x32_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_lshift(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT8x32_LSHIFT_DEFINED +#endif +#ifndef VUINT8x32_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_rshift(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT8x32_RSHIFT_DEFINED +#endif +#ifndef VUINT8x32_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_lrshift(vuint8x32 vec1, vuint8x32 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT8x32_LRSHIFT_DEFINED +#endif +#ifndef VUINT8x32_NOT_DEFINED +VEC_FUNC_IMPL vuint8x32 vuint8x32_not(vuint8x32 vec) +{ + 
vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT8x32_NOT_DEFINED +#endif + + +/* vuint8x64 */ + +#ifndef VINT8x64_SPLAT_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_splat(vec_int8 x) +{ + vint8x64 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + return vec; +} +# define VINT8x64_SPLAT_DEFINED +#endif +#ifndef VINT8x64_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_load_aligned(const vec_int8 x[64]) +{ + vint8x64 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT8x64_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT8x64_LOAD_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_load(const vec_int8 x[64]) +{ + vint8x64 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT8x64_LOAD_DEFINED +#endif +#ifndef VINT8x64_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint8x64_store_aligned(vint8x64 vec, vec_int8 arr[64]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT8x64_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT8x64_STORE_DEFINED +VEC_FUNC_IMPL void vint8x64_store(vint8x64 vec, vec_int8 arr[64]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT8x64_STORE_DEFINED +#endif +#ifndef VINT8x64_ADD_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_add(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT8x64_ADD_DEFINED +#endif +#ifndef VINT8x64_SUB_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_sub(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT8x64_SUB_DEFINED +#endif +#ifndef VINT8x64_MUL_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_mul(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT8x64_MUL_DEFINED +#endif +#ifndef VINT8x64_AND_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_and(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT8x64_AND_DEFINED +#endif +#ifndef VINT8x64_OR_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_or(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT8x64_OR_DEFINED +#endif +#ifndef VINT8x64_XOR_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_xor(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT8x64_XOR_DEFINED +#endif +#ifndef VINT8x64_CMPLT_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_cmplt(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT8x64_CMPLT_DEFINED +#endif +#ifndef VINT8x64_CMPEQ_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_cmpeq(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT8x64_CMPEQ_DEFINED +#endif +#ifndef VINT8x64_CMPGT_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_cmpgt(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT8x64_CMPGT_DEFINED +#endif +#ifndef VINT8x64_CMPLE_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_cmple(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT8x64_CMPLE_DEFINED +#endif +#ifndef VINT8x64_CMPGE_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_cmpge(vint8x64 vec1, vint8x64 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT8x64_CMPGE_DEFINED +#endif +#ifndef VINT8x64_MIN_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_min(vint8x64 vec1, vint8x64 vec2) +{ + vint8x64 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + 
vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x64_MIN_DEFINED +#endif +#ifndef VINT8x64_MAX_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_max(vint8x64 vec1, vint8x64 vec2) +{ + vint8x64 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT8x64_MAX_DEFINED +#endif +#ifndef VINT8x64_AVG_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_avg(vint8x64 vec1, vint8x64 vec2) +{ + vint8x64 ones = vint8x64_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT8x64_AVG_DEFINED +#endif +#ifndef VINT8x64_LSHIFT_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_lshift(vint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT8x64_LSHIFT_DEFINED +#endif +#ifndef VINT8x64_RSHIFT_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_rshift(vint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT8x64_RSHIFT_DEFINED +#endif +#ifndef VINT8x64_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_lrshift(vint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT8x64_LRSHIFT_DEFINED +#endif +#ifndef VINT8x64_NOT_DEFINED +VEC_FUNC_IMPL vint8x64 vint8x64_not(vint8x64 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT8x64_NOT_DEFINED +#endif + + +/* vint8x64 */ + +#ifndef VUINT8x64_SPLAT_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_splat(vec_uint8 x) +{ + vuint8x64 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + return vec; +} +# define VUINT8x64_SPLAT_DEFINED +#endif +#ifndef VUINT8x64_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_load_aligned(const vec_uint8 x[64]) +{ + vuint8x64 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT8x64_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT8x64_LOAD_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_load(const vec_uint8 x[64]) +{ + vuint8x64 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT8x64_LOAD_DEFINED +#endif +#ifndef VUINT8x64_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint8x64_store_aligned(vuint8x64 vec, vec_uint8 arr[64]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT8x64_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT8x64_STORE_DEFINED +VEC_FUNC_IMPL void vuint8x64_store(vuint8x64 vec, vec_uint8 arr[64]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT8x64_STORE_DEFINED +#endif +#ifndef VUINT8x64_ADD_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_add(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT8x64_ADD_DEFINED +#endif +#ifndef VUINT8x64_SUB_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_sub(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT8x64_SUB_DEFINED +#endif +#ifndef VUINT8x64_MUL_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_mul(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT8x64_MUL_DEFINED +#endif +#ifndef 
VUINT8x64_AND_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_and(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT8x64_AND_DEFINED +#endif +#ifndef VUINT8x64_OR_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_or(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT8x64_OR_DEFINED +#endif +#ifndef VUINT8x64_XOR_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_xor(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT8x64_XOR_DEFINED +#endif +#ifndef VUINT8x64_CMPLT_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmplt(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT8x64_CMPLT_DEFINED +#endif +#ifndef VUINT8x64_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpeq(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT8x64_CMPEQ_DEFINED +#endif +#ifndef VUINT8x64_CMPGT_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpgt(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT8x64_CMPGT_DEFINED +#endif +#ifndef VUINT8x64_CMPLE_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmple(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT8x64_CMPLE_DEFINED +#endif +#ifndef VUINT8x64_CMPGE_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_cmpge(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT8x64_CMPGE_DEFINED +#endif +#ifndef VUINT8x64_MIN_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_min(vuint8x64 vec1, vuint8x64 vec2) +{ + vuint8x64 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT8x64_MIN_DEFINED +#endif +#ifndef VUINT8x64_MAX_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_max(vuint8x64 vec1, vuint8x64 vec2) +{ + vuint8x64 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT8x64_MAX_DEFINED +#endif +#ifndef VUINT8x64_AVG_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_avg(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT8x64_AVG_DEFINED +#endif +#ifndef VUINT8x64_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_lshift(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT8x64_LSHIFT_DEFINED +#endif +#ifndef VUINT8x64_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_rshift(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT8x64_RSHIFT_DEFINED +#endif +#ifndef VUINT8x64_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_lrshift(vuint8x64 vec1, vuint8x64 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint8 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT8x64_LRSHIFT_DEFINED +#endif +#ifndef VUINT8x64_NOT_DEFINED +VEC_FUNC_IMPL vuint8x64 vuint8x64_not(vuint8x64 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT8x64_NOT_DEFINED +#endif + + +/* vuint16x2 */ + +#ifndef VINT16x2_SPLAT_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_splat(vec_int16 x) +{ + vint16x2 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,}; + return vec; +} +# define VINT16x2_SPLAT_DEFINED +#endif +#ifndef VINT16x2_LOAD_ALIGNED_DEFINED 
+VEC_FUNC_IMPL vint16x2 vint16x2_load_aligned(const vec_int16 x[2]) +{ + vint16x2 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT16x2_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT16x2_LOAD_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_load(const vec_int16 x[2]) +{ + vint16x2 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT16x2_LOAD_DEFINED +#endif +#ifndef VINT16x2_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint16x2_store_aligned(vint16x2 vec, vec_int16 arr[2]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT16x2_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT16x2_STORE_DEFINED +VEC_FUNC_IMPL void vint16x2_store(vint16x2 vec, vec_int16 arr[2]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT16x2_STORE_DEFINED +#endif +#ifndef VINT16x2_ADD_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_add(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT16x2_ADD_DEFINED +#endif +#ifndef VINT16x2_SUB_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_sub(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT16x2_SUB_DEFINED +#endif +#ifndef VINT16x2_MUL_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_mul(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT16x2_MUL_DEFINED +#endif +#ifndef VINT16x2_AND_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_and(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT16x2_AND_DEFINED +#endif +#ifndef VINT16x2_OR_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_or(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT16x2_OR_DEFINED +#endif +#ifndef VINT16x2_XOR_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_xor(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT16x2_XOR_DEFINED +#endif +#ifndef VINT16x2_CMPLT_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_cmplt(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT16x2_CMPLT_DEFINED +#endif +#ifndef VINT16x2_CMPEQ_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_cmpeq(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT16x2_CMPEQ_DEFINED +#endif +#ifndef VINT16x2_CMPGT_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_cmpgt(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT16x2_CMPGT_DEFINED +#endif +#ifndef VINT16x2_CMPLE_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_cmple(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT16x2_CMPLE_DEFINED +#endif +#ifndef VINT16x2_CMPGE_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_cmpge(vint16x2 vec1, vint16x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT16x2_CMPGE_DEFINED +#endif +#ifndef VINT16x2_MIN_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_min(vint16x2 vec1, vint16x2 vec2) +{ + vint16x2 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x2_MIN_DEFINED +#endif +#ifndef VINT16x2_MAX_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_max(vint16x2 vec1, vint16x2 vec2) +{ + vint16x2 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x2_MAX_DEFINED +#endif +#ifndef VINT16x2_AVG_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_avg(vint16x2 
vec1, vint16x2 vec2) +{ + vint16x2 ones = vint16x2_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT16x2_AVG_DEFINED +#endif +#ifndef VINT16x2_LSHIFT_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_lshift(vint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT16x2_LSHIFT_DEFINED +#endif +#ifndef VINT16x2_RSHIFT_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_rshift(vint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT16x2_RSHIFT_DEFINED +#endif +#ifndef VINT16x2_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_lrshift(vint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT16x2_LRSHIFT_DEFINED +#endif +#ifndef VINT16x2_NOT_DEFINED +VEC_FUNC_IMPL vint16x2 vint16x2_not(vint16x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT16x2_NOT_DEFINED +#endif + + +/* vint16x2 */ + +#ifndef VUINT16x2_SPLAT_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_splat(vec_uint16 x) +{ + vuint16x2 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,}; + return vec; +} +# define VUINT16x2_SPLAT_DEFINED +#endif +#ifndef VUINT16x2_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_load_aligned(const vec_uint16 x[2]) +{ + vuint16x2 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT16x2_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT16x2_LOAD_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_load(const vec_uint16 x[2]) +{ + vuint16x2 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT16x2_LOAD_DEFINED +#endif +#ifndef VUINT16x2_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint16x2_store_aligned(vuint16x2 vec, vec_uint16 arr[2]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT16x2_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT16x2_STORE_DEFINED +VEC_FUNC_IMPL void vuint16x2_store(vuint16x2 vec, vec_uint16 arr[2]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT16x2_STORE_DEFINED +#endif +#ifndef VUINT16x2_ADD_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_add(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT16x2_ADD_DEFINED +#endif +#ifndef VUINT16x2_SUB_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_sub(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT16x2_SUB_DEFINED +#endif +#ifndef VUINT16x2_MUL_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_mul(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT16x2_MUL_DEFINED +#endif +#ifndef VUINT16x2_AND_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_and(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT16x2_AND_DEFINED +#endif +#ifndef VUINT16x2_OR_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_or(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT16x2_OR_DEFINED +#endif +#ifndef VUINT16x2_XOR_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_xor(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT16x2_XOR_DEFINED +#endif 
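+/* A scalar sketch of what each lane of the signed avg implementations above
+ * (e.g. vint16x2_avg) computes. This helper is illustrative only and is not
+ * emitted by utils/gengcc.c; it is kept under #if 0 so it never affects
+ * compilation. Splitting each operand into its halved quotient and remainder
+ * gives the rounded average (x + y + 1) >> 1 without ever forming x + y, so
+ * the intermediate sum cannot overflow the lane type. */
+#if 0
+static vec_int16 scalar_avg_int16(vec_int16 x, vec_int16 y)
+{
+	vec_int16 x_d_rem = (x % 2);
+	vec_int16 y_d_rem = (y % 2);
+	vec_int16 rem_d_quot = ((x_d_rem + y_d_rem) / 2);
+	vec_int16 rem_d_rem = ((x_d_rem + y_d_rem) % 2);
+
+	/* e.g. x = 3, y = -4: (1 + (-2)) + 0 + 1 == 0 == (3 + (-4) + 1) >> 1 */
+	return ((x / 2) + (y / 2)) + rem_d_quot + (rem_d_rem == 1 ? 1 : 0);
+}
+#endif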
+#ifndef VUINT16x2_CMPLT_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmplt(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT16x2_CMPLT_DEFINED +#endif +#ifndef VUINT16x2_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpeq(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT16x2_CMPEQ_DEFINED +#endif +#ifndef VUINT16x2_CMPGT_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpgt(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT16x2_CMPGT_DEFINED +#endif +#ifndef VUINT16x2_CMPLE_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmple(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT16x2_CMPLE_DEFINED +#endif +#ifndef VUINT16x2_CMPGE_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_cmpge(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT16x2_CMPGE_DEFINED +#endif +#ifndef VUINT16x2_MIN_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_min(vuint16x2 vec1, vuint16x2 vec2) +{ + vuint16x2 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT16x2_MIN_DEFINED +#endif +#ifndef VUINT16x2_MAX_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_max(vuint16x2 vec1, vuint16x2 vec2) +{ + vuint16x2 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT16x2_MAX_DEFINED +#endif +#ifndef VUINT16x2_AVG_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_avg(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT16x2_AVG_DEFINED +#endif +#ifndef VUINT16x2_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_lshift(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT16x2_LSHIFT_DEFINED +#endif +#ifndef VUINT16x2_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_rshift(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT16x2_RSHIFT_DEFINED +#endif +#ifndef VUINT16x2_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_lrshift(vuint16x2 vec1, vuint16x2 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(4))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT16x2_LRSHIFT_DEFINED +#endif +#ifndef VUINT16x2_NOT_DEFINED +VEC_FUNC_IMPL vuint16x2 vuint16x2_not(vuint16x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT16x2_NOT_DEFINED +#endif + + +/* vuint16x4 */ + +#ifndef VINT16x4_SPLAT_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_splat(vec_int16 x) +{ + vint16x4 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + return vec; +} +# define VINT16x4_SPLAT_DEFINED +#endif +#ifndef VINT16x4_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_load_aligned(const vec_int16 x[4]) +{ + vint16x4 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT16x4_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT16x4_LOAD_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_load(const vec_int16 x[4]) +{ + vint16x4 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT16x4_LOAD_DEFINED +#endif +#ifndef VINT16x4_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint16x4_store_aligned(vint16x4 vec, vec_int16 arr[4]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT16x4_STORE_ALIGNED_DEFINED 
+#endif +#ifndef VINT16x4_STORE_DEFINED +VEC_FUNC_IMPL void vint16x4_store(vint16x4 vec, vec_int16 arr[4]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT16x4_STORE_DEFINED +#endif +#ifndef VINT16x4_ADD_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_add(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT16x4_ADD_DEFINED +#endif +#ifndef VINT16x4_SUB_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_sub(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT16x4_SUB_DEFINED +#endif +#ifndef VINT16x4_MUL_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_mul(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT16x4_MUL_DEFINED +#endif +#ifndef VINT16x4_AND_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_and(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT16x4_AND_DEFINED +#endif +#ifndef VINT16x4_OR_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_or(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT16x4_OR_DEFINED +#endif +#ifndef VINT16x4_XOR_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_xor(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT16x4_XOR_DEFINED +#endif +#ifndef VINT16x4_CMPLT_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_cmplt(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT16x4_CMPLT_DEFINED +#endif +#ifndef VINT16x4_CMPEQ_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_cmpeq(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT16x4_CMPEQ_DEFINED +#endif +#ifndef VINT16x4_CMPGT_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_cmpgt(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT16x4_CMPGT_DEFINED +#endif +#ifndef VINT16x4_CMPLE_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_cmple(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT16x4_CMPLE_DEFINED +#endif +#ifndef VINT16x4_CMPGE_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_cmpge(vint16x4 vec1, vint16x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT16x4_CMPGE_DEFINED +#endif +#ifndef VINT16x4_MIN_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_min(vint16x4 vec1, vint16x4 vec2) +{ + vint16x4 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x4_MIN_DEFINED +#endif +#ifndef VINT16x4_MAX_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_max(vint16x4 vec1, vint16x4 vec2) +{ + vint16x4 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x4_MAX_DEFINED +#endif +#ifndef VINT16x4_AVG_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_avg(vint16x4 vec1, vint16x4 vec2) +{ + vint16x4 ones = vint16x4_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT16x4_AVG_DEFINED +#endif +#ifndef VINT16x4_LSHIFT_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_lshift(vint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return 
vec1; +} +# define VINT16x4_LSHIFT_DEFINED +#endif +#ifndef VINT16x4_RSHIFT_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_rshift(vint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT16x4_RSHIFT_DEFINED +#endif +#ifndef VINT16x4_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_lrshift(vint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT16x4_LRSHIFT_DEFINED +#endif +#ifndef VINT16x4_NOT_DEFINED +VEC_FUNC_IMPL vint16x4 vint16x4_not(vint16x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT16x4_NOT_DEFINED +#endif + + +/* vint16x4 */ + +#ifndef VUINT16x4_SPLAT_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_splat(vec_uint16 x) +{ + vuint16x4 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + return vec; +} +# define VUINT16x4_SPLAT_DEFINED +#endif +#ifndef VUINT16x4_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_load_aligned(const vec_uint16 x[4]) +{ + vuint16x4 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT16x4_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT16x4_LOAD_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_load(const vec_uint16 x[4]) +{ + vuint16x4 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT16x4_LOAD_DEFINED +#endif +#ifndef VUINT16x4_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint16x4_store_aligned(vuint16x4 vec, vec_uint16 arr[4]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT16x4_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT16x4_STORE_DEFINED +VEC_FUNC_IMPL void vuint16x4_store(vuint16x4 vec, vec_uint16 arr[4]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT16x4_STORE_DEFINED +#endif +#ifndef VUINT16x4_ADD_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_add(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT16x4_ADD_DEFINED +#endif +#ifndef VUINT16x4_SUB_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_sub(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT16x4_SUB_DEFINED +#endif +#ifndef VUINT16x4_MUL_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_mul(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT16x4_MUL_DEFINED +#endif +#ifndef VUINT16x4_AND_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_and(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT16x4_AND_DEFINED +#endif +#ifndef VUINT16x4_OR_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_or(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT16x4_OR_DEFINED +#endif +#ifndef VUINT16x4_XOR_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_xor(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT16x4_XOR_DEFINED +#endif +#ifndef VUINT16x4_CMPLT_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmplt(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT16x4_CMPLT_DEFINED +#endif +#ifndef VUINT16x4_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpeq(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT16x4_CMPEQ_DEFINED +#endif +#ifndef VUINT16x4_CMPGT_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpgt(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define 
VUINT16x4_CMPGT_DEFINED +#endif +#ifndef VUINT16x4_CMPLE_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmple(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT16x4_CMPLE_DEFINED +#endif +#ifndef VUINT16x4_CMPGE_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_cmpge(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT16x4_CMPGE_DEFINED +#endif +#ifndef VUINT16x4_MIN_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_min(vuint16x4 vec1, vuint16x4 vec2) +{ + vuint16x4 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT16x4_MIN_DEFINED +#endif +#ifndef VUINT16x4_MAX_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_max(vuint16x4 vec1, vuint16x4 vec2) +{ + vuint16x4 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT16x4_MAX_DEFINED +#endif +#ifndef VUINT16x4_AVG_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_avg(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT16x4_AVG_DEFINED +#endif +#ifndef VUINT16x4_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_lshift(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT16x4_LSHIFT_DEFINED +#endif +#ifndef VUINT16x4_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_rshift(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT16x4_RSHIFT_DEFINED +#endif +#ifndef VUINT16x4_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_lrshift(vuint16x4 vec1, vuint16x4 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT16x4_LRSHIFT_DEFINED +#endif +#ifndef VUINT16x4_NOT_DEFINED +VEC_FUNC_IMPL vuint16x4 vuint16x4_not(vuint16x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT16x4_NOT_DEFINED +#endif + + +/* vuint16x8 */ + +#ifndef VINT16x8_SPLAT_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_splat(vec_int16 x) +{ + vint16x8 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + return vec; +} +# define VINT16x8_SPLAT_DEFINED +#endif +#ifndef VINT16x8_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_load_aligned(const vec_int16 x[8]) +{ + vint16x8 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT16x8_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT16x8_LOAD_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_load(const vec_int16 x[8]) +{ + vint16x8 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT16x8_LOAD_DEFINED +#endif +#ifndef VINT16x8_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint16x8_store_aligned(vint16x8 vec, vec_int16 arr[8]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT16x8_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT16x8_STORE_DEFINED +VEC_FUNC_IMPL void vint16x8_store(vint16x8 vec, vec_int16 arr[8]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT16x8_STORE_DEFINED +#endif +#ifndef VINT16x8_ADD_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_add(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT16x8_ADD_DEFINED +#endif +#ifndef VINT16x8_SUB_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_sub(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT16x8_SUB_DEFINED +#endif 
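+/* A scalar sketch of the unsigned avg pattern above (e.g. vuint16x4_avg).
+ * Illustrative only, not emitted by utils/gengcc.c, and kept under #if 0 so it
+ * never affects compilation. Because ((x & 1) + (y & 1) + 1) / 2 is exactly
+ * (x | y) & 1, each lane's (x >> 1) + (y >> 1) + ((x | y) & 1) equals the
+ * rounded average (x + y + 1) >> 1, again without overflowing the lane type. */
+#if 0
+static vec_uint16 scalar_avg_uint16(vec_uint16 x, vec_uint16 y)
+{
+	/* e.g. x = 2, y = 3: 1 + 1 + 1 == 3 == (2 + 3 + 1) >> 1 */
+	return (x >> 1) + (y >> 1) + ((x | y) & 1);
+}
+#endif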
+#ifndef VINT16x8_MUL_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_mul(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT16x8_MUL_DEFINED +#endif +#ifndef VINT16x8_AND_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_and(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT16x8_AND_DEFINED +#endif +#ifndef VINT16x8_OR_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_or(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT16x8_OR_DEFINED +#endif +#ifndef VINT16x8_XOR_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_xor(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT16x8_XOR_DEFINED +#endif +#ifndef VINT16x8_CMPLT_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_cmplt(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT16x8_CMPLT_DEFINED +#endif +#ifndef VINT16x8_CMPEQ_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_cmpeq(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT16x8_CMPEQ_DEFINED +#endif +#ifndef VINT16x8_CMPGT_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_cmpgt(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT16x8_CMPGT_DEFINED +#endif +#ifndef VINT16x8_CMPLE_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_cmple(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT16x8_CMPLE_DEFINED +#endif +#ifndef VINT16x8_CMPGE_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_cmpge(vint16x8 vec1, vint16x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT16x8_CMPGE_DEFINED +#endif +#ifndef VINT16x8_MIN_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_min(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x8_MIN_DEFINED +#endif +#ifndef VINT16x8_MAX_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_max(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x8_MAX_DEFINED +#endif +#ifndef VINT16x8_AVG_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_avg(vint16x8 vec1, vint16x8 vec2) +{ + vint16x8 ones = vint16x8_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT16x8_AVG_DEFINED +#endif +#ifndef VINT16x8_LSHIFT_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_lshift(vint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT16x8_LSHIFT_DEFINED +#endif +#ifndef VINT16x8_RSHIFT_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_rshift(vint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT16x8_RSHIFT_DEFINED +#endif +#ifndef VINT16x8_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint16x8 vint16x8_lrshift(vint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT16x8_LRSHIFT_DEFINED +#endif +#ifndef VINT16x8_NOT_DEFINED +VEC_FUNC_IMPL 
vint16x8 vint16x8_not(vint16x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT16x8_NOT_DEFINED +#endif + + +/* vint16x8 */ + +#ifndef VUINT16x8_SPLAT_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_splat(vec_uint16 x) +{ + vuint16x8 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + return vec; +} +# define VUINT16x8_SPLAT_DEFINED +#endif +#ifndef VUINT16x8_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_load_aligned(const vec_uint16 x[8]) +{ + vuint16x8 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT16x8_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT16x8_LOAD_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_load(const vec_uint16 x[8]) +{ + vuint16x8 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT16x8_LOAD_DEFINED +#endif +#ifndef VUINT16x8_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint16x8_store_aligned(vuint16x8 vec, vec_uint16 arr[8]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT16x8_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT16x8_STORE_DEFINED +VEC_FUNC_IMPL void vuint16x8_store(vuint16x8 vec, vec_uint16 arr[8]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT16x8_STORE_DEFINED +#endif +#ifndef VUINT16x8_ADD_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_add(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT16x8_ADD_DEFINED +#endif +#ifndef VUINT16x8_SUB_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_sub(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT16x8_SUB_DEFINED +#endif +#ifndef VUINT16x8_MUL_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_mul(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT16x8_MUL_DEFINED +#endif +#ifndef VUINT16x8_AND_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_and(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT16x8_AND_DEFINED +#endif +#ifndef VUINT16x8_OR_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_or(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT16x8_OR_DEFINED +#endif +#ifndef VUINT16x8_XOR_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_xor(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT16x8_XOR_DEFINED +#endif +#ifndef VUINT16x8_CMPLT_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmplt(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT16x8_CMPLT_DEFINED +#endif +#ifndef VUINT16x8_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpeq(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT16x8_CMPEQ_DEFINED +#endif +#ifndef VUINT16x8_CMPGT_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpgt(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT16x8_CMPGT_DEFINED +#endif +#ifndef VUINT16x8_CMPLE_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmple(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT16x8_CMPLE_DEFINED +#endif +#ifndef VUINT16x8_CMPGE_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_cmpge(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT16x8_CMPGE_DEFINED +#endif +#ifndef VUINT16x8_MIN_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_min(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 mask; + mask.gcc 
= (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT16x8_MIN_DEFINED +#endif +#ifndef VUINT16x8_MAX_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_max(vuint16x8 vec1, vuint16x8 vec2) +{ + vuint16x8 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT16x8_MAX_DEFINED +#endif +#ifndef VUINT16x8_AVG_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_avg(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT16x8_AVG_DEFINED +#endif +#ifndef VUINT16x8_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_lshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT16x8_LSHIFT_DEFINED +#endif +#ifndef VUINT16x8_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_rshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT16x8_RSHIFT_DEFINED +#endif +#ifndef VUINT16x8_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_lrshift(vuint16x8 vec1, vuint16x8 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT16x8_LRSHIFT_DEFINED +#endif +#ifndef VUINT16x8_NOT_DEFINED +VEC_FUNC_IMPL vuint16x8 vuint16x8_not(vuint16x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT16x8_NOT_DEFINED +#endif + + +/* vuint16x16 */ + +#ifndef VINT16x16_SPLAT_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_splat(vec_int16 x) +{ + vint16x16 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + return vec; +} +# define VINT16x16_SPLAT_DEFINED +#endif +#ifndef VINT16x16_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_load_aligned(const vec_int16 x[16]) +{ + vint16x16 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT16x16_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT16x16_LOAD_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_load(const vec_int16 x[16]) +{ + vint16x16 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT16x16_LOAD_DEFINED +#endif +#ifndef VINT16x16_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint16x16_store_aligned(vint16x16 vec, vec_int16 arr[16]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT16x16_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT16x16_STORE_DEFINED +VEC_FUNC_IMPL void vint16x16_store(vint16x16 vec, vec_int16 arr[16]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT16x16_STORE_DEFINED +#endif +#ifndef VINT16x16_ADD_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_add(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT16x16_ADD_DEFINED +#endif +#ifndef VINT16x16_SUB_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_sub(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT16x16_SUB_DEFINED +#endif +#ifndef VINT16x16_MUL_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_mul(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT16x16_MUL_DEFINED +#endif +#ifndef VINT16x16_AND_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_and(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT16x16_AND_DEFINED +#endif +#ifndef VINT16x16_OR_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_or(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc 
| vec2.gcc); + return vec1; +} +# define VINT16x16_OR_DEFINED +#endif +#ifndef VINT16x16_XOR_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_xor(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT16x16_XOR_DEFINED +#endif +#ifndef VINT16x16_CMPLT_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_cmplt(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT16x16_CMPLT_DEFINED +#endif +#ifndef VINT16x16_CMPEQ_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_cmpeq(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT16x16_CMPEQ_DEFINED +#endif +#ifndef VINT16x16_CMPGT_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_cmpgt(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT16x16_CMPGT_DEFINED +#endif +#ifndef VINT16x16_CMPLE_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_cmple(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT16x16_CMPLE_DEFINED +#endif +#ifndef VINT16x16_CMPGE_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_cmpge(vint16x16 vec1, vint16x16 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT16x16_CMPGE_DEFINED +#endif +#ifndef VINT16x16_MIN_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_min(vint16x16 vec1, vint16x16 vec2) +{ + vint16x16 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x16_MIN_DEFINED +#endif +#ifndef VINT16x16_MAX_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_max(vint16x16 vec1, vint16x16 vec2) +{ + vint16x16 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x16_MAX_DEFINED +#endif +#ifndef VINT16x16_AVG_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_avg(vint16x16 vec1, vint16x16 vec2) +{ + vint16x16 ones = vint16x16_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT16x16_AVG_DEFINED +#endif +#ifndef VINT16x16_LSHIFT_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_lshift(vint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT16x16_LSHIFT_DEFINED +#endif +#ifndef VINT16x16_RSHIFT_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_rshift(vint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT16x16_RSHIFT_DEFINED +#endif +#ifndef VINT16x16_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_lrshift(vint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT16x16_LRSHIFT_DEFINED +#endif +#ifndef VINT16x16_NOT_DEFINED +VEC_FUNC_IMPL vint16x16 vint16x16_not(vint16x16 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT16x16_NOT_DEFINED +#endif + + +/* vint16x16 */ + +#ifndef VUINT16x16_SPLAT_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_splat(vec_uint16 x) +{ + vuint16x16 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + return vec; +} +# define VUINT16x16_SPLAT_DEFINED +#endif +#ifndef VUINT16x16_LOAD_ALIGNED_DEFINED 
+VEC_FUNC_IMPL vuint16x16 vuint16x16_load_aligned(const vec_uint16 x[16]) +{ + vuint16x16 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT16x16_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT16x16_LOAD_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_load(const vec_uint16 x[16]) +{ + vuint16x16 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT16x16_LOAD_DEFINED +#endif +#ifndef VUINT16x16_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint16x16_store_aligned(vuint16x16 vec, vec_uint16 arr[16]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT16x16_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT16x16_STORE_DEFINED +VEC_FUNC_IMPL void vuint16x16_store(vuint16x16 vec, vec_uint16 arr[16]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT16x16_STORE_DEFINED +#endif +#ifndef VUINT16x16_ADD_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_add(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT16x16_ADD_DEFINED +#endif +#ifndef VUINT16x16_SUB_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_sub(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT16x16_SUB_DEFINED +#endif +#ifndef VUINT16x16_MUL_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_mul(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT16x16_MUL_DEFINED +#endif +#ifndef VUINT16x16_AND_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_and(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT16x16_AND_DEFINED +#endif +#ifndef VUINT16x16_OR_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_or(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT16x16_OR_DEFINED +#endif +#ifndef VUINT16x16_XOR_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_xor(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT16x16_XOR_DEFINED +#endif +#ifndef VUINT16x16_CMPLT_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmplt(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT16x16_CMPLT_DEFINED +#endif +#ifndef VUINT16x16_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpeq(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT16x16_CMPEQ_DEFINED +#endif +#ifndef VUINT16x16_CMPGT_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpgt(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT16x16_CMPGT_DEFINED +#endif +#ifndef VUINT16x16_CMPLE_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmple(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT16x16_CMPLE_DEFINED +#endif +#ifndef VUINT16x16_CMPGE_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_cmpge(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT16x16_CMPGE_DEFINED +#endif +#ifndef VUINT16x16_MIN_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_min(vuint16x16 vec1, vuint16x16 vec2) +{ + vuint16x16 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT16x16_MIN_DEFINED +#endif +#ifndef VUINT16x16_MAX_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_max(vuint16x16 vec1, vuint16x16 vec2) +{ + vuint16x16 mask; + mask.gcc = (vec1.gcc > 
vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT16x16_MAX_DEFINED +#endif +#ifndef VUINT16x16_AVG_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_avg(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT16x16_AVG_DEFINED +#endif +#ifndef VUINT16x16_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_lshift(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT16x16_LSHIFT_DEFINED +#endif +#ifndef VUINT16x16_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_rshift(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT16x16_RSHIFT_DEFINED +#endif +#ifndef VUINT16x16_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_lrshift(vuint16x16 vec1, vuint16x16 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT16x16_LRSHIFT_DEFINED +#endif +#ifndef VUINT16x16_NOT_DEFINED +VEC_FUNC_IMPL vuint16x16 vuint16x16_not(vuint16x16 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT16x16_NOT_DEFINED +#endif + + +/* vuint16x32 */ + +#ifndef VINT16x32_SPLAT_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_splat(vec_int16 x) +{ + vint16x32 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + return vec; +} +# define VINT16x32_SPLAT_DEFINED +#endif +#ifndef VINT16x32_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_load_aligned(const vec_int16 x[32]) +{ + vint16x32 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT16x32_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT16x32_LOAD_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_load(const vec_int16 x[32]) +{ + vint16x32 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT16x32_LOAD_DEFINED +#endif +#ifndef VINT16x32_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint16x32_store_aligned(vint16x32 vec, vec_int16 arr[32]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT16x32_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT16x32_STORE_DEFINED +VEC_FUNC_IMPL void vint16x32_store(vint16x32 vec, vec_int16 arr[32]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT16x32_STORE_DEFINED +#endif +#ifndef VINT16x32_ADD_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_add(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT16x32_ADD_DEFINED +#endif +#ifndef VINT16x32_SUB_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_sub(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT16x32_SUB_DEFINED +#endif +#ifndef VINT16x32_MUL_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_mul(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT16x32_MUL_DEFINED +#endif +#ifndef VINT16x32_AND_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_and(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT16x32_AND_DEFINED +#endif +#ifndef VINT16x32_OR_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_or(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT16x32_OR_DEFINED +#endif +#ifndef VINT16x32_XOR_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_xor(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} 
+# define VINT16x32_XOR_DEFINED +#endif +#ifndef VINT16x32_CMPLT_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_cmplt(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT16x32_CMPLT_DEFINED +#endif +#ifndef VINT16x32_CMPEQ_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_cmpeq(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT16x32_CMPEQ_DEFINED +#endif +#ifndef VINT16x32_CMPGT_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_cmpgt(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT16x32_CMPGT_DEFINED +#endif +#ifndef VINT16x32_CMPLE_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_cmple(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT16x32_CMPLE_DEFINED +#endif +#ifndef VINT16x32_CMPGE_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_cmpge(vint16x32 vec1, vint16x32 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT16x32_CMPGE_DEFINED +#endif +#ifndef VINT16x32_MIN_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_min(vint16x32 vec1, vint16x32 vec2) +{ + vint16x32 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x32_MIN_DEFINED +#endif +#ifndef VINT16x32_MAX_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_max(vint16x32 vec1, vint16x32 vec2) +{ + vint16x32 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT16x32_MAX_DEFINED +#endif +#ifndef VINT16x32_AVG_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_avg(vint16x32 vec1, vint16x32 vec2) +{ + vint16x32 ones = vint16x32_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT16x32_AVG_DEFINED +#endif +#ifndef VINT16x32_LSHIFT_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_lshift(vint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT16x32_LSHIFT_DEFINED +#endif +#ifndef VINT16x32_RSHIFT_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_rshift(vint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT16x32_RSHIFT_DEFINED +#endif +#ifndef VINT16x32_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_lrshift(vint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT16x32_LRSHIFT_DEFINED +#endif +#ifndef VINT16x32_NOT_DEFINED +VEC_FUNC_IMPL vint16x32 vint16x32_not(vint16x32 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT16x32_NOT_DEFINED +#endif + + +/* vint16x32 */ + +#ifndef VUINT16x32_SPLAT_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_splat(vec_uint16 x) +{ + vuint16x32 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + return vec; +} +# define VUINT16x32_SPLAT_DEFINED +#endif +#ifndef VUINT16x32_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_load_aligned(const vec_uint16 x[32]) +{ + vuint16x32 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT16x32_LOAD_ALIGNED_DEFINED +#endif 
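+/* A scalar sketch of the mask select used by the min/max pairs above
+ * (e.g. vint16x32_min). Illustrative only, not emitted by utils/gengcc.c, and
+ * kept under #if 0. With GCC's vector extensions, a lane-wise comparison such
+ * as (vec1.gcc < vec2.gcc) yields -1 (all bits set) in lanes where it holds
+ * and 0 elsewhere, so masking each operand and ORing the halves picks the
+ * smaller (or larger) value per lane without branching. */
+#if 0
+static vec_int16 scalar_select_min_int16(vec_int16 a, vec_int16 b)
+{
+	vec_int16 mask = (a < b) ? -1 : 0;	/* one lane of the vector mask */
+	return (a & mask) | (b & ~mask);
+}
+#endif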
+#ifndef VUINT16x32_LOAD_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_load(const vec_uint16 x[32]) +{ + vuint16x32 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT16x32_LOAD_DEFINED +#endif +#ifndef VUINT16x32_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint16x32_store_aligned(vuint16x32 vec, vec_uint16 arr[32]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT16x32_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT16x32_STORE_DEFINED +VEC_FUNC_IMPL void vuint16x32_store(vuint16x32 vec, vec_uint16 arr[32]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT16x32_STORE_DEFINED +#endif +#ifndef VUINT16x32_ADD_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_add(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT16x32_ADD_DEFINED +#endif +#ifndef VUINT16x32_SUB_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_sub(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT16x32_SUB_DEFINED +#endif +#ifndef VUINT16x32_MUL_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_mul(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT16x32_MUL_DEFINED +#endif +#ifndef VUINT16x32_AND_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_and(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT16x32_AND_DEFINED +#endif +#ifndef VUINT16x32_OR_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_or(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT16x32_OR_DEFINED +#endif +#ifndef VUINT16x32_XOR_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_xor(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT16x32_XOR_DEFINED +#endif +#ifndef VUINT16x32_CMPLT_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmplt(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT16x32_CMPLT_DEFINED +#endif +#ifndef VUINT16x32_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpeq(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT16x32_CMPEQ_DEFINED +#endif +#ifndef VUINT16x32_CMPGT_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpgt(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT16x32_CMPGT_DEFINED +#endif +#ifndef VUINT16x32_CMPLE_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmple(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT16x32_CMPLE_DEFINED +#endif +#ifndef VUINT16x32_CMPGE_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_cmpge(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT16x32_CMPGE_DEFINED +#endif +#ifndef VUINT16x32_MIN_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_min(vuint16x32 vec1, vuint16x32 vec2) +{ + vuint16x32 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT16x32_MIN_DEFINED +#endif +#ifndef VUINT16x32_MAX_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_max(vuint16x32 vec1, vuint16x32 vec2) +{ + vuint16x32 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT16x32_MAX_DEFINED +#endif +#ifndef VUINT16x32_AVG_DEFINED +VEC_FUNC_IMPL vuint16x32 
vuint16x32_avg(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT16x32_AVG_DEFINED +#endif +#ifndef VUINT16x32_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_lshift(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT16x32_LSHIFT_DEFINED +#endif +#ifndef VUINT16x32_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_rshift(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT16x32_RSHIFT_DEFINED +#endif +#ifndef VUINT16x32_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_lrshift(vuint16x32 vec1, vuint16x32 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint16 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT16x32_LRSHIFT_DEFINED +#endif +#ifndef VUINT16x32_NOT_DEFINED +VEC_FUNC_IMPL vuint16x32 vuint16x32_not(vuint16x32 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT16x32_NOT_DEFINED +#endif + + +/* vuint32x2 */ + +#ifndef VINT32x2_SPLAT_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_splat(vec_int32 x) +{ + vint32x2 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,}; + return vec; +} +# define VINT32x2_SPLAT_DEFINED +#endif +#ifndef VINT32x2_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_load_aligned(const vec_int32 x[2]) +{ + vint32x2 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT32x2_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT32x2_LOAD_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_load(const vec_int32 x[2]) +{ + vint32x2 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT32x2_LOAD_DEFINED +#endif +#ifndef VINT32x2_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint32x2_store_aligned(vint32x2 vec, vec_int32 arr[2]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT32x2_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT32x2_STORE_DEFINED +VEC_FUNC_IMPL void vint32x2_store(vint32x2 vec, vec_int32 arr[2]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT32x2_STORE_DEFINED +#endif +#ifndef VINT32x2_ADD_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_add(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT32x2_ADD_DEFINED +#endif +#ifndef VINT32x2_SUB_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_sub(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT32x2_SUB_DEFINED +#endif +#ifndef VINT32x2_MUL_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_mul(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT32x2_MUL_DEFINED +#endif +#ifndef VINT32x2_AND_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_and(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT32x2_AND_DEFINED +#endif +#ifndef VINT32x2_OR_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_or(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT32x2_OR_DEFINED +#endif +#ifndef VINT32x2_XOR_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_xor(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT32x2_XOR_DEFINED +#endif +#ifndef VINT32x2_CMPLT_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_cmplt(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT32x2_CMPLT_DEFINED +#endif +#ifndef VINT32x2_CMPEQ_DEFINED +VEC_FUNC_IMPL vint32x2 
vint32x2_cmpeq(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT32x2_CMPEQ_DEFINED +#endif +#ifndef VINT32x2_CMPGT_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_cmpgt(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT32x2_CMPGT_DEFINED +#endif +#ifndef VINT32x2_CMPLE_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_cmple(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT32x2_CMPLE_DEFINED +#endif +#ifndef VINT32x2_CMPGE_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_cmpge(vint32x2 vec1, vint32x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT32x2_CMPGE_DEFINED +#endif +#ifndef VINT32x2_MIN_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_min(vint32x2 vec1, vint32x2 vec2) +{ + vint32x2 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x2_MIN_DEFINED +#endif +#ifndef VINT32x2_MAX_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_max(vint32x2 vec1, vint32x2 vec2) +{ + vint32x2 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x2_MAX_DEFINED +#endif +#ifndef VINT32x2_AVG_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_avg(vint32x2 vec1, vint32x2 vec2) +{ + vint32x2 ones = vint32x2_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT32x2_AVG_DEFINED +#endif +#ifndef VINT32x2_LSHIFT_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_lshift(vint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT32x2_LSHIFT_DEFINED +#endif +#ifndef VINT32x2_RSHIFT_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_rshift(vint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT32x2_RSHIFT_DEFINED +#endif +#ifndef VINT32x2_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_lrshift(vint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT32x2_LRSHIFT_DEFINED +#endif +#ifndef VINT32x2_NOT_DEFINED +VEC_FUNC_IMPL vint32x2 vint32x2_not(vint32x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT32x2_NOT_DEFINED +#endif + + +/* vint32x2 */ + +#ifndef VUINT32x2_SPLAT_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_splat(vec_uint32 x) +{ + vuint32x2 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,}; + return vec; +} +# define VUINT32x2_SPLAT_DEFINED +#endif +#ifndef VUINT32x2_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_load_aligned(const vec_uint32 x[2]) +{ + vuint32x2 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT32x2_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT32x2_LOAD_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_load(const vec_uint32 x[2]) +{ + vuint32x2 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT32x2_LOAD_DEFINED +#endif +#ifndef VUINT32x2_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint32x2_store_aligned(vuint32x2 vec, vec_uint32 arr[2]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT32x2_STORE_ALIGNED_DEFINED +#endif +#ifndef 
VUINT32x2_STORE_DEFINED +VEC_FUNC_IMPL void vuint32x2_store(vuint32x2 vec, vec_uint32 arr[2]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT32x2_STORE_DEFINED +#endif +#ifndef VUINT32x2_ADD_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_add(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT32x2_ADD_DEFINED +#endif +#ifndef VUINT32x2_SUB_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_sub(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT32x2_SUB_DEFINED +#endif +#ifndef VUINT32x2_MUL_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_mul(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT32x2_MUL_DEFINED +#endif +#ifndef VUINT32x2_AND_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_and(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT32x2_AND_DEFINED +#endif +#ifndef VUINT32x2_OR_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_or(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT32x2_OR_DEFINED +#endif +#ifndef VUINT32x2_XOR_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_xor(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT32x2_XOR_DEFINED +#endif +#ifndef VUINT32x2_CMPLT_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmplt(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT32x2_CMPLT_DEFINED +#endif +#ifndef VUINT32x2_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpeq(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT32x2_CMPEQ_DEFINED +#endif +#ifndef VUINT32x2_CMPGT_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpgt(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT32x2_CMPGT_DEFINED +#endif +#ifndef VUINT32x2_CMPLE_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmple(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT32x2_CMPLE_DEFINED +#endif +#ifndef VUINT32x2_CMPGE_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_cmpge(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT32x2_CMPGE_DEFINED +#endif +#ifndef VUINT32x2_MIN_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_min(vuint32x2 vec1, vuint32x2 vec2) +{ + vuint32x2 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT32x2_MIN_DEFINED +#endif +#ifndef VUINT32x2_MAX_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_max(vuint32x2 vec1, vuint32x2 vec2) +{ + vuint32x2 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT32x2_MAX_DEFINED +#endif +#ifndef VUINT32x2_AVG_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_avg(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT32x2_AVG_DEFINED +#endif +#ifndef VUINT32x2_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_lshift(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT32x2_LSHIFT_DEFINED +#endif +#ifndef VUINT32x2_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_rshift(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + 
return vec1; +} +# define VUINT32x2_RSHIFT_DEFINED +#endif +#ifndef VUINT32x2_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_lrshift(vuint32x2 vec1, vuint32x2 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(8))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT32x2_LRSHIFT_DEFINED +#endif +#ifndef VUINT32x2_NOT_DEFINED +VEC_FUNC_IMPL vuint32x2 vuint32x2_not(vuint32x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT32x2_NOT_DEFINED +#endif + + +/* vuint32x4 */ + +#ifndef VINT32x4_SPLAT_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_splat(vec_int32 x) +{ + vint32x4 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + return vec; +} +# define VINT32x4_SPLAT_DEFINED +#endif +#ifndef VINT32x4_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_load_aligned(const vec_int32 x[4]) +{ + vint32x4 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT32x4_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT32x4_LOAD_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_load(const vec_int32 x[4]) +{ + vint32x4 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT32x4_LOAD_DEFINED +#endif +#ifndef VINT32x4_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint32x4_store_aligned(vint32x4 vec, vec_int32 arr[4]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT32x4_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT32x4_STORE_DEFINED +VEC_FUNC_IMPL void vint32x4_store(vint32x4 vec, vec_int32 arr[4]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT32x4_STORE_DEFINED +#endif +#ifndef VINT32x4_ADD_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_add(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT32x4_ADD_DEFINED +#endif +#ifndef VINT32x4_SUB_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_sub(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT32x4_SUB_DEFINED +#endif +#ifndef VINT32x4_MUL_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_mul(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT32x4_MUL_DEFINED +#endif +#ifndef VINT32x4_AND_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_and(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT32x4_AND_DEFINED +#endif +#ifndef VINT32x4_OR_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_or(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT32x4_OR_DEFINED +#endif +#ifndef VINT32x4_XOR_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_xor(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT32x4_XOR_DEFINED +#endif +#ifndef VINT32x4_CMPLT_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_cmplt(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT32x4_CMPLT_DEFINED +#endif +#ifndef VINT32x4_CMPEQ_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_cmpeq(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT32x4_CMPEQ_DEFINED +#endif +#ifndef VINT32x4_CMPGT_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_cmpgt(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT32x4_CMPGT_DEFINED +#endif +#ifndef VINT32x4_CMPLE_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_cmple(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT32x4_CMPLE_DEFINED +#endif +#ifndef VINT32x4_CMPGE_DEFINED 
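+/* Note: GCC's vector extensions evaluate comparisons lane-wise, yielding
+ * all-ones (-1) in lanes where the relation holds and 0 elsewhere; the
+ * min/max implementations below reuse that result directly as a blend mask. */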
+VEC_FUNC_IMPL vint32x4 vint32x4_cmpge(vint32x4 vec1, vint32x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT32x4_CMPGE_DEFINED +#endif +#ifndef VINT32x4_MIN_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_min(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x4_MIN_DEFINED +#endif +#ifndef VINT32x4_MAX_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_max(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x4_MAX_DEFINED +#endif +#ifndef VINT32x4_AVG_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_avg(vint32x4 vec1, vint32x4 vec2) +{ + vint32x4 ones = vint32x4_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT32x4_AVG_DEFINED +#endif +#ifndef VINT32x4_LSHIFT_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_lshift(vint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT32x4_LSHIFT_DEFINED +#endif +#ifndef VINT32x4_RSHIFT_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_rshift(vint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT32x4_RSHIFT_DEFINED +#endif +#ifndef VINT32x4_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_lrshift(vint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT32x4_LRSHIFT_DEFINED +#endif +#ifndef VINT32x4_NOT_DEFINED +VEC_FUNC_IMPL vint32x4 vint32x4_not(vint32x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT32x4_NOT_DEFINED +#endif + + +/* vint32x4 */ + +#ifndef VUINT32x4_SPLAT_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_splat(vec_uint32 x) +{ + vuint32x4 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + return vec; +} +# define VUINT32x4_SPLAT_DEFINED +#endif +#ifndef VUINT32x4_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_load_aligned(const vec_uint32 x[4]) +{ + vuint32x4 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT32x4_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT32x4_LOAD_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_load(const vec_uint32 x[4]) +{ + vuint32x4 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT32x4_LOAD_DEFINED +#endif +#ifndef VUINT32x4_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint32x4_store_aligned(vuint32x4 vec, vec_uint32 arr[4]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT32x4_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT32x4_STORE_DEFINED +VEC_FUNC_IMPL void vuint32x4_store(vuint32x4 vec, vec_uint32 arr[4]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT32x4_STORE_DEFINED +#endif +#ifndef VUINT32x4_ADD_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_add(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT32x4_ADD_DEFINED +#endif +#ifndef VUINT32x4_SUB_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_sub(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT32x4_SUB_DEFINED +#endif +#ifndef 
VUINT32x4_MUL_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_mul(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT32x4_MUL_DEFINED +#endif +#ifndef VUINT32x4_AND_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_and(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT32x4_AND_DEFINED +#endif +#ifndef VUINT32x4_OR_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_or(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT32x4_OR_DEFINED +#endif +#ifndef VUINT32x4_XOR_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_xor(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT32x4_XOR_DEFINED +#endif +#ifndef VUINT32x4_CMPLT_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmplt(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT32x4_CMPLT_DEFINED +#endif +#ifndef VUINT32x4_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpeq(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT32x4_CMPEQ_DEFINED +#endif +#ifndef VUINT32x4_CMPGT_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpgt(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT32x4_CMPGT_DEFINED +#endif +#ifndef VUINT32x4_CMPLE_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmple(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT32x4_CMPLE_DEFINED +#endif +#ifndef VUINT32x4_CMPGE_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_cmpge(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT32x4_CMPGE_DEFINED +#endif +#ifndef VUINT32x4_MIN_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_min(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT32x4_MIN_DEFINED +#endif +#ifndef VUINT32x4_MAX_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_max(vuint32x4 vec1, vuint32x4 vec2) +{ + vuint32x4 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT32x4_MAX_DEFINED +#endif +#ifndef VUINT32x4_AVG_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_avg(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT32x4_AVG_DEFINED +#endif +#ifndef VUINT32x4_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_lshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT32x4_LSHIFT_DEFINED +#endif +#ifndef VUINT32x4_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_rshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT32x4_RSHIFT_DEFINED +#endif +#ifndef VUINT32x4_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_lrshift(vuint32x4 vec1, vuint32x4 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT32x4_LRSHIFT_DEFINED +#endif +#ifndef VUINT32x4_NOT_DEFINED +VEC_FUNC_IMPL vuint32x4 vuint32x4_not(vuint32x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT32x4_NOT_DEFINED +#endif + + +/* vuint32x8 */ + +#ifndef VINT32x8_SPLAT_DEFINED +VEC_FUNC_IMPL 
vint32x8 vint32x8_splat(vec_int32 x) +{ + vint32x8 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + return vec; +} +# define VINT32x8_SPLAT_DEFINED +#endif +#ifndef VINT32x8_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_load_aligned(const vec_int32 x[8]) +{ + vint32x8 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT32x8_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT32x8_LOAD_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_load(const vec_int32 x[8]) +{ + vint32x8 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT32x8_LOAD_DEFINED +#endif +#ifndef VINT32x8_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint32x8_store_aligned(vint32x8 vec, vec_int32 arr[8]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT32x8_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT32x8_STORE_DEFINED +VEC_FUNC_IMPL void vint32x8_store(vint32x8 vec, vec_int32 arr[8]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT32x8_STORE_DEFINED +#endif +#ifndef VINT32x8_ADD_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_add(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT32x8_ADD_DEFINED +#endif +#ifndef VINT32x8_SUB_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_sub(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT32x8_SUB_DEFINED +#endif +#ifndef VINT32x8_MUL_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_mul(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT32x8_MUL_DEFINED +#endif +#ifndef VINT32x8_AND_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_and(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT32x8_AND_DEFINED +#endif +#ifndef VINT32x8_OR_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_or(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT32x8_OR_DEFINED +#endif +#ifndef VINT32x8_XOR_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_xor(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT32x8_XOR_DEFINED +#endif +#ifndef VINT32x8_CMPLT_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_cmplt(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT32x8_CMPLT_DEFINED +#endif +#ifndef VINT32x8_CMPEQ_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_cmpeq(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT32x8_CMPEQ_DEFINED +#endif +#ifndef VINT32x8_CMPGT_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_cmpgt(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT32x8_CMPGT_DEFINED +#endif +#ifndef VINT32x8_CMPLE_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_cmple(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT32x8_CMPLE_DEFINED +#endif +#ifndef VINT32x8_CMPGE_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_cmpge(vint32x8 vec1, vint32x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT32x8_CMPGE_DEFINED +#endif +#ifndef VINT32x8_MIN_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_min(vint32x8 vec1, vint32x8 vec2) +{ + vint32x8 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x8_MIN_DEFINED +#endif +#ifndef VINT32x8_MAX_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_max(vint32x8 vec1, vint32x8 vec2) +{ + vint32x8 mask; + mask.gcc = (vec1.gcc > 
vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x8_MAX_DEFINED +#endif +#ifndef VINT32x8_AVG_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_avg(vint32x8 vec1, vint32x8 vec2) +{ + vint32x8 ones = vint32x8_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT32x8_AVG_DEFINED +#endif +#ifndef VINT32x8_LSHIFT_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_lshift(vint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT32x8_LSHIFT_DEFINED +#endif +#ifndef VINT32x8_RSHIFT_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_rshift(vint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT32x8_RSHIFT_DEFINED +#endif +#ifndef VINT32x8_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_lrshift(vint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT32x8_LRSHIFT_DEFINED +#endif +#ifndef VINT32x8_NOT_DEFINED +VEC_FUNC_IMPL vint32x8 vint32x8_not(vint32x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT32x8_NOT_DEFINED +#endif + + +/* vint32x8 */ + +#ifndef VUINT32x8_SPLAT_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_splat(vec_uint32 x) +{ + vuint32x8 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + return vec; +} +# define VUINT32x8_SPLAT_DEFINED +#endif +#ifndef VUINT32x8_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_load_aligned(const vec_uint32 x[8]) +{ + vuint32x8 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT32x8_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT32x8_LOAD_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_load(const vec_uint32 x[8]) +{ + vuint32x8 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT32x8_LOAD_DEFINED +#endif +#ifndef VUINT32x8_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint32x8_store_aligned(vuint32x8 vec, vec_uint32 arr[8]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT32x8_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT32x8_STORE_DEFINED +VEC_FUNC_IMPL void vuint32x8_store(vuint32x8 vec, vec_uint32 arr[8]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT32x8_STORE_DEFINED +#endif +#ifndef VUINT32x8_ADD_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_add(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT32x8_ADD_DEFINED +#endif +#ifndef VUINT32x8_SUB_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_sub(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT32x8_SUB_DEFINED +#endif +#ifndef VUINT32x8_MUL_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_mul(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT32x8_MUL_DEFINED +#endif +#ifndef VUINT32x8_AND_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_and(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT32x8_AND_DEFINED +#endif +#ifndef VUINT32x8_OR_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_or(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define 
VUINT32x8_OR_DEFINED +#endif +#ifndef VUINT32x8_XOR_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_xor(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT32x8_XOR_DEFINED +#endif +#ifndef VUINT32x8_CMPLT_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmplt(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT32x8_CMPLT_DEFINED +#endif +#ifndef VUINT32x8_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpeq(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT32x8_CMPEQ_DEFINED +#endif +#ifndef VUINT32x8_CMPGT_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpgt(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT32x8_CMPGT_DEFINED +#endif +#ifndef VUINT32x8_CMPLE_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmple(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT32x8_CMPLE_DEFINED +#endif +#ifndef VUINT32x8_CMPGE_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_cmpge(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT32x8_CMPGE_DEFINED +#endif +#ifndef VUINT32x8_MIN_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_min(vuint32x8 vec1, vuint32x8 vec2) +{ + vuint32x8 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT32x8_MIN_DEFINED +#endif +#ifndef VUINT32x8_MAX_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_max(vuint32x8 vec1, vuint32x8 vec2) +{ + vuint32x8 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT32x8_MAX_DEFINED +#endif +#ifndef VUINT32x8_AVG_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_avg(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT32x8_AVG_DEFINED +#endif +#ifndef VUINT32x8_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_lshift(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT32x8_LSHIFT_DEFINED +#endif +#ifndef VUINT32x8_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_rshift(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT32x8_RSHIFT_DEFINED +#endif +#ifndef VUINT32x8_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_lrshift(vuint32x8 vec1, vuint32x8 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT32x8_LRSHIFT_DEFINED +#endif +#ifndef VUINT32x8_NOT_DEFINED +VEC_FUNC_IMPL vuint32x8 vuint32x8_not(vuint32x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT32x8_NOT_DEFINED +#endif + + +/* vuint32x16 */ + +#ifndef VINT32x16_SPLAT_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_splat(vec_int32 x) +{ + vint32x16 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + return vec; +} +# define VINT32x16_SPLAT_DEFINED +#endif +#ifndef VINT32x16_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_load_aligned(const vec_int32 x[16]) +{ + vint32x16 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT32x16_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT32x16_LOAD_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_load(const vec_int32 x[16]) +{ + vint32x16 vec; + memcpy(&vec, x, 
sizeof(vec)); + return vec; +} +# define VINT32x16_LOAD_DEFINED +#endif +#ifndef VINT32x16_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint32x16_store_aligned(vint32x16 vec, vec_int32 arr[16]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT32x16_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT32x16_STORE_DEFINED +VEC_FUNC_IMPL void vint32x16_store(vint32x16 vec, vec_int32 arr[16]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT32x16_STORE_DEFINED +#endif +#ifndef VINT32x16_ADD_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_add(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT32x16_ADD_DEFINED +#endif +#ifndef VINT32x16_SUB_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_sub(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT32x16_SUB_DEFINED +#endif +#ifndef VINT32x16_MUL_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_mul(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT32x16_MUL_DEFINED +#endif +#ifndef VINT32x16_AND_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_and(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT32x16_AND_DEFINED +#endif +#ifndef VINT32x16_OR_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_or(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT32x16_OR_DEFINED +#endif +#ifndef VINT32x16_XOR_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_xor(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT32x16_XOR_DEFINED +#endif +#ifndef VINT32x16_CMPLT_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_cmplt(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT32x16_CMPLT_DEFINED +#endif +#ifndef VINT32x16_CMPEQ_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_cmpeq(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT32x16_CMPEQ_DEFINED +#endif +#ifndef VINT32x16_CMPGT_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_cmpgt(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT32x16_CMPGT_DEFINED +#endif +#ifndef VINT32x16_CMPLE_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_cmple(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT32x16_CMPLE_DEFINED +#endif +#ifndef VINT32x16_CMPGE_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_cmpge(vint32x16 vec1, vint32x16 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT32x16_CMPGE_DEFINED +#endif +#ifndef VINT32x16_MIN_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_min(vint32x16 vec1, vint32x16 vec2) +{ + vint32x16 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x16_MIN_DEFINED +#endif +#ifndef VINT32x16_MAX_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_max(vint32x16 vec1, vint32x16 vec2) +{ + vint32x16 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT32x16_MAX_DEFINED +#endif +#ifndef VINT32x16_AVG_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_avg(vint32x16 vec1, vint32x16 vec2) +{ + vint32x16 ones = vint32x16_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + 
__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT32x16_AVG_DEFINED +#endif +#ifndef VINT32x16_LSHIFT_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_lshift(vint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT32x16_LSHIFT_DEFINED +#endif +#ifndef VINT32x16_RSHIFT_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_rshift(vint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT32x16_RSHIFT_DEFINED +#endif +#ifndef VINT32x16_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_lrshift(vint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT32x16_LRSHIFT_DEFINED +#endif +#ifndef VINT32x16_NOT_DEFINED +VEC_FUNC_IMPL vint32x16 vint32x16_not(vint32x16 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT32x16_NOT_DEFINED +#endif + + +/* vint32x16 */ + +#ifndef VUINT32x16_SPLAT_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_splat(vec_uint32 x) +{ + vuint32x16 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,x,}; + return vec; +} +# define VUINT32x16_SPLAT_DEFINED +#endif +#ifndef VUINT32x16_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_load_aligned(const vec_uint32 x[16]) +{ + vuint32x16 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT32x16_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT32x16_LOAD_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_load(const vec_uint32 x[16]) +{ + vuint32x16 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT32x16_LOAD_DEFINED +#endif +#ifndef VUINT32x16_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint32x16_store_aligned(vuint32x16 vec, vec_uint32 arr[16]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT32x16_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT32x16_STORE_DEFINED +VEC_FUNC_IMPL void vuint32x16_store(vuint32x16 vec, vec_uint32 arr[16]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT32x16_STORE_DEFINED +#endif +#ifndef VUINT32x16_ADD_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_add(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT32x16_ADD_DEFINED +#endif +#ifndef VUINT32x16_SUB_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_sub(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT32x16_SUB_DEFINED +#endif +#ifndef VUINT32x16_MUL_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_mul(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT32x16_MUL_DEFINED +#endif +#ifndef VUINT32x16_AND_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_and(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT32x16_AND_DEFINED +#endif +#ifndef VUINT32x16_OR_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_or(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT32x16_OR_DEFINED +#endif +#ifndef VUINT32x16_XOR_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_xor(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT32x16_XOR_DEFINED +#endif +#ifndef VUINT32x16_CMPLT_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmplt(vuint32x16 vec1, vuint32x16 vec2) +{ 
+ vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT32x16_CMPLT_DEFINED +#endif +#ifndef VUINT32x16_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpeq(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT32x16_CMPEQ_DEFINED +#endif +#ifndef VUINT32x16_CMPGT_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpgt(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT32x16_CMPGT_DEFINED +#endif +#ifndef VUINT32x16_CMPLE_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmple(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT32x16_CMPLE_DEFINED +#endif +#ifndef VUINT32x16_CMPGE_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_cmpge(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT32x16_CMPGE_DEFINED +#endif +#ifndef VUINT32x16_MIN_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_min(vuint32x16 vec1, vuint32x16 vec2) +{ + vuint32x16 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT32x16_MIN_DEFINED +#endif +#ifndef VUINT32x16_MAX_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_max(vuint32x16 vec1, vuint32x16 vec2) +{ + vuint32x16 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT32x16_MAX_DEFINED +#endif +#ifndef VUINT32x16_AVG_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_avg(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT32x16_AVG_DEFINED +#endif +#ifndef VUINT32x16_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_lshift(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT32x16_LSHIFT_DEFINED +#endif +#ifndef VUINT32x16_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_rshift(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT32x16_RSHIFT_DEFINED +#endif +#ifndef VUINT32x16_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_lrshift(vuint32x16 vec1, vuint32x16 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint32 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT32x16_LRSHIFT_DEFINED +#endif +#ifndef VUINT32x16_NOT_DEFINED +VEC_FUNC_IMPL vuint32x16 vuint32x16_not(vuint32x16 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT32x16_NOT_DEFINED +#endif + + +/* vuint64x2 */ + +#ifndef VINT64x2_SPLAT_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_splat(vec_int64 x) +{ + vint64x2 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,}; + return vec; +} +# define VINT64x2_SPLAT_DEFINED +#endif +#ifndef VINT64x2_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_load_aligned(const vec_int64 x[2]) +{ + vint64x2 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT64x2_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT64x2_LOAD_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_load(const vec_int64 x[2]) +{ + vint64x2 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT64x2_LOAD_DEFINED +#endif +#ifndef VINT64x2_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint64x2_store_aligned(vint64x2 vec, vec_int64 arr[2]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT64x2_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT64x2_STORE_DEFINED 
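+/* Unaligned store path: memcpy makes no alignment assumptions about arr,
+ * unlike the direct vector-pointer write used by the _aligned variant. */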
+VEC_FUNC_IMPL void vint64x2_store(vint64x2 vec, vec_int64 arr[2]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT64x2_STORE_DEFINED +#endif +#ifndef VINT64x2_ADD_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_add(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT64x2_ADD_DEFINED +#endif +#ifndef VINT64x2_SUB_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_sub(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT64x2_SUB_DEFINED +#endif +#ifndef VINT64x2_MUL_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_mul(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT64x2_MUL_DEFINED +#endif +#ifndef VINT64x2_AND_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_and(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT64x2_AND_DEFINED +#endif +#ifndef VINT64x2_OR_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_or(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT64x2_OR_DEFINED +#endif +#ifndef VINT64x2_XOR_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_xor(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT64x2_XOR_DEFINED +#endif +#ifndef VINT64x2_CMPLT_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_cmplt(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT64x2_CMPLT_DEFINED +#endif +#ifndef VINT64x2_CMPEQ_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_cmpeq(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT64x2_CMPEQ_DEFINED +#endif +#ifndef VINT64x2_CMPGT_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_cmpgt(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT64x2_CMPGT_DEFINED +#endif +#ifndef VINT64x2_CMPLE_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_cmple(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT64x2_CMPLE_DEFINED +#endif +#ifndef VINT64x2_CMPGE_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_cmpge(vint64x2 vec1, vint64x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT64x2_CMPGE_DEFINED +#endif +#ifndef VINT64x2_MIN_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_min(vint64x2 vec1, vint64x2 vec2) +{ + vint64x2 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT64x2_MIN_DEFINED +#endif +#ifndef VINT64x2_MAX_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_max(vint64x2 vec1, vint64x2 vec2) +{ + vint64x2 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT64x2_MAX_DEFINED +#endif +#ifndef VINT64x2_AVG_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_avg(vint64x2 vec1, vint64x2 vec2) +{ + vint64x2 ones = vint64x2_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT64x2_AVG_DEFINED +#endif +#ifndef VINT64x2_LSHIFT_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_lshift(vint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT64x2_LSHIFT_DEFINED 
+#endif +#ifndef VINT64x2_RSHIFT_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_rshift(vint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT64x2_RSHIFT_DEFINED +#endif +#ifndef VINT64x2_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_lrshift(vint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT64x2_LRSHIFT_DEFINED +#endif +#ifndef VINT64x2_NOT_DEFINED +VEC_FUNC_IMPL vint64x2 vint64x2_not(vint64x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT64x2_NOT_DEFINED +#endif + + +/* vint64x2 */ + +#ifndef VUINT64x2_SPLAT_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_splat(vec_uint64 x) +{ + vuint64x2 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,}; + return vec; +} +# define VUINT64x2_SPLAT_DEFINED +#endif +#ifndef VUINT64x2_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_load_aligned(const vec_uint64 x[2]) +{ + vuint64x2 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT64x2_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT64x2_LOAD_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_load(const vec_uint64 x[2]) +{ + vuint64x2 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT64x2_LOAD_DEFINED +#endif +#ifndef VUINT64x2_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint64x2_store_aligned(vuint64x2 vec, vec_uint64 arr[2]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT64x2_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT64x2_STORE_DEFINED +VEC_FUNC_IMPL void vuint64x2_store(vuint64x2 vec, vec_uint64 arr[2]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT64x2_STORE_DEFINED +#endif +#ifndef VUINT64x2_ADD_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_add(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT64x2_ADD_DEFINED +#endif +#ifndef VUINT64x2_SUB_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_sub(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT64x2_SUB_DEFINED +#endif +#ifndef VUINT64x2_MUL_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_mul(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT64x2_MUL_DEFINED +#endif +#ifndef VUINT64x2_AND_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_and(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT64x2_AND_DEFINED +#endif +#ifndef VUINT64x2_OR_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_or(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT64x2_OR_DEFINED +#endif +#ifndef VUINT64x2_XOR_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_xor(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT64x2_XOR_DEFINED +#endif +#ifndef VUINT64x2_CMPLT_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmplt(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT64x2_CMPLT_DEFINED +#endif +#ifndef VUINT64x2_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpeq(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT64x2_CMPEQ_DEFINED +#endif +#ifndef VUINT64x2_CMPGT_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpgt(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT64x2_CMPGT_DEFINED +#endif +#ifndef 
VUINT64x2_CMPLE_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmple(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT64x2_CMPLE_DEFINED +#endif +#ifndef VUINT64x2_CMPGE_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_cmpge(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT64x2_CMPGE_DEFINED +#endif +#ifndef VUINT64x2_MIN_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_min(vuint64x2 vec1, vuint64x2 vec2) +{ + vuint64x2 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT64x2_MIN_DEFINED +#endif +#ifndef VUINT64x2_MAX_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_max(vuint64x2 vec1, vuint64x2 vec2) +{ + vuint64x2 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT64x2_MAX_DEFINED +#endif +#ifndef VUINT64x2_AVG_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_avg(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT64x2_AVG_DEFINED +#endif +#ifndef VUINT64x2_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_lshift(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT64x2_LSHIFT_DEFINED +#endif +#ifndef VUINT64x2_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_rshift(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT64x2_RSHIFT_DEFINED +#endif +#ifndef VUINT64x2_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_lrshift(vuint64x2 vec1, vuint64x2 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(16))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT64x2_LRSHIFT_DEFINED +#endif +#ifndef VUINT64x2_NOT_DEFINED +VEC_FUNC_IMPL vuint64x2 vuint64x2_not(vuint64x2 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT64x2_NOT_DEFINED +#endif + + +/* vuint64x4 */ + +#ifndef VINT64x4_SPLAT_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_splat(vec_int64 x) +{ + vint64x4 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + return vec; +} +# define VINT64x4_SPLAT_DEFINED +#endif +#ifndef VINT64x4_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_load_aligned(const vec_int64 x[4]) +{ + vint64x4 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT64x4_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT64x4_LOAD_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_load(const vec_int64 x[4]) +{ + vint64x4 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT64x4_LOAD_DEFINED +#endif +#ifndef VINT64x4_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint64x4_store_aligned(vint64x4 vec, vec_int64 arr[4]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT64x4_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT64x4_STORE_DEFINED +VEC_FUNC_IMPL void vint64x4_store(vint64x4 vec, vec_int64 arr[4]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT64x4_STORE_DEFINED +#endif +#ifndef VINT64x4_ADD_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_add(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT64x4_ADD_DEFINED +#endif +#ifndef VINT64x4_SUB_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_sub(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT64x4_SUB_DEFINED +#endif +#ifndef VINT64x4_MUL_DEFINED +VEC_FUNC_IMPL 
vint64x4 vint64x4_mul(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT64x4_MUL_DEFINED +#endif +#ifndef VINT64x4_AND_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_and(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT64x4_AND_DEFINED +#endif +#ifndef VINT64x4_OR_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_or(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT64x4_OR_DEFINED +#endif +#ifndef VINT64x4_XOR_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_xor(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT64x4_XOR_DEFINED +#endif +#ifndef VINT64x4_CMPLT_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_cmplt(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT64x4_CMPLT_DEFINED +#endif +#ifndef VINT64x4_CMPEQ_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_cmpeq(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT64x4_CMPEQ_DEFINED +#endif +#ifndef VINT64x4_CMPGT_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_cmpgt(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT64x4_CMPGT_DEFINED +#endif +#ifndef VINT64x4_CMPLE_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_cmple(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT64x4_CMPLE_DEFINED +#endif +#ifndef VINT64x4_CMPGE_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_cmpge(vint64x4 vec1, vint64x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT64x4_CMPGE_DEFINED +#endif +#ifndef VINT64x4_MIN_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_min(vint64x4 vec1, vint64x4 vec2) +{ + vint64x4 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT64x4_MIN_DEFINED +#endif +#ifndef VINT64x4_MAX_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_max(vint64x4 vec1, vint64x4 vec2) +{ + vint64x4 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT64x4_MAX_DEFINED +#endif +#ifndef VINT64x4_AVG_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_avg(vint64x4 vec1, vint64x4 vec2) +{ + vint64x4 ones = vint64x4_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT64x4_AVG_DEFINED +#endif +#ifndef VINT64x4_LSHIFT_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_lshift(vint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT64x4_LSHIFT_DEFINED +#endif +#ifndef VINT64x4_RSHIFT_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_rshift(vint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT64x4_RSHIFT_DEFINED +#endif +#ifndef VINT64x4_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_lrshift(vint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT64x4_LRSHIFT_DEFINED +#endif +#ifndef VINT64x4_NOT_DEFINED +VEC_FUNC_IMPL vint64x4 vint64x4_not(vint64x4 vec) +{ + 
vec.gcc = ~vec.gcc; + return vec; +} +# define VINT64x4_NOT_DEFINED +#endif + + +/* vint64x4 */ + +#ifndef VUINT64x4_SPLAT_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_splat(vec_uint64 x) +{ + vuint64x4 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,}; + return vec; +} +# define VUINT64x4_SPLAT_DEFINED +#endif +#ifndef VUINT64x4_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_load_aligned(const vec_uint64 x[4]) +{ + vuint64x4 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT64x4_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT64x4_LOAD_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_load(const vec_uint64 x[4]) +{ + vuint64x4 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT64x4_LOAD_DEFINED +#endif +#ifndef VUINT64x4_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint64x4_store_aligned(vuint64x4 vec, vec_uint64 arr[4]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT64x4_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT64x4_STORE_DEFINED +VEC_FUNC_IMPL void vuint64x4_store(vuint64x4 vec, vec_uint64 arr[4]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT64x4_STORE_DEFINED +#endif +#ifndef VUINT64x4_ADD_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_add(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT64x4_ADD_DEFINED +#endif +#ifndef VUINT64x4_SUB_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_sub(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT64x4_SUB_DEFINED +#endif +#ifndef VUINT64x4_MUL_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_mul(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT64x4_MUL_DEFINED +#endif +#ifndef VUINT64x4_AND_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_and(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT64x4_AND_DEFINED +#endif +#ifndef VUINT64x4_OR_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_or(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT64x4_OR_DEFINED +#endif +#ifndef VUINT64x4_XOR_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_xor(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT64x4_XOR_DEFINED +#endif +#ifndef VUINT64x4_CMPLT_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmplt(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT64x4_CMPLT_DEFINED +#endif +#ifndef VUINT64x4_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpeq(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT64x4_CMPEQ_DEFINED +#endif +#ifndef VUINT64x4_CMPGT_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpgt(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT64x4_CMPGT_DEFINED +#endif +#ifndef VUINT64x4_CMPLE_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmple(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT64x4_CMPLE_DEFINED +#endif +#ifndef VUINT64x4_CMPGE_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_cmpge(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT64x4_CMPGE_DEFINED +#endif +#ifndef VUINT64x4_MIN_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_min(vuint64x4 vec1, vuint64x4 vec2) +{ + vuint64x4 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & 
mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT64x4_MIN_DEFINED +#endif +#ifndef VUINT64x4_MAX_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_max(vuint64x4 vec1, vuint64x4 vec2) +{ + vuint64x4 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT64x4_MAX_DEFINED +#endif +#ifndef VUINT64x4_AVG_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_avg(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# define VUINT64x4_AVG_DEFINED +#endif +#ifndef VUINT64x4_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_lshift(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT64x4_LSHIFT_DEFINED +#endif +#ifndef VUINT64x4_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_rshift(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT64x4_RSHIFT_DEFINED +#endif +#ifndef VUINT64x4_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_lrshift(vuint64x4 vec1, vuint64x4 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(32))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT64x4_LRSHIFT_DEFINED +#endif +#ifndef VUINT64x4_NOT_DEFINED +VEC_FUNC_IMPL vuint64x4 vuint64x4_not(vuint64x4 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT64x4_NOT_DEFINED +#endif + + +/* vuint64x8 */ + +#ifndef VINT64x8_SPLAT_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_splat(vec_int64 x) +{ + vint64x8 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + return vec; +} +# define VINT64x8_SPLAT_DEFINED +#endif +#ifndef VINT64x8_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_load_aligned(const vec_int64 x[8]) +{ + vint64x8 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VINT64x8_LOAD_ALIGNED_DEFINED +#endif +#ifndef VINT64x8_LOAD_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_load(const vec_int64 x[8]) +{ + vint64x8 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VINT64x8_LOAD_DEFINED +#endif +#ifndef VINT64x8_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vint64x8_store_aligned(vint64x8 vec, vec_int64 arr[8]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VINT64x8_STORE_ALIGNED_DEFINED +#endif +#ifndef VINT64x8_STORE_DEFINED +VEC_FUNC_IMPL void vint64x8_store(vint64x8 vec, vec_int64 arr[8]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VINT64x8_STORE_DEFINED +#endif +#ifndef VINT64x8_ADD_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_add(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VINT64x8_ADD_DEFINED +#endif +#ifndef VINT64x8_SUB_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_sub(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VINT64x8_SUB_DEFINED +#endif +#ifndef VINT64x8_MUL_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_mul(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VINT64x8_MUL_DEFINED +#endif +#ifndef VINT64x8_AND_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_and(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VINT64x8_AND_DEFINED +#endif +#ifndef VINT64x8_OR_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_or(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VINT64x8_OR_DEFINED +#endif +#ifndef VINT64x8_XOR_DEFINED +VEC_FUNC_IMPL vint64x8 
vint64x8_xor(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VINT64x8_XOR_DEFINED +#endif +#ifndef VINT64x8_CMPLT_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_cmplt(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VINT64x8_CMPLT_DEFINED +#endif +#ifndef VINT64x8_CMPEQ_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_cmpeq(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VINT64x8_CMPEQ_DEFINED +#endif +#ifndef VINT64x8_CMPGT_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_cmpgt(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VINT64x8_CMPGT_DEFINED +#endif +#ifndef VINT64x8_CMPLE_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_cmple(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VINT64x8_CMPLE_DEFINED +#endif +#ifndef VINT64x8_CMPGE_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_cmpge(vint64x8 vec1, vint64x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VINT64x8_CMPGE_DEFINED +#endif +#ifndef VINT64x8_MIN_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_min(vint64x8 vec1, vint64x8 vec2) +{ + vint64x8 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT64x8_MIN_DEFINED +#endif +#ifndef VINT64x8_MAX_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_max(vint64x8 vec1, vint64x8 vec2) +{ + vint64x8 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VINT64x8_MAX_DEFINED +#endif +#ifndef VINT64x8_AVG_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_avg(vint64x8 vec1, vint64x8 vec2) +{ + vint64x8 ones = vint64x8_splat(1); + __typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2); + __typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2); + __typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2); + __typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2); + + vec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc); + return vec1; +} +# define VINT64x8_AVG_DEFINED +#endif +#ifndef VINT64x8_LSHIFT_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_lshift(vint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VINT64x8_LSHIFT_DEFINED +#endif +#ifndef VINT64x8_RSHIFT_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_rshift(vint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT64x8_RSHIFT_DEFINED +#endif +#ifndef VINT64x8_LRSHIFT_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_lrshift(vint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VINT64x8_LRSHIFT_DEFINED +#endif +#ifndef VINT64x8_NOT_DEFINED +VEC_FUNC_IMPL vint64x8 vint64x8_not(vint64x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VINT64x8_NOT_DEFINED +#endif + + +/* vint64x8 */ + +#ifndef VUINT64x8_SPLAT_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_splat(vec_uint64 x) +{ + vuint64x8 vec; + vec.gcc = (__typeof__(vec.gcc)){x,x,x,x,x,x,x,x,}; + return vec; +} +# define VUINT64x8_SPLAT_DEFINED +#endif +#ifndef VUINT64x8_LOAD_ALIGNED_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_load_aligned(const vec_uint64 x[8]) +{ + vuint64x8 vec; + vec.gcc = *(__typeof__(vec.gcc) *)x; + return vec; +} +# define VUINT64x8_LOAD_ALIGNED_DEFINED +#endif +#ifndef VUINT64x8_LOAD_DEFINED 
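+/* Unaligned load path: copying through memcpy is well-defined for any source
+ * alignment; compilers can generally lower this fixed-size copy to a plain
+ * (unaligned) vector move. */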
+VEC_FUNC_IMPL vuint64x8 vuint64x8_load(const vec_uint64 x[8]) +{ + vuint64x8 vec; + memcpy(&vec, x, sizeof(vec)); + return vec; +} +# define VUINT64x8_LOAD_DEFINED +#endif +#ifndef VUINT64x8_STORE_ALIGNED_DEFINED +VEC_FUNC_IMPL void vuint64x8_store_aligned(vuint64x8 vec, vec_uint64 arr[8]) +{ + *(__typeof__(vec.gcc) *)arr = vec.gcc; +} +# define VUINT64x8_STORE_ALIGNED_DEFINED +#endif +#ifndef VUINT64x8_STORE_DEFINED +VEC_FUNC_IMPL void vuint64x8_store(vuint64x8 vec, vec_uint64 arr[8]) +{ + memcpy(arr, &vec, sizeof(vec)); +} +# define VUINT64x8_STORE_DEFINED +#endif +#ifndef VUINT64x8_ADD_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_add(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc + vec2.gcc); + return vec1; +} +# define VUINT64x8_ADD_DEFINED +#endif +#ifndef VUINT64x8_SUB_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_sub(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc - vec2.gcc); + return vec1; +} +# define VUINT64x8_SUB_DEFINED +#endif +#ifndef VUINT64x8_MUL_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_mul(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc * vec2.gcc); + return vec1; +} +# define VUINT64x8_MUL_DEFINED +#endif +#ifndef VUINT64x8_AND_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_and(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc & vec2.gcc); + return vec1; +} +# define VUINT64x8_AND_DEFINED +#endif +#ifndef VUINT64x8_OR_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_or(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc | vec2.gcc); + return vec1; +} +# define VUINT64x8_OR_DEFINED +#endif +#ifndef VUINT64x8_XOR_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_xor(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc ^ vec2.gcc); + return vec1; +} +# define VUINT64x8_XOR_DEFINED +#endif +#ifndef VUINT64x8_CMPLT_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmplt(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc < vec2.gcc); + return vec1; +} +# define VUINT64x8_CMPLT_DEFINED +#endif +#ifndef VUINT64x8_CMPEQ_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpeq(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc == vec2.gcc); + return vec1; +} +# define VUINT64x8_CMPEQ_DEFINED +#endif +#ifndef VUINT64x8_CMPGT_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpgt(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc > vec2.gcc); + return vec1; +} +# define VUINT64x8_CMPGT_DEFINED +#endif +#ifndef VUINT64x8_CMPLE_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmple(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc <= vec2.gcc); + return vec1; +} +# define VUINT64x8_CMPLE_DEFINED +#endif +#ifndef VUINT64x8_CMPGE_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_cmpge(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc >= vec2.gcc); + return vec1; +} +# define VUINT64x8_CMPGE_DEFINED +#endif +#ifndef VUINT64x8_MIN_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_min(vuint64x8 vec1, vuint64x8 vec2) +{ + vuint64x8 mask; + mask.gcc = (vec1.gcc < vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT64x8_MIN_DEFINED +#endif +#ifndef VUINT64x8_MAX_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_max(vuint64x8 vec1, vuint64x8 vec2) +{ + vuint64x8 mask; + mask.gcc = (vec1.gcc > vec2.gcc); + vec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc); + return vec1; +} +# define VUINT64x8_MAX_DEFINED +#endif +#ifndef VUINT64x8_AVG_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_avg(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1); + return vec1; +} +# 
define VUINT64x8_AVG_DEFINED +#endif +#ifndef VUINT64x8_LSHIFT_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_lshift(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc << vec2.gcc); + return vec1; +} +# define VUINT64x8_LSHIFT_DEFINED +#endif +#ifndef VUINT64x8_RSHIFT_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_rshift(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT64x8_RSHIFT_DEFINED +#endif +#ifndef VUINT64x8_LRSHIFT_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_lrshift(vuint64x8 vec1, vuint64x8 vec2) +{ + vec1.gcc = (__typeof__(vec1.gcc))((vec_uint64 __attribute__((__vector_size__(64))))vec1.gcc >> vec2.gcc); + return vec1; +} +# define VUINT64x8_LRSHIFT_DEFINED +#endif +#ifndef VUINT64x8_NOT_DEFINED +VEC_FUNC_IMPL vuint64x8 vuint64x8_not(vuint64x8 vec) +{ + vec.gcc = ~vec.gcc; + return vec; +} +# define VUINT64x8_NOT_DEFINED +#endif +#endif /* VEC_IMPL_GCC_H_ */ + diff -r f9ca85d2f14c -r 55cadb1fac4b include/vec/impl/generic.h --- a/include/vec/impl/generic.h Sat Apr 26 15:31:39 2025 -0400 +++ b/include/vec/impl/generic.h Sun Apr 27 02:49:53 2025 -0400 @@ -99,6 +99,12 @@ VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] / vec2.generic[i]) : 0, sign, bits, size); \ } +#define VEC_GENERIC_MOD(sign, bits, size) \ + VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_mod(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ + { \ + VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] % vec2.generic[i]) : 0, sign, bits, size); \ + } + #define VEC_GENERIC_AVG(sign, bits, size) \ VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ { \ @@ -250,6 +256,7 @@ #define VEC_GENERIC_DBL_SUB(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(sub, sign, bits, size, halfsize, sign) #define VEC_GENERIC_DBL_MUL(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mul, sign, bits, size, halfsize, sign) #define VEC_GENERIC_DBL_DIV(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(div, sign, bits, size, halfsize, sign) +#define VEC_GENERIC_DBL_MOD(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mod, sign, bits, size, halfsize, sign) #define VEC_GENERIC_DBL_AVG(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(avg, sign, bits, size, halfsize, sign) #define VEC_GENERIC_DBL_LSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(lshift, sign, bits, size, halfsize, u) #define VEC_GENERIC_DBL_RSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(rshift, sign, bits, size, halfsize, u) @@ -317,6 +324,10 @@ VEC_GENERIC_DIV(/* nothing */, 8, 2) # define VINT8x2_DIV_DEFINED #endif +#ifndef VINT8x2_MOD_DEFINED +VEC_GENERIC_MOD(/* nothing */, 8, 2) +# define VINT8x2_MOD_DEFINED +#endif #ifndef VINT8x2_AVG_DEFINED VEC_GENERIC_AVG(/* nothing */, 8, 2) # define VINT8x2_AVG_DEFINED @@ -417,6 +428,10 @@ VEC_GENERIC_DIV(u, 8, 2) # define VUINT8x2_DIV_DEFINED #endif +#ifndef VUINT8x2_MOD_DEFINED +VEC_GENERIC_MOD(u, 8, 2) +# define VUINT8x2_MOD_DEFINED +#endif #ifndef VUINT8x2_AVG_DEFINED VEC_GENERIC_AVG(u, 8, 2) # define VUINT8x2_AVG_DEFINED @@ -526,6 +541,11 @@ # define VINT8x4_DIV_DEFINED #endif +#ifndef VINT8x4_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 8, 4, 2) +# define VINT8x4_MOD_DEFINED +#endif + #ifndef VINT8x4_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 8, 4, 2) # define VINT8x4_AVG_DEFINED @@ -650,6 +670,11 @@ # define VUINT8x4_DIV_DEFINED #endif +#ifndef VUINT8x4_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 8, 4, 2) +# define 
VUINT8x4_MOD_DEFINED +#endif + #ifndef VUINT8x4_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 8, 4, 2) # define VUINT8x4_AVG_DEFINED @@ -774,6 +799,11 @@ # define VINT8x8_DIV_DEFINED #endif +#ifndef VINT8x8_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 8, 8, 4) +# define VINT8x8_MOD_DEFINED +#endif + #ifndef VINT8x8_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 8, 8, 4) # define VINT8x8_AVG_DEFINED @@ -898,6 +928,11 @@ # define VUINT8x8_DIV_DEFINED #endif +#ifndef VUINT8x8_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 8, 8, 4) +# define VUINT8x8_MOD_DEFINED +#endif + #ifndef VUINT8x8_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 8, 8, 4) # define VUINT8x8_AVG_DEFINED @@ -1022,6 +1057,11 @@ # define VINT8x16_DIV_DEFINED #endif +#ifndef VINT8x16_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 8, 16, 8) +# define VINT8x16_MOD_DEFINED +#endif + #ifndef VINT8x16_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 8, 16, 8) # define VINT8x16_AVG_DEFINED @@ -1146,6 +1186,11 @@ # define VUINT8x16_DIV_DEFINED #endif +#ifndef VUINT8x16_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 8, 16, 8) +# define VUINT8x16_MOD_DEFINED +#endif + #ifndef VUINT8x16_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 8, 16, 8) # define VUINT8x16_AVG_DEFINED @@ -1270,6 +1315,11 @@ # define VINT8x32_DIV_DEFINED #endif +#ifndef VINT8x32_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 8, 32, 16) +# define VINT8x32_MOD_DEFINED +#endif + #ifndef VINT8x32_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 8, 32, 16) # define VINT8x32_AVG_DEFINED @@ -1394,6 +1444,11 @@ # define VUINT8x32_DIV_DEFINED #endif +#ifndef VUINT8x32_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 8, 32, 16) +# define VUINT8x32_MOD_DEFINED +#endif + #ifndef VUINT8x32_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 8, 32, 16) # define VUINT8x32_AVG_DEFINED @@ -1518,6 +1573,11 @@ # define VINT8x64_DIV_DEFINED #endif +#ifndef VINT8x64_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 8, 64, 32) +# define VINT8x64_MOD_DEFINED +#endif + #ifndef VINT8x64_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 8, 64, 32) # define VINT8x64_AVG_DEFINED @@ -1642,6 +1702,11 @@ # define VUINT8x64_DIV_DEFINED #endif +#ifndef VUINT8x64_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 8, 64, 32) +# define VUINT8x64_MOD_DEFINED +#endif + #ifndef VUINT8x64_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 8, 64, 32) # define VUINT8x64_AVG_DEFINED @@ -1757,6 +1822,10 @@ VEC_GENERIC_DIV(/* nothing */, 16, 2) # define VINT16x2_DIV_DEFINED #endif +#ifndef VINT16x2_MOD_DEFINED +VEC_GENERIC_MOD(/* nothing */, 16, 2) +# define VINT16x2_MOD_DEFINED +#endif #ifndef VINT16x2_AVG_DEFINED VEC_GENERIC_AVG(/* nothing */, 16, 2) # define VINT16x2_AVG_DEFINED @@ -1857,6 +1926,10 @@ VEC_GENERIC_DIV(u, 16, 2) # define VUINT16x2_DIV_DEFINED #endif +#ifndef VUINT16x2_MOD_DEFINED +VEC_GENERIC_MOD(u, 16, 2) +# define VUINT16x2_MOD_DEFINED +#endif #ifndef VUINT16x2_AVG_DEFINED VEC_GENERIC_AVG(u, 16, 2) # define VUINT16x2_AVG_DEFINED @@ -1966,6 +2039,11 @@ # define VINT16x4_DIV_DEFINED #endif +#ifndef VINT16x4_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 16, 4, 2) +# define VINT16x4_MOD_DEFINED +#endif + #ifndef VINT16x4_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 16, 4, 2) # define VINT16x4_AVG_DEFINED @@ -2090,6 +2168,11 @@ # define VUINT16x4_DIV_DEFINED #endif +#ifndef VUINT16x4_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 16, 4, 2) +# define VUINT16x4_MOD_DEFINED +#endif + #ifndef VUINT16x4_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 16, 4, 2) # define VUINT16x4_AVG_DEFINED @@ -2214,6 +2297,11 @@ # define VINT16x8_DIV_DEFINED #endif +#ifndef VINT16x8_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 16, 8, 4) +# define 
VINT16x8_MOD_DEFINED +#endif + #ifndef VINT16x8_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 16, 8, 4) # define VINT16x8_AVG_DEFINED @@ -2338,6 +2426,11 @@ # define VUINT16x8_DIV_DEFINED #endif +#ifndef VUINT16x8_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 16, 8, 4) +# define VUINT16x8_MOD_DEFINED +#endif + #ifndef VUINT16x8_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 16, 8, 4) # define VUINT16x8_AVG_DEFINED @@ -2462,6 +2555,11 @@ # define VINT16x16_DIV_DEFINED #endif +#ifndef VINT16x16_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 16, 16, 8) +# define VINT16x16_MOD_DEFINED +#endif + #ifndef VINT16x16_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 16, 16, 8) # define VINT16x16_AVG_DEFINED @@ -2586,6 +2684,11 @@ # define VUINT16x16_DIV_DEFINED #endif +#ifndef VUINT16x16_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 16, 16, 8) +# define VUINT16x16_MOD_DEFINED +#endif + #ifndef VUINT16x16_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 16, 16, 8) # define VUINT16x16_AVG_DEFINED @@ -2710,6 +2813,11 @@ # define VINT16x32_DIV_DEFINED #endif +#ifndef VINT16x32_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 16, 32, 16) +# define VINT16x32_MOD_DEFINED +#endif + #ifndef VINT16x32_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 16, 32, 16) # define VINT16x32_AVG_DEFINED @@ -2834,6 +2942,11 @@ # define VUINT16x32_DIV_DEFINED #endif +#ifndef VUINT16x32_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 16, 32, 16) +# define VUINT16x32_MOD_DEFINED +#endif + #ifndef VUINT16x32_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 16, 32, 16) # define VUINT16x32_AVG_DEFINED @@ -2949,6 +3062,10 @@ VEC_GENERIC_DIV(/* nothing */, 32, 2) # define VINT32x2_DIV_DEFINED #endif +#ifndef VINT32x2_MOD_DEFINED +VEC_GENERIC_MOD(/* nothing */, 32, 2) +# define VINT32x2_MOD_DEFINED +#endif #ifndef VINT32x2_AVG_DEFINED VEC_GENERIC_AVG(/* nothing */, 32, 2) # define VINT32x2_AVG_DEFINED @@ -3049,6 +3166,10 @@ VEC_GENERIC_DIV(u, 32, 2) # define VUINT32x2_DIV_DEFINED #endif +#ifndef VUINT32x2_MOD_DEFINED +VEC_GENERIC_MOD(u, 32, 2) +# define VUINT32x2_MOD_DEFINED +#endif #ifndef VUINT32x2_AVG_DEFINED VEC_GENERIC_AVG(u, 32, 2) # define VUINT32x2_AVG_DEFINED @@ -3158,6 +3279,11 @@ # define VINT32x4_DIV_DEFINED #endif +#ifndef VINT32x4_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 32, 4, 2) +# define VINT32x4_MOD_DEFINED +#endif + #ifndef VINT32x4_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 32, 4, 2) # define VINT32x4_AVG_DEFINED @@ -3282,6 +3408,11 @@ # define VUINT32x4_DIV_DEFINED #endif +#ifndef VUINT32x4_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 32, 4, 2) +# define VUINT32x4_MOD_DEFINED +#endif + #ifndef VUINT32x4_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 32, 4, 2) # define VUINT32x4_AVG_DEFINED @@ -3406,6 +3537,11 @@ # define VINT32x8_DIV_DEFINED #endif +#ifndef VINT32x8_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 32, 8, 4) +# define VINT32x8_MOD_DEFINED +#endif + #ifndef VINT32x8_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 32, 8, 4) # define VINT32x8_AVG_DEFINED @@ -3530,6 +3666,11 @@ # define VUINT32x8_DIV_DEFINED #endif +#ifndef VUINT32x8_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 32, 8, 4) +# define VUINT32x8_MOD_DEFINED +#endif + #ifndef VUINT32x8_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 32, 8, 4) # define VUINT32x8_AVG_DEFINED @@ -3654,6 +3795,11 @@ # define VINT32x16_DIV_DEFINED #endif +#ifndef VINT32x16_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 32, 16, 8) +# define VINT32x16_MOD_DEFINED +#endif + #ifndef VINT32x16_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 32, 16, 8) # define VINT32x16_AVG_DEFINED @@ -3778,6 +3924,11 @@ # define VUINT32x16_DIV_DEFINED #endif +#ifndef 
VUINT32x16_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 32, 16, 8) +# define VUINT32x16_MOD_DEFINED +#endif + #ifndef VUINT32x16_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 32, 16, 8) # define VUINT32x16_AVG_DEFINED @@ -3893,6 +4044,10 @@ VEC_GENERIC_DIV(/* nothing */, 64, 2) # define VINT64x2_DIV_DEFINED #endif +#ifndef VINT64x2_MOD_DEFINED +VEC_GENERIC_MOD(/* nothing */, 64, 2) +# define VINT64x2_MOD_DEFINED +#endif #ifndef VINT64x2_AVG_DEFINED VEC_GENERIC_AVG(/* nothing */, 64, 2) # define VINT64x2_AVG_DEFINED @@ -3993,6 +4148,10 @@ VEC_GENERIC_DIV(u, 64, 2) # define VUINT64x2_DIV_DEFINED #endif +#ifndef VUINT64x2_MOD_DEFINED +VEC_GENERIC_MOD(u, 64, 2) +# define VUINT64x2_MOD_DEFINED +#endif #ifndef VUINT64x2_AVG_DEFINED VEC_GENERIC_AVG(u, 64, 2) # define VUINT64x2_AVG_DEFINED @@ -4102,6 +4261,11 @@ # define VINT64x4_DIV_DEFINED #endif +#ifndef VINT64x4_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 64, 4, 2) +# define VINT64x4_MOD_DEFINED +#endif + #ifndef VINT64x4_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 64, 4, 2) # define VINT64x4_AVG_DEFINED @@ -4226,6 +4390,11 @@ # define VUINT64x4_DIV_DEFINED #endif +#ifndef VUINT64x4_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 64, 4, 2) +# define VUINT64x4_MOD_DEFINED +#endif + #ifndef VUINT64x4_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 64, 4, 2) # define VUINT64x4_AVG_DEFINED @@ -4350,6 +4519,11 @@ # define VINT64x8_DIV_DEFINED #endif +#ifndef VINT64x8_MOD_DEFINED +VEC_GENERIC_DBL_MOD(/* nothing */, 64, 8, 4) +# define VINT64x8_MOD_DEFINED +#endif + #ifndef VINT64x8_AVG_DEFINED VEC_GENERIC_DBL_AVG(/* nothing */, 64, 8, 4) # define VINT64x8_AVG_DEFINED @@ -4474,6 +4648,11 @@ # define VUINT64x8_DIV_DEFINED #endif +#ifndef VUINT64x8_MOD_DEFINED +VEC_GENERIC_DBL_MOD(u, 64, 8, 4) +# define VUINT64x8_MOD_DEFINED +#endif + #ifndef VUINT64x8_AVG_DEFINED VEC_GENERIC_DBL_AVG(u, 64, 8, 4) # define VUINT64x8_AVG_DEFINED diff -r f9ca85d2f14c -r 55cadb1fac4b include/vec/vec.h --- a/include/vec/vec.h Sat Apr 26 15:31:39 2025 -0400 +++ b/include/vec/vec.h Sun Apr 27 02:49:53 2025 -0400 @@ -315,6 +315,10 @@ # endif #endif +#endif /* defined(VEC_SUPPRESS_HW) */ + +#if VEC_GNUC_ATLEAST(4, 0, 0) +# define VEC_COMPILER_HAS_GCC_VECTORS #endif #ifdef __cplusplus @@ -353,17 +357,17 @@ VEC_FUNC_IMPL vec_intmax vec_avg(vec_intmax x, vec_intmax y) { - vec_intmax x_d_rem = (x % 2); - vec_intmax y_d_rem = (y % 2); - vec_intmax rem_d_quot = ((x_d_rem + y_d_rem) / 2); - vec_intmax rem_d_rem = ((x_d_rem + y_d_rem) % 2); + vec_intmax x_d_rem = (x % 2); + vec_intmax y_d_rem = (y % 2); + vec_intmax rem_d_quot = ((x_d_rem + y_d_rem) / 2); + vec_intmax rem_d_rem = ((x_d_rem + y_d_rem) % 2); - return ((x / 2) + (y / 2)) + (rem_d_quot) + (rem_d_rem == 1); + return ((x / 2) + (y / 2)) + (rem_d_quot) + (rem_d_rem == 1); } VEC_FUNC_IMPL vec_uintmax vec_uavg(vec_uintmax x, vec_uintmax y) { - return (x >> 1) + (y >> 1) + ((x | y) & 1); + return (x >> 1) + (y >> 1) + ((x | y) & 1); } /* --------------------------------------------------------------- */ @@ -608,27 +612,45 @@ // 16-bit typedef union { +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint8 __attribute__((__vector_size__(2))) gcc; +#endif vec_uint8 generic[2]; } vuint8x2; typedef union { +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int8 __attribute__((__vector_size__(2))) gcc; +#endif vec_int8 generic[2]; } vint8x2; // 32-bit typedef union { +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint8 __attribute__((__vector_size__(4))) gcc; +#endif vuint8x2 generic[2]; } vuint8x4; typedef union { +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint16 
__attribute__((__vector_size__(4))) gcc; +#endif vec_uint16 generic[2]; } vuint16x2; typedef union { +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int8 __attribute__((__vector_size__(4))) gcc; +#endif vint8x2 generic[2]; } vint8x4; typedef union { +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int16 __attribute__((__vector_size__(4))) gcc; +#endif vec_int16 generic[2]; } vint16x2; @@ -640,6 +662,9 @@ #ifdef VEC_COMPILER_HAS_NEON uint8x8_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint8 __attribute__((__vector_size__(8))) gcc; +#endif vuint8x4 generic[2]; } vuint8x8; @@ -651,6 +676,9 @@ #ifdef VEC_COMPILER_HAS_NEON uint16x4_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint16 __attribute__((__vector_size__(8))) gcc; +#endif vuint16x2 generic[2]; } vuint16x4; @@ -662,6 +690,9 @@ #ifdef VEC_COMPILER_HAS_NEON uint32x2_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint32 __attribute__((__vector_size__(8))) gcc; +#endif vec_uint32 generic[2]; } vuint32x2; @@ -673,6 +704,9 @@ #ifdef VEC_COMPILER_HAS_NEON int8x8_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int8 __attribute__((__vector_size__(8))) gcc; +#endif vint8x4 generic[2]; } vint8x8; @@ -684,6 +718,9 @@ #ifdef VEC_COMPILER_HAS_NEON int16x4_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int16 __attribute__((__vector_size__(8))) gcc; +#endif vint16x2 generic[2]; } vint16x4; @@ -695,6 +732,9 @@ #ifdef VEC_COMPILER_HAS_NEON int32x2_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int32 __attribute__((__vector_size__(8))) gcc; +#endif vec_int32 generic[2]; } vint32x2; @@ -710,6 +750,9 @@ #ifdef VEC_COMPILER_HAS_NEON uint8x16_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint8 __attribute__((__vector_size__(16))) gcc; +#endif vuint8x8 generic[2]; } vuint8x16; @@ -723,6 +766,9 @@ #ifdef VEC_COMPILER_HAS_NEON uint16x8_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint16 __attribute__((__vector_size__(16))) gcc; +#endif vuint16x4 generic[2]; } vuint16x8; @@ -736,6 +782,9 @@ #ifdef VEC_COMPILER_HAS_NEON uint32x4_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint32 __attribute__((__vector_size__(16))) gcc; +#endif vuint32x2 generic[2]; } vuint32x4; @@ -749,6 +798,9 @@ #ifdef VEC_COMPILER_HAS_NEON uint64x2_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint64 __attribute__((__vector_size__(16))) gcc; +#endif vec_uint64 generic[2]; } vuint64x2; @@ -762,6 +814,9 @@ #ifdef VEC_COMPILER_HAS_NEON int8x16_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int8 __attribute__((__vector_size__(16))) gcc; +#endif vint8x8 generic[2]; } vint8x16; @@ -775,6 +830,9 @@ #ifdef VEC_COMPILER_HAS_NEON int16x8_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int16 __attribute__((__vector_size__(16))) gcc; +#endif vint16x4 generic[2]; } vint16x8; @@ -788,6 +846,9 @@ #ifdef VEC_COMPILER_HAS_NEON int32x4_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int32 __attribute__((__vector_size__(16))) gcc; +#endif vint32x2 generic[2]; } vint32x4; @@ -801,6 +862,9 @@ #ifdef VEC_COMPILER_HAS_NEON int64x2_t neon; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int64 __attribute__((__vector_size__(16))) gcc; +#endif vec_int64 generic[2]; } vint64x2; @@ -809,6 +873,9 @@ #ifdef VEC_COMPILER_HAS_AVX2 __m256i avx2; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint8 __attribute__((__vector_size__(32))) gcc; +#endif vuint8x16 generic[2]; } vuint8x32; @@ -816,6 +883,9 @@ #ifdef VEC_COMPILER_HAS_AVX2 __m256i avx2; #endif +#ifdef 
VEC_COMPILER_HAS_GCC_VECTORS + vec_uint16 __attribute__((__vector_size__(32))) gcc; +#endif vuint16x8 generic[2]; } vuint16x16; @@ -823,6 +893,9 @@ #ifdef VEC_COMPILER_HAS_AVX2 __m256i avx2; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint32 __attribute__((__vector_size__(32))) gcc; +#endif vuint32x4 generic[2]; } vuint32x8; @@ -830,6 +903,9 @@ #ifdef VEC_COMPILER_HAS_AVX2 __m256i avx2; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint64 __attribute__((__vector_size__(32))) gcc; +#endif vuint64x2 generic[2]; } vuint64x4; @@ -837,6 +913,9 @@ #ifdef VEC_COMPILER_HAS_AVX2 __m256i avx2; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int8 __attribute__((__vector_size__(32))) gcc; +#endif vint8x16 generic[2]; } vint8x32; @@ -844,6 +923,9 @@ #ifdef VEC_COMPILER_HAS_AVX2 __m256i avx2; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int16 __attribute__((__vector_size__(32))) gcc; +#endif vint16x8 generic[2]; } vint16x16; @@ -851,6 +933,9 @@ #ifdef VEC_COMPILER_HAS_AVX2 __m256i avx2; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int32 __attribute__((__vector_size__(32))) gcc; +#endif vint32x4 generic[2]; } vint32x8; @@ -858,6 +943,9 @@ #ifdef VEC_COMPILER_HAS_AVX2 __m256i avx2; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int64 __attribute__((__vector_size__(32))) gcc; +#endif vint64x2 generic[2]; } vint64x4; @@ -866,6 +954,9 @@ #ifdef VEC_COMPILER_HAS_AVX512F __m512i avx512f; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint8 __attribute__((__vector_size__(64))) gcc; +#endif vuint8x32 generic[2]; } vuint8x64; @@ -873,6 +964,9 @@ #ifdef VEC_COMPILER_HAS_AVX512F __m512i avx512f; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint16 __attribute__((__vector_size__(64))) gcc; +#endif vuint16x16 generic[2]; } vuint16x32; @@ -880,6 +974,9 @@ #ifdef VEC_COMPILER_HAS_AVX512F __m512i avx512f; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint32 __attribute__((__vector_size__(64))) gcc; +#endif vuint32x8 generic[2]; } vuint32x16; @@ -887,6 +984,9 @@ #ifdef VEC_COMPILER_HAS_AVX512F __m512i avx512f; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_uint64 __attribute__((__vector_size__(64))) gcc; +#endif vuint64x4 generic[2]; } vuint64x8; @@ -894,6 +994,9 @@ #ifdef VEC_COMPILER_HAS_AVX512F __m512i avx512f; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int8 __attribute__((__vector_size__(64))) gcc; +#endif vint8x32 generic[2]; } vint8x64; @@ -901,6 +1004,9 @@ #ifdef VEC_COMPILER_HAS_AVX512F __m512i avx512f; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int16 __attribute__((__vector_size__(64))) gcc; +#endif vint16x16 generic[2]; } vint16x32; @@ -908,6 +1014,9 @@ #ifdef VEC_COMPILER_HAS_AVX512F __m512i avx512f; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int32 __attribute__((__vector_size__(64))) gcc; +#endif vint32x8 generic[2]; } vint32x16; @@ -915,6 +1024,9 @@ #ifdef VEC_COMPILER_HAS_AVX512F __m512i avx512f; #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS + vec_int64 __attribute__((__vector_size__(64))) gcc; +#endif vint64x4 generic[2]; } vint64x8; @@ -949,6 +1061,10 @@ # include "impl/x86/mmx.h" #endif +#ifdef VEC_COMPILER_HAS_GCC_VECTORS +# include "impl/gcc.h" +#endif + #include "impl/generic.h" /* ------------------------------------------------------------------------ */ diff -r f9ca85d2f14c -r 55cadb1fac4b test/Makefile.template --- a/test/Makefile.template Sat Apr 26 15:31:39 2025 -0400 +++ b/test/Makefile.template Sun Apr 27 02:49:53 2025 -0400 @@ -1,4 +1,4 @@ -CPPFLAGS += -O2 -I../include -Wall -Wpedantic -Werror=strict-aliasing +CPPFLAGS += -g -O2 
-I../include -Wall -Wpedantic -Werror=strict-aliasing CFLAGS += $(CPPFLAGS) -std=c99 CXXFLAGS += $(CPPFLAGS) -std=c++11 @@ -16,6 +16,7 @@ ../include/vec/impl/x86/sse41.h \ ../include/vec/impl/x86/sse42.h \ ../include/vec/impl/generic.h \ + ../include/vec/impl/gcc.h \ test_align.h \ test_arith.h \ test_compare.h \ diff -r f9ca85d2f14c -r 55cadb1fac4b test/test_arith.h --- a/test/test_arith.h Sat Apr 26 15:31:39 2025 -0400 +++ b/test/test_arith.h Sun Apr 27 02:49:53 2025 -0400 @@ -1,21 +1,21 @@ -#define CREATE_TEST(sign, psign, csign, bits, size, op, equiv) \ - static int test_arith_v##sign##int##bits##x##size##_##op(v##sign##int##bits##x##size a, v##sign##int##bits##x##size b) \ +#define CREATE_TEST_EX(sign, psign, csign, bits, size, op, equiv, secondsign, secondcsign) \ + static int test_arith_v##sign##int##bits##x##size##_##op(v##sign##int##bits##x##size a, v##secondsign##int##bits##x##size b) \ { \ V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_a); \ - V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_b); \ + V##secondcsign##INT##bits##x##size##_ALIGNED_ARRAY(orig_b); \ V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_c); \ \ v##sign##int##bits##x##size c = v##sign##int##bits##x##size##_##op(a, b); \ \ v##sign##int##bits##x##size##_store_aligned(a, orig_a); \ - v##sign##int##bits##x##size##_store_aligned(b, orig_b); \ + v##secondsign##int##bits##x##size##_store_aligned(b, orig_b); \ v##sign##int##bits##x##size##_store_aligned(c, orig_c); \ \ for (int i = 0; i < size; i++) { \ if ((sign##int##bits##_t)(equiv) != orig_c[i]) { \ - fprintf(stderr, "v" #sign "int" #bits "x" #size "_" #op " test FAILED at index %d: (" #equiv ") [%" PRI ## psign ## bits "] does not equal result [%" PRI ## psign ## bits "]!\n", i, equiv, orig_c[i]); \ + fprintf(stderr, "v" #sign "int" #bits "x" #size "_" #op " test FAILED at index %d: (%s) [%" PRI ## psign ## bits "] does not equal result [%" PRI ## psign ## bits "]!\n", i, #equiv, (vec_##sign##int##bits)(equiv), orig_c[i]); \ print_v##sign##int##bits##x##size(stderr,a); \ - print_v##sign##int##bits##x##size(stderr,b); \ + print_v##secondsign##int##bits##x##size(stderr,b); \ print_v##sign##int##bits##x##size(stderr,c); \ fprintf(stderr, "\n"); \ return 1; \ @@ -25,38 +25,18 @@ return 0; \ } +#define CREATE_TEST(sign, psign, csign, bits, size, op, equiv) \ + CREATE_TEST_EX(sign, psign, csign, bits, size, op, equiv, sign, csign) + #define CREATE_TEST_SHIFT(sign, psign, csign, bits, size, op, equiv) \ - static int test_arith_v##sign##int##bits##x##size##_##op(v##sign##int##bits##x##size a, vuint##bits##x##size b) \ - { \ - V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_a); \ - VUINT##bits##x##size##_ALIGNED_ARRAY(orig_b); \ - V##csign##INT##bits##x##size##_ALIGNED_ARRAY(orig_c); \ - \ - v##sign##int##bits##x##size c = v##sign##int##bits##x##size##_##op(a, b); \ - \ - v##sign##int##bits##x##size##_store_aligned(a, orig_a); \ - vuint##bits##x##size##_store_aligned(b, orig_b); \ - v##sign##int##bits##x##size##_store_aligned(c, orig_c); \ - \ - for (int i = 0; i < size; i++) { \ - if ((sign##int##bits##_t)(equiv) != orig_c[i]) { \ - fprintf(stderr, "v" #sign "int" #bits "x" #size "_" #op " test FAILED at index %d: (" #equiv ") [%" PRI ## psign ## bits "] does not equal result [%" PRI ## psign ## bits "]!\n", i, (sign##int##bits##_t)(equiv), orig_c[i]); \ - print_v##sign##int##bits##x##size(stderr,a); \ - print_vuint##bits##x##size(stderr,b); \ - print_v##sign##int##bits##x##size(stderr,c); \ - fprintf(stderr, "\n"); \ - return 1; \ - } \ - } \ - \ - return 0; \ 
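For the mod tests introduced just below, the expected value follows the same convention the div tests already use: a zero divisor is defined to give 0, and C's % truncates toward zero, so a signed remainder keeps the dividend's sign. A one-lane sketch of that reference (illustration only, not part of the changeset; ref_mod32 is an invented name):

#include <assert.h>
#include <stdint.h>

/* one lane of the expected value the mod tests compare against */
static int32_t ref_mod32(int32_t a, int32_t b)
{
	return b ? (a % b) : 0;
}

int main(void)
{
	assert(ref_mod32(7, 3) == 1);
	assert(ref_mod32(-7, 3) == -1);  /* truncating %: sign follows the dividend */
	assert(ref_mod32(7, -3) == 1);
	assert(ref_mod32(7, 0) == 0);    /* divide-by-zero is defined to give 0 */
	return 0;
}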
- } + CREATE_TEST_EX(sign, psign, csign, bits, size, op, equiv, u, U) #define CREATE_TESTS_SIGN(sign, psign, csign, bits, size) \ CREATE_TEST(sign, psign, csign, bits, size, add, orig_a[i] + orig_b[i]) \ CREATE_TEST(sign, psign, csign, bits, size, sub, orig_a[i] - orig_b[i]) \ CREATE_TEST(sign, psign, csign, bits, size, mul, orig_a[i] * orig_b[i]) \ CREATE_TEST(sign, psign, csign, bits, size, div, (orig_b[i]) ? (orig_a[i] / orig_b[i]) : 0) \ + CREATE_TEST(sign, psign, csign, bits, size, mod, (orig_b[i]) ? (orig_a[i] % orig_b[i]) : 0) \ CREATE_TEST(sign, psign, csign, bits, size, and, orig_a[i] & orig_b[i]) \ CREATE_TEST(sign, psign, csign, bits, size, or, orig_a[i] | orig_b[i]) \ CREATE_TEST(sign, psign, csign, bits, size, xor, orig_a[i] ^ orig_b[i]) \ @@ -113,6 +93,7 @@ ret |= test_arith_v##sign##int##bits##x##size##_sub(a, b); \ ret |= test_arith_v##sign##int##bits##x##size##_mul(a, b); \ ret |= test_arith_v##sign##int##bits##x##size##_div(a, b); \ + ret |= test_arith_v##sign##int##bits##x##size##_mod(a, b); \ ret |= test_arith_v##sign##int##bits##x##size##_and(a, b); \ ret |= test_arith_v##sign##int##bits##x##size##_or(a, b); \ ret |= test_arith_v##sign##int##bits##x##size##_xor(a, b); \ diff -r f9ca85d2f14c -r 55cadb1fac4b utils/gengcc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/utils/gengcc.c Sun Apr 27 02:49:53 2025 -0400 @@ -0,0 +1,390 @@ +/** + * vec - a tiny SIMD vector library in C99 + * + * Copyright (c) 2024-2025 Paper + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. +**/ + +/* Use this file to generate include/vec/impl/gcc.h !!
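The code this generator emits leans on a few GNU C vector-extension guarantees: arithmetic and comparisons are element-wise, comparisons come back as 0/-1 lanes, and casting through the unsigned layout turns >> into a logical shift. A stand-alone check of those assumptions (illustration only, not part of the changeset; the v4si/v4usi typedefs are invented here, and vector subscripting plus vector-by-vector shifts need a reasonably recent GCC or Clang):

#include <assert.h>
#include <stdint.h>

typedef int32_t  v4si  __attribute__((__vector_size__(16)));
typedef uint32_t v4usi __attribute__((__vector_size__(16)));

int main(void)
{
	v4si a = {1, -2, 3, -4};
	v4si b = {1,  2, 5,  8};

	v4si sum = a + b;                /* element-wise, like the generated v*_add() */
	assert(sum[0] == 2 && sum[3] == 4);

	v4si lt = a < b;                 /* "true" lanes come back as all-ones (-1) */
	assert(lt[0] == 0 && lt[1] == -1);

	/* logical right shift of a signed vector: go through the unsigned
	 * layout, which is what the emitted *_lrshift() functions do */
	v4usi ones = {1, 1, 1, 1};
	v4si lrs = (v4si)((v4usi)a >> ones);
	assert(lrs[1] == (int32_t)(UINT32_C(0xFFFFFFFE) >> 1));

	return 0;
}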
+ * + * `gcc -o gengcc gengcc.c` */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) + +/* ------------------------------------------------------------------------ */ + +enum op { + /* return vector, take in an integer */ + OP_SPLAT = 0, + + /* return vector, take in an array */ + OP_LOAD_ALIGNED, + OP_LOAD, + + /* void, take in vector and array */ + OP_STORE_ALIGNED, + OP_STORE, + + /* return vector, takes in two vectors */ + OP_ADD, + OP_SUB, + OP_MUL, + OP_AND, + OP_OR, + OP_XOR, + OP_CMPLT, + OP_CMPEQ, + OP_CMPGT, + OP_CMPLE, + OP_CMPGE, + OP_MIN, + OP_MAX, + OP_AVG, + + /* return vector, takes in a vector and an explicitly unsigned vector */ + OP_LSHIFT, + OP_RSHIFT, + OP_LRSHIFT, + + /* return vector, takes in a vector */ + OP_NOT, + + OP_FINAL_, + + /* operations that have some sort of "caveat" should go here, until + * they are fixed or removed */ + + OP_DIV, /* this one causes a floating point exception on my machine. + * possibly we could change the behavior of divide-by-zero + * with some gcc pragma ? --paper */ + OP_MOD, /* ditto with the above */ +}; + +/* convert op -> string */ +static struct { + const char *u; + const char *l; +} op_names[] = { + [OP_SPLAT] = {"SPLAT", "splat"}, + [OP_LOAD_ALIGNED] = {"LOAD_ALIGNED", "load_aligned"}, + [OP_LOAD] = {"LOAD", "load"}, + [OP_STORE_ALIGNED] = {"STORE_ALIGNED", "store_aligned"}, + [OP_STORE] = {"STORE", "store"}, + [OP_ADD] = {"ADD", "add"}, + [OP_SUB] = {"SUB", "sub"}, + [OP_MUL] = {"MUL", "mul"}, + /*[OP_DIV] = {"DIV", "div"},*/ + [OP_AVG] = {"AVG", "avg"}, + [OP_AND] = {"AND", "and"}, + [OP_OR] = {"OR", "or"}, + [OP_XOR] = {"XOR", "xor"}, + [OP_NOT] = {"NOT", "not"}, + [OP_CMPLT] = {"CMPLT", "cmplt"}, + [OP_CMPEQ] = {"CMPEQ", "cmpeq"}, + [OP_CMPGT] = {"CMPGT", "cmpgt"}, + [OP_CMPLE] = {"CMPLE", "cmple"}, + [OP_CMPGE] = {"CMPGE", "cmpge"}, + [OP_MIN] = {"MIN", "min"}, + [OP_MAX] = {"MAX", "max"}, + [OP_RSHIFT] = {"RSHIFT", "rshift"}, + [OP_LRSHIFT] = {"LRSHIFT", "lrshift"}, + [OP_LSHIFT] = {"LSHIFT", "lshift"}, +}; + +#define UPSIGN(x) ((x) ? "" : "U") +#define LOSIGN(x) ((x) ?
"" : "u") + +static void print_gcc_op(enum op op, int is_signed, int bits, int size) +{ + int i; + + printf("#ifndef V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u); + + printf("VEC_FUNC_IMPL "); + + /* first; the return value */ + switch (op) { + case OP_SPLAT: + case OP_LOAD_ALIGNED: + case OP_LOAD: + case OP_ADD: + case OP_SUB: + case OP_MUL: + case OP_DIV: + case OP_AND: + case OP_OR: + case OP_XOR: + case OP_CMPLT: + case OP_CMPEQ: + case OP_CMPGT: + case OP_CMPLE: + case OP_CMPGE: + case OP_MIN: + case OP_MAX: + case OP_AVG: + case OP_RSHIFT: + case OP_LRSHIFT: + case OP_LSHIFT: + case OP_NOT: + printf("v%sint%dx%d", LOSIGN(is_signed), bits, size); + break; + case OP_STORE_ALIGNED: + case OP_STORE: + printf("void"); + break; + } + + /* whitespace and function name */ + printf(" v%sint%dx%d_%s(", LOSIGN(is_signed), bits, size, op_names[op].l); + + /* parameters */ + switch (op) { + case OP_SPLAT: + printf("vec_%sint%d x", LOSIGN(is_signed), bits); + break; + case OP_LOAD_ALIGNED: + case OP_LOAD: + printf("const vec_%sint%d x[%d]", LOSIGN(is_signed), bits, size); + break; + case OP_STORE_ALIGNED: + case OP_STORE: + printf("v%sint%dx%d vec, vec_%sint%d arr[%d]", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size); + break; + case OP_ADD: + case OP_SUB: + case OP_MUL: + case OP_DIV: + case OP_AND: + case OP_OR: + case OP_XOR: + case OP_CMPLT: + case OP_CMPEQ: + case OP_CMPGT: + case OP_CMPLE: + case OP_CMPGE: + case OP_MIN: + case OP_MAX: + case OP_AVG: + printf("v%sint%dx%d vec1, v%sint%dx%d vec2", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size); + break; + case OP_RSHIFT: + case OP_LRSHIFT: + case OP_LSHIFT: + printf("v%sint%dx%d vec1, vuint%dx%d vec2", LOSIGN(is_signed), bits, size, bits, size); + break; + case OP_NOT: + printf("v%sint%dx%d vec", LOSIGN(is_signed), bits, size); + break; + } + + puts(")\n{"); + + switch (op) { + case OP_SPLAT: + printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); + printf("\tvec.gcc = (__typeof__(vec.gcc)){"); + for (i = 0; i < size; i++) + printf("x,"); + printf("};\n"); + printf("\treturn vec;\n"); + break; + case OP_LOAD_ALIGNED: + printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); + puts("\tvec.gcc = *(__typeof__(vec.gcc) *)x;"); + printf("\treturn vec;\n"); + break; + case OP_LOAD: + printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); + puts("\tmemcpy(&vec, x, sizeof(vec));"); + printf("\treturn vec;\n"); + break; + case OP_STORE_ALIGNED: + puts("\t*(__typeof__(vec.gcc) *)arr = vec.gcc;"); + break; + case OP_STORE: + puts("\tmemcpy(arr, &vec, sizeof(vec));"); + break; + case OP_ADD: + case OP_SUB: + case OP_MUL: + case OP_DIV: + case OP_AND: + case OP_OR: + case OP_XOR: + case OP_CMPLT: + case OP_CMPEQ: + case OP_CMPGT: + case OP_CMPLE: + case OP_CMPGE: { + const char *op_builtins[OP_CMPGE - OP_ADD + 1] = {"+", "-", "*", /*"/", */"&", "|", "^", "<", "==", ">", "<=", ">="}; + + printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_ADD]); + printf("\treturn vec1;\n"); + break; + } + + case OP_LSHIFT: + case OP_RSHIFT: { + const char *op_builtins[OP_RSHIFT - OP_LSHIFT + 1] = {"<<", ">>"}; + + printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_LSHIFT]); + printf("\treturn vec1;\n"); + break; + } + + case OP_LRSHIFT: { + /* sigh */ + printf("\tvec1.gcc = (__typeof__(vec1.gcc))((vec_uint%d __attribute__((__vector_size__(%d))))vec1.gcc >> vec2.gcc);\n", bits, bits * size / 8); + printf("\treturn vec1;\n"); + break; + } + case OP_MIN: + 
case OP_MAX: { + const char *op_builtins[OP_MAX - OP_MIN + 1] = {"<", ">"}; + + printf("\tv%sint%dx%d mask;\n", LOSIGN(is_signed), bits, size); + printf("\tmask.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_MIN]); + printf("\tvec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);\n"); + printf("\treturn vec1;\n"); + break; + } + case OP_AVG: + if (is_signed) { + printf("\tvint%dx%d ones = vint%dx%d_splat(1);\n", bits, size, bits, size); + printf("\t__typeof__(vec1.gcc) x_d_rem = (vec1.gcc %% 2);\n"); + printf("\t__typeof__(vec1.gcc) y_d_rem = (vec2.gcc %% 2);\n"); + printf("\t__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);\n"); + printf("\t__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) %% 2);\n"); + puts(""); + printf("\tvec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);\n"); + } else { + printf("\tvec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);\n"); + } + + printf("\treturn vec1;\n"); + break; + case OP_NOT: + printf("\tvec.gcc = ~vec.gcc;\n"); + printf("\treturn vec;\n"); + break; + default: + printf("#error implement this operation"); + break; + } + + /* end function definition */ + puts("}"); + + printf("# define V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u); + puts("#endif"); +} + +static inline void print_ops(int is_signed, int bits, int size) +{ + int i; + + printf("\n\n/* v%sint%dx%d */\n\n", (is_signed ? "u" : ""), bits, size); + + for (i = 0; i < OP_FINAL_; i++) + print_gcc_op(i, is_signed, bits, size); +} + +static const char *header = + "/**\n" + " * vec - a tiny SIMD vector library in C99\n" + " * \n" + " * Copyright (c) 2024-2025 Paper\n" + " * \n" + " * Permission is hereby granted, free of charge, to any person obtaining a copy\n" + " * of this software and associated documentation files (the \"Software\"), to deal\n" + " * in the Software without restriction, including without limitation the rights\n" + " * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n" + " * copies of the Software, and to permit persons to whom the Software is\n" + " * furnished to do so, subject to the following conditions:\n" + " * \n" + " * The above copyright notice and this permission notice shall be included in all\n" + " * copies or substantial portions of the Software.\n" + " * \n" + " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n" + " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n" + " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n" + " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n" + " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n" + " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n" + " * SOFTWARE.\n" + "**/\n" + "\n" + "/* This file is automatically generated!
Do not edit it directly!\n" + " * Edit the code that generates it in utils/gengcc.c --paper */\n" + "\n" + "#ifndef VEC_IMPL_GCC_H_\n" + "#define VEC_IMPL_GCC_H_\n" + "\n"; + +static const char *footer = + "#endif /* VEC_IMPL_GCC_H_ */\n"; + +int main(void) +{ + static struct { + int bits, size; + } defs[] = { + /* -- 8-bit */ + {8, 2}, + {8, 4}, + {8, 8}, + {8, 16}, + {8, 32}, + {8, 64}, + + /* -- 16-bit */ + {16, 2}, + {16, 4}, + {16, 8}, + {16, 16}, + {16, 32}, + + /* -- 32-bit */ + {32, 2}, + {32, 4}, + {32, 8}, + {32, 16}, + + /* -- 64-bit */ + {64, 2}, + {64, 4}, + {64, 8}, + }; + int i; + + puts(header); + + for (i = 0; i < ARRAY_SIZE(defs); i++) { + print_ops(1, defs[i].bits, defs[i].size); + print_ops(0, defs[i].bits, defs[i].size); + } + + puts(footer); +} diff -r f9ca85d2f14c -r 55cadb1fac4b utils/gengeneric.c --- a/utils/gengeneric.c Sat Apr 26 15:31:39 2025 -0400 +++ b/utils/gengeneric.c Sun Apr 27 02:49:53 2025 -0400 @@ -137,6 +137,12 @@ " VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] / vec2.generic[i]) : 0, sign, bits, size); \\\n" " }\n" "\n" + "#define VEC_GENERIC_MOD(sign, bits, size) \\\n" + " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_mod(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" + " { \\\n" + " VEC_GENERIC_OPERATION(vec2.generic[i] ? (vec1.generic[i] % vec2.generic[i]) : 0, sign, bits, size); \\\n" + " }\n" + "\n" "#define VEC_GENERIC_AVG(sign, bits, size) \\\n" " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" " { \\\n" @@ -288,6 +294,7 @@ "#define VEC_GENERIC_DBL_SUB(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(sub, sign, bits, size, halfsize, sign)\n" "#define VEC_GENERIC_DBL_MUL(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mul, sign, bits, size, halfsize, sign)\n" "#define VEC_GENERIC_DBL_DIV(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(div, sign, bits, size, halfsize, sign)\n" + "#define VEC_GENERIC_DBL_MOD(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mod, sign, bits, size, halfsize, sign)\n" "#define VEC_GENERIC_DBL_AVG(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(avg, sign, bits, size, halfsize, sign)\n" "#define VEC_GENERIC_DBL_LSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(lshift, sign, bits, size, halfsize, u)\n" "#define VEC_GENERIC_DBL_RSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(rshift, sign, bits, size, halfsize, u)\n" @@ -355,6 +362,7 @@ "SUB", "MUL", "DIV", + "MOD", "AVG", "AND", "OR",