/**
 * vec - a tiny SIMD vector library in plain C99
 * 
 * Copyright (c) 2024 Paper
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
**/

/* Altivec vector support. */

#include <stdint.h>
#include <string.h>

#include <altivec.h>

#define VEC_ALTIVEC_ALIGNMENT 16

/* GCC 4.2.1 on Mac OS X doesn't have these for some reason */
/* VEC_ALTIVEC_MUL: defines the multiply operation for one vector type.
 * The vec_mul intrinsic is used only when <altivec.h> exposes it as a macro;
 * otherwise the generic implementation is instantiated instead.
 *
 * VEC_DECL_MUL is invoked with (sign, bits, size), the same argument list
 * every other VEC_DECL_* arithmetic declaration in this file receives;
 * csign is only forwarded to the generic fallback. */
#ifdef vec_mul
# define VEC_ALTIVEC_MUL(sign, csign, bits, size) \
	VEC_DECL_MUL(sign, bits, size) \
	{ \
		return vec_mul(vec1, vec2); \
	}
#else
# define VEC_ALTIVEC_MUL(sign, csign, bits, size) \
	VEC_GENERIC_MULTIPLY(sign, csign, bits, size)
#endif

/* VEC_ALTIVEC_SPLAT: defines the splat operation for one vector type.
 * When <altivec.h> does not expose vec_splats as a macro the generic
 * implementation is instantiated; otherwise the intrinsic is called on `x`
 * (expected to be the parameter introduced by VEC_DECL_SPLAT). */
#ifndef vec_splats
# define VEC_ALTIVEC_SPLAT(sign, csign, bits, size) \
	VEC_GENERIC_SPLAT(sign, csign, bits, size)
#else
# define VEC_ALTIVEC_SPLAT(sign, csign, bits, size) \
	VEC_DECL_SPLAT(sign, bits, size) \
	{ \
		return vec_splats(x); \
	}
#endif

/* Right-shift intrinsic, selected inside VEC_DEFINE_OPERATIONS by pasting the
 * sign prefix (VEC_ALTIVEC_##sign##RSHIFT): the `u` prefix picks vec_sr and
 * the empty (signed) prefix picks vec_sra. */
#define VEC_ALTIVEC_uRSHIFT vec_sr
#define VEC_ALTIVEC_RSHIFT vec_sra

/* Definition of the shift declared by VEC_DECL_SHIFT(..., l, r), selected
 * inside VEC_DEFINE_OPERATIONS by pasting the sign prefix
 * (VEC_ALTIVEC_##sign##LRSHIFT). The `u` variant calls vec_sr directly; the
 * empty (signed) variant defers to VEC_GENERIC_SHIFT.
 * NOTE(review): `l` presumably marks the logical shift variant - confirm
 * against the definition of VEC_DECL_SHIFT. */
#define VEC_ALTIVEC_uLRSHIFT(sign, csign, bits, size) \
	VEC_DECL_SHIFT(sign, bits, size, l, r) \
	{ \
		return vec_sr(vec1, vec2); \
	}
#define VEC_ALTIVEC_LRSHIFT(sign, csign, bits, size) \
	VEC_GENERIC_SHIFT(sign, csign, bits, size, l, r)

/* AltiVec's intrinsics are overloaded on the vector type, so a single macro
 * can define every operation for each element width and signedness.
 *
 * Parameters:
 *   sign  - `u` for unsigned element types, empty for signed ones
 *   csign - companion of `sign` (`U` or empty); only forwarded to the
 *           VEC_ALTIVEC_* / VEC_GENERIC_* helper macros
 *   bits  - element width in bits
 *   size  - number of elements per vector
 *
 * Operations without a direct intrinsic here (comparisons, divide) come from
 * the VEC_GENERIC_* macros. */
#define VEC_DEFINE_OPERATIONS(sign, csign, bits, size) \
	VEC_DECL_LOAD_ALIGNED(sign, bits, size) \
	{ \
		return vec_ld(0, in); \
	} \
	\
	VEC_DECL_LOAD(sign, bits, size) \
	{ \
		/* Unaligned load: fetch the two 16-byte blocks covering the \
		 * vector and merge them with the permute mask from vec_lvsl. \
		 * The second load uses offset ALIGNMENT - 1 (not ALIGNMENT) so \
		 * that an already-aligned `in` re-reads the same block instead \
		 * of touching the 16 bytes past the end of the data. */ \
		return vec_perm(vec_ld(0, in), vec_ld(VEC_ALTIVEC_ALIGNMENT - 1, in), vec_lvsl(0, in)); \
	} \
	\
	VEC_DECL_STORE_ALIGNED(sign, bits, size) \
	{ \
		vec_st(vec, 0, out); \
	} \
	\
	VEC_DECL_STORE(sign, bits, size) \
	{ \
		/* Unaligned store: write to an aligned temporary with vec_st, \
		 * then copy the elements out to the caller's buffer. */ \
		VEC_ALIGNED_ARRAY(sign##int##bits##_t, aligned_out, size, VEC_ALTIVEC_ALIGNMENT); \
		vec_st(vec, 0, aligned_out); \
		memcpy(out, aligned_out, (size) * sizeof(*aligned_out)); \
	} \
	\
	VEC_DECL_ADD(sign, bits, size) \
	{ \
		return vec_add(vec1, vec2); \
	} \
	\
	VEC_DECL_SUB(sign, bits, size) \
	{ \
		return vec_sub(vec1, vec2); \
	} \
	\
	VEC_ALTIVEC_MUL(sign, csign, bits, size) \
	\
	VEC_DECL_SHIFT(sign, bits, size, , l) \
	{ \
		return vec_sl(vec1, vec2); \
	} \
	\
	VEC_DECL_SHIFT(sign, bits, size, , r) \
	{ \
		/* vec_sr for unsigned types, vec_sra for signed types */ \
		return VEC_ALTIVEC_##sign##RSHIFT(vec1, vec2); \
	} \
	\
	VEC_ALTIVEC_##sign##LRSHIFT(sign, csign, bits, size) \
	\
	VEC_DECL_AVG(sign, bits, size) \
	{ \
		return vec_avg(vec1, vec2); \
	} \
	\
	VEC_DECL_AND(sign, bits, size) \
	{ \
		return vec_and(vec1, vec2); \
	} \
	\
	VEC_DECL_OR(sign, bits, size) \
	{ \
		return vec_or(vec1, vec2); \
	} \
	\
	VEC_DECL_XOR(sign, bits, size) \
	{ \
		return vec_xor(vec1, vec2); \
	} \
	\
	VEC_GENERIC_COMPARISONS(sign, csign, bits, size) \
	VEC_GENERIC_DIVIDE(sign, csign, bits, size) \
	VEC_ALTIVEC_SPLAT(sign, csign, bits, size)

/* vuint8x16: AltiVec `vector unsigned char`, used as 16 unsigned 8-bit
 * elements. The whole block is skipped if VEC_VUINT8X16 is already defined. */
#ifndef VEC_VUINT8X16
# define VEC_VUINT8X16
typedef vector unsigned char vuint8x16;
/* Builds a vuint8x16 from sixteen element values via a compound literal. */
# define VUINT8x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
	(vuint8x16){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p }
# define VUINT8x16_ALIGNMENT VEC_ALTIVEC_ALIGNMENT
/* Instantiate load/store, arithmetic, shift and bitwise operations. */
VEC_DEFINE_OPERATIONS(u, U, 8, 16)
#endif /* VEC_VUINT8X16 */

/* vint8x16: AltiVec `vector signed char`, used as 16 signed 8-bit elements.
 * The whole block is skipped if VEC_VINT8X16 is already defined. */
#ifndef VEC_VINT8X16
# define VEC_VINT8X16
typedef vector signed char vint8x16;
/* Builds a vint8x16 from sixteen element values via a compound literal. */
# define VINT8x16_CONSTANT(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p) \
	(vint8x16){ a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p }
# define VINT8x16_ALIGNMENT VEC_ALTIVEC_ALIGNMENT
/* Empty sign arguments select the signed variants of each operation. */
VEC_DEFINE_OPERATIONS(, , 8, 16)
#endif /* VEC_VINT8X16 */

/* vuint16x8: AltiVec `vector unsigned short`, used as 8 unsigned 16-bit
 * elements. The whole block is skipped if VEC_VUINT16X8 is already defined. */
#ifndef VEC_VUINT16X8
# define VEC_VUINT16X8
typedef vector unsigned short vuint16x8;
/* Builds a vuint16x8 from eight element values via a compound literal. */
# define VUINT16x8_CONSTANT(a, b, c, d, e, f, g, h) \
	(vuint16x8){ a, b, c, d, e, f, g, h }
# define VUINT16x8_ALIGNMENT VEC_ALTIVEC_ALIGNMENT
/* Instantiate load/store, arithmetic, shift and bitwise operations. */
VEC_DEFINE_OPERATIONS(u, U, 16, 8)
#endif /* VEC_VUINT16X8 */

/* vint16x8: AltiVec `vector signed short`, used as 8 signed 16-bit elements.
 * The whole block is skipped if VEC_VINT16X8 is already defined. */
#ifndef VEC_VINT16X8
# define VEC_VINT16X8
typedef vector signed short vint16x8;
/* Builds a vint16x8 from eight element values via a compound literal. */
# define VINT16x8_CONSTANT(a, b, c, d, e, f, g, h) \
	(vint16x8){ a, b, c, d, e, f, g, h }
# define VINT16x8_ALIGNMENT VEC_ALTIVEC_ALIGNMENT
/* Empty sign arguments select the signed variants of each operation. */
VEC_DEFINE_OPERATIONS(, , 16, 8)
#endif /* VEC_VINT16X8 */

/* vuint32x4: AltiVec `vector unsigned int`, used as 4 unsigned 32-bit
 * elements. The whole block is skipped if VEC_VUINT32X4 is already defined. */
#ifndef VEC_VUINT32X4
# define VEC_VUINT32X4
typedef vector unsigned int vuint32x4;
/* Builds a vuint32x4 from four element values via a compound literal. */
# define VUINT32x4_CONSTANT(a, b, c, d) \
	(vuint32x4){ a, b, c, d }
# define VUINT32x4_ALIGNMENT VEC_ALTIVEC_ALIGNMENT
/* Instantiate load/store, arithmetic, shift and bitwise operations. */
VEC_DEFINE_OPERATIONS(u, U, 32, 4)
#endif /* VEC_VUINT32X4 */

/* vint32x4: AltiVec `vector signed int`, used as 4 signed 32-bit elements.
 * The whole block is skipped if VEC_VINT32X4 is already defined. */
#ifndef VEC_VINT32X4
# define VEC_VINT32X4
typedef vector signed int vint32x4;
/* Builds a vint32x4 from four element values via a compound literal. */
# define VINT32x4_CONSTANT(a, b, c, d) \
	(vint32x4){ a, b, c, d }
# define VINT32x4_ALIGNMENT VEC_ALTIVEC_ALIGNMENT
/* Empty sign arguments select the signed variants of each operation. */
VEC_DEFINE_OPERATIONS(, , 32, 4)
#endif /* VEC_VINT32X4 */

/* 64-bit element vectors are only defined when the compiler reports both
 * POWER8 and VSX support.
 * NOTE(review): VEC_DEFINE_OPERATIONS applies vec_ld, vec_st, vec_avg and the
 * shift intrinsics to these types; some of them (vec_avg in particular) may
 * have no 64-bit overload - confirm this section builds on a POWER8 target. */
#if defined(__POWER8__) && defined(__VSX__)

/* vuint64x2: `vector unsigned long long`, used as 2 unsigned 64-bit elements.
 * Skipped if VEC_VUINT64X2 is already defined. */
# ifndef VEC_VUINT64X2
#  define VEC_VUINT64X2
typedef vector unsigned long long vuint64x2;
/* Builds a vuint64x2 from two element values via a compound literal. */
#  define VUINT64x2_CONSTANT(a, b) \
	(vuint64x2){ a, b }
#  define VUINT64x2_ALIGNMENT VEC_ALTIVEC_ALIGNMENT
VEC_DEFINE_OPERATIONS(u, U, 64, 2)
# endif /* VEC_VUINT64X2 */

/* vint64x2: `vector signed long long`, used as 2 signed 64-bit elements.
 * Skipped if VEC_VINT64X2 is already defined. */
# ifndef VEC_VINT64X2
#  define VEC_VINT64X2
typedef vector signed long long vint64x2;
/* Builds a vint64x2 from two element values via a compound literal. */
#  define VINT64x2_CONSTANT(a, b) \
	(vint64x2){ a, b }
#  define VINT64x2_ALIGNMENT VEC_ALTIVEC_ALIGNMENT
VEC_DEFINE_OPERATIONS(, , 64, 2)
# endif /* VEC_VINT64X2 */

#endif /* defined(__POWER8__) && defined(__VSX__) */

/* Drop the helper macros that exist only to build the definitions above, so
 * they do not leak into code that includes this file. VEC_ALTIVEC_ALIGNMENT
 * is kept because the per-type *_ALIGNMENT macros expand to it. */
#undef VEC_DEFINE_OPERATIONS
#undef VEC_ALTIVEC_MUL
#undef VEC_ALTIVEC_SPLAT
#undef VEC_ALTIVEC_uRSHIFT
#undef VEC_ALTIVEC_RSHIFT
#undef VEC_ALTIVEC_uLRSHIFT
#undef VEC_ALTIVEC_LRSHIFT
