/**
 * vec - a tiny SIMD vector library in plain C99
 * 
 * Copyright (c) 2024 Paper
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
**/

/* Generic array-based implementation. */

#ifndef VEC_IMPL_GENERIC_H_
#define VEC_IMPL_GENERIC_H_

#include <string.h>

// -----------------------------------------------------------------

/*
 * Base case of the generic implementation.
 *
 * For the vector type v<sign>int<bits>x<size> this expands to:
 *   - a load routine that memcpy()s `size` lanes of vec_<sign>int<bits>
 *     from memory into the vector's `generic` member,
 *   - a store routine that memcpy()s the same number of lanes back out,
 *   - the v<sign>int<bits>x<size>_impl_generic table pointing at them.
 *
 * The `csign` parameter is accepted but not used by this expansion.
 */
// TODO implement these so we don't waste stack space by doing the
// fallbacks
#define VEC_GENERIC_DEFINE_OPERATIONS_SIGN(sign, csign, bits, size) \
	/* load: copy all lanes from `in` into a fresh vector and return it */ \
	static v##sign##int##bits##x##size v##sign##int##bits##x##size##_generic_load_aligned(const vec_##sign##int##bits in[size]) \
	{ \
		v##sign##int##bits##x##size loaded; \
		memcpy(loaded.generic, in, sizeof(*in) * size); \
		return loaded; \
	} \
	\
	/* store: copy all lanes of `vec` out to `out` */ \
	static void v##sign##int##bits##x##size##_generic_store_aligned(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \
	{ \
		memcpy(out, vec.generic, sizeof(*out) * size); \
	} \
	\
	/* positional initializer: the single memcpy-based load fills both */ \
	/* load slots, and the single store fills both store slots */ \
	static v##sign##int##bits##x##size##_impl v##sign##int##bits##x##size##_impl_generic = { \
		/* .splat = */ NULL, \
		v##sign##int##bits##x##size##_generic_load_aligned, \
		v##sign##int##bits##x##size##_generic_load_aligned, \
		v##sign##int##bits##x##size##_generic_store_aligned, \
		v##sign##int##bits##x##size##_generic_store_aligned, \
	};

/*
 * Expands VEC_GENERIC_DEFINE_OPERATIONS_SIGN twice for one (bits, size)
 * pair: first with empty sign arguments (producing the vint<bits>x<size>
 * names), then with u/U (producing the vuint<bits>x<size> names).
 */
#define VEC_GENERIC_DEFINE_OPERATIONS(bits, size) \
	VEC_GENERIC_DEFINE_OPERATIONS_SIGN( ,  , bits, size) \
	VEC_GENERIC_DEFINE_OPERATIONS_SIGN(u, U, bits, size)

// Two-lane vectors for every lane width (8x2, 16x2, 32x2, 64x2), each in
// signed and unsigned form.
VEC_GENERIC_DEFINE_OPERATIONS(8, 2)
VEC_GENERIC_DEFINE_OPERATIONS(16, 2)
VEC_GENERIC_DEFINE_OPERATIONS(32, 2)
VEC_GENERIC_DEFINE_OPERATIONS(64, 2)

// Drop both helper macros: the same two names are defined again further
// down with an additional halfsize parameter.
#undef VEC_GENERIC_DEFINE_OPERATIONS
#undef VEC_GENERIC_DEFINE_OPERATIONS_SIGN

// -----------------------------------------------------------------
// Wider vectors: each one is built from two vectors with half as many
// lanes, so the same implementation is reused every time the size doubles.

/*
 * Doubling case of the generic implementation.
 *
 * Here the `generic` member of v<sign>int<bits>x<size> is indexed as two
 * halves, each holding a v<sign>int<bits>x<halfsize>. Every routine simply
 * forwards to the matching half-size operation twice: once for the first
 * `halfsize` lanes and once for the lanes starting at index `halfsize`.
 *
 * Expands to aligned/unaligned load and store routines and the
 * v<sign>int<bits>x<size>_impl_generic table that references them.
 * The `csign` parameter is accepted but not used by this expansion.
 */
#define VEC_GENERIC_DEFINE_OPERATIONS_SIGN(sign, csign, bits, size, halfsize) \
	/* aligned load: half 0 from in[0], half 1 from in[halfsize] */ \
	static v##sign##int##bits##x##size v##sign##int##bits##x##size##_generic_load_aligned(const vec_##sign##int##bits in[size]) \
	{ \
		v##sign##int##bits##x##size joined; \
		joined.generic[0] = v##sign##int##bits##x##halfsize##_load_aligned(&in[0]); \
		joined.generic[1] = v##sign##int##bits##x##halfsize##_load_aligned(&in[halfsize]); \
		return joined; \
	} \
	\
	/* load: same split, using the half-size _load routine */ \
	static v##sign##int##bits##x##size v##sign##int##bits##x##size##_generic_load(const vec_##sign##int##bits in[size]) \
	{ \
		v##sign##int##bits##x##size joined; \
		joined.generic[0] = v##sign##int##bits##x##halfsize##_load(&in[0]); \
		joined.generic[1] = v##sign##int##bits##x##halfsize##_load(&in[halfsize]); \
		return joined; \
	} \
	\
	/* aligned store: half 0 to out[0], half 1 to out[halfsize] */ \
	static void v##sign##int##bits##x##size##_generic_store_aligned(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \
	{ \
		v##sign##int##bits##x##halfsize##_store_aligned(vec.generic[0], &out[0]); \
		v##sign##int##bits##x##halfsize##_store_aligned(vec.generic[1], &out[halfsize]); \
	} \
	\
	/* store: same split, using the half-size _store routine */ \
	static void v##sign##int##bits##x##size##_generic_store(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \
	{ \
		v##sign##int##bits##x##halfsize##_store(vec.generic[0], &out[0]); \
		v##sign##int##bits##x##halfsize##_store(vec.generic[1], &out[halfsize]); \
	} \
	\
	/* positional initializer; the splat slot is left NULL */ \
	static v##sign##int##bits##x##size##_impl v##sign##int##bits##x##size##_impl_generic = { \
		/* .splat = */ NULL, \
		v##sign##int##bits##x##size##_generic_load_aligned, \
		v##sign##int##bits##x##size##_generic_load, \
		v##sign##int##bits##x##size##_generic_store_aligned, \
		v##sign##int##bits##x##size##_generic_store, \
	};

/*
 * Expands VEC_GENERIC_DEFINE_OPERATIONS_SIGN twice for one
 * (bits, size, halfsize) triple: first with empty sign arguments (the
 * vint<bits>x<size> names), then with u/U (the vuint<bits>x<size> names).
 */
#define VEC_GENERIC_DEFINE_OPERATIONS(bits, size, halfsize) \
	VEC_GENERIC_DEFINE_OPERATIONS_SIGN( ,  , bits, size, halfsize) \
	VEC_GENERIC_DEFINE_OPERATIONS_SIGN(u, U, bits, size, halfsize)

// Instantiations are grouped by total vector width (bits * size). In every
// entry halfsize is size / 2, and that lane count for the same lane width
// has already been instantiated earlier in this file.

// 32-bit vectors
VEC_GENERIC_DEFINE_OPERATIONS(8, 4, 2)

// 64-bit vectors
VEC_GENERIC_DEFINE_OPERATIONS(8, 8, 4)
VEC_GENERIC_DEFINE_OPERATIONS(16, 4, 2)

// 128-bit vectors
VEC_GENERIC_DEFINE_OPERATIONS(8, 16, 8)
VEC_GENERIC_DEFINE_OPERATIONS(16, 8, 4)
VEC_GENERIC_DEFINE_OPERATIONS(32, 4, 2)

// 256-bit vectors
VEC_GENERIC_DEFINE_OPERATIONS(8, 32, 16)
VEC_GENERIC_DEFINE_OPERATIONS(16, 16, 8)
VEC_GENERIC_DEFINE_OPERATIONS(32, 8, 4)
VEC_GENERIC_DEFINE_OPERATIONS(64, 4, 2)

// 512-bit vectors
VEC_GENERIC_DEFINE_OPERATIONS(8, 64, 32)
VEC_GENERIC_DEFINE_OPERATIONS(16, 32, 16)
VEC_GENERIC_DEFINE_OPERATIONS(32, 16, 8)
VEC_GENERIC_DEFINE_OPERATIONS(64, 8, 4)

// The helper macros are only needed inside this header; remove them again.
#undef VEC_GENERIC_DEFINE_OPERATIONS
#undef VEC_GENERIC_DEFINE_OPERATIONS_SIGN

#endif /* VEC_IMPL_GENERIC_H_ */
