/**
 * vec - a tiny SIMD vector library in plain C99
 * 
 * Copyright (c) 2024 Paper
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
**/

/* Generic array-based implementation. */

#ifndef VEC_IMPL_GENERIC_H_
#define VEC_IMPL_GENERIC_H_

#include "vec/vec.h"

/*
 * Expands to the prototypes of all `_generic_` operations for one vector
 * type, v<sgn>int<nbits>x<lanes>.
 *
 *   sgn   - pasted between `v` and `int`: empty yields the vint* types,
 *           `u` yields the vuint* types.
 *   csgn  - accepted but never referenced by the expansion.
 *   nbits - element width token, pasted into the type and function names.
 *   lanes - element count token; also used as the array bound of the
 *           load/store buffer parameters.
 *
 * The second operand of lshift/rshift/lrshift is always the vuint* vector
 * of the same shape, whatever `sgn` is.
 *
 * The expansion already ends in a semicolon, so invocations are written
 * without one.
 */
#define VEC_DEFINE_GENERIC_OPERATIONS_SIGN(sgn, csgn, nbits, lanes) \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_splat(vec_##sgn##int##nbits x); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_load(const vec_##sgn##int##nbits in[lanes]); \
	void \
		v##sgn##int##nbits##x##lanes##_generic_store(v##sgn##int##nbits##x##lanes vec, vec_##sgn##int##nbits out[lanes]); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_add(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_sub(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_mul(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_div(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_avg(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_and(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_or(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_xor(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_not(v##sgn##int##nbits##x##lanes vec); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_cmplt(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_cmple(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_cmpeq(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_cmpge(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_cmpgt(v##sgn##int##nbits##x##lanes vec1, v##sgn##int##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_lshift(v##sgn##int##nbits##x##lanes vec1, vuint##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_rshift(v##sgn##int##nbits##x##lanes vec1, vuint##nbits##x##lanes vec2); \
	v##sgn##int##nbits##x##lanes \
		v##sgn##int##nbits##x##lanes##_generic_lrshift(v##sgn##int##nbits##x##lanes vec1, vuint##nbits##x##lanes vec2);

/*
 * Invokes VEC_DEFINE_GENERIC_OPERATIONS_SIGN twice for one element width
 * and element count: first with empty sign arguments (the vint* type),
 * then with `u`/`U` (the vuint* type).
 */
#define VEC_DEFINE_GENERIC_OPERATIONS(nbits, lanes) \
	VEC_DEFINE_GENERIC_OPERATIONS_SIGN( ,  , nbits, lanes) \
	VEC_DEFINE_GENERIC_OPERATIONS_SIGN(u, U, nbits, lanes)

// Each VEC_DEFINE_GENERIC_OPERATIONS(bits, size) line below is one
// (element width, element count) pair. The lines are grouped by total
// vector width, i.e. bits * size.

// 16-bit
VEC_DEFINE_GENERIC_OPERATIONS(8, 2)

// 32-bit
VEC_DEFINE_GENERIC_OPERATIONS(8, 4)
VEC_DEFINE_GENERIC_OPERATIONS(16, 2)

// 64-bit
VEC_DEFINE_GENERIC_OPERATIONS(8, 8)
VEC_DEFINE_GENERIC_OPERATIONS(16, 4)
VEC_DEFINE_GENERIC_OPERATIONS(32, 2)

// 128-bit
VEC_DEFINE_GENERIC_OPERATIONS(8, 16)
VEC_DEFINE_GENERIC_OPERATIONS(16, 8)
VEC_DEFINE_GENERIC_OPERATIONS(32, 4)
VEC_DEFINE_GENERIC_OPERATIONS(64, 2)

// 256-bit
VEC_DEFINE_GENERIC_OPERATIONS(8, 32)
VEC_DEFINE_GENERIC_OPERATIONS(16, 16)
VEC_DEFINE_GENERIC_OPERATIONS(32, 8)
VEC_DEFINE_GENERIC_OPERATIONS(64, 4)

// 512-bit
VEC_DEFINE_GENERIC_OPERATIONS(8, 64)
VEC_DEFINE_GENERIC_OPERATIONS(16, 32)
VEC_DEFINE_GENERIC_OPERATIONS(32, 16)
VEC_DEFINE_GENERIC_OPERATIONS(64, 8)

// The helper macros are only used by the invocations above; undefine them
// so they do not remain defined for code that includes this header.
#undef VEC_DEFINE_GENERIC_OPERATIONS
#undef VEC_DEFINE_GENERIC_OPERATIONS_SIGN

// One `<type>_impl_generic` object is declared per vector type, using the
// same total-width grouping as the comments below. These are declarations
// only (`extern`, no initializer); the objects are defined elsewhere.
// NOTE(review): the `<type>_impl` types are not declared in this header --
// presumably they come from "vec/vec.h"; confirm.

// 16-bit
extern const vint8x2_impl    vint8x2_impl_generic;
extern const vuint8x2_impl   vuint8x2_impl_generic;

// 32-bit
extern const vint8x4_impl    vint8x4_impl_generic;
extern const vuint8x4_impl   vuint8x4_impl_generic;
extern const vint16x2_impl   vint16x2_impl_generic;
extern const vuint16x2_impl  vuint16x2_impl_generic;

// 64-bit
extern const vint8x8_impl    vint8x8_impl_generic;
extern const vuint8x8_impl   vuint8x8_impl_generic;
extern const vint16x4_impl   vint16x4_impl_generic;
extern const vuint16x4_impl  vuint16x4_impl_generic;
extern const vint32x2_impl   vint32x2_impl_generic;
extern const vuint32x2_impl  vuint32x2_impl_generic;

// 128-bit
extern const vint8x16_impl   vint8x16_impl_generic;
extern const vuint8x16_impl  vuint8x16_impl_generic;
extern const vint16x8_impl   vint16x8_impl_generic;
extern const vuint16x8_impl  vuint16x8_impl_generic;
extern const vint32x4_impl   vint32x4_impl_generic;
extern const vuint32x4_impl  vuint32x4_impl_generic;
extern const vint64x2_impl   vint64x2_impl_generic;
extern const vuint64x2_impl  vuint64x2_impl_generic;

// 256-bit
extern const vint8x32_impl   vint8x32_impl_generic;
extern const vuint8x32_impl  vuint8x32_impl_generic;
extern const vint16x16_impl  vint16x16_impl_generic;
extern const vuint16x16_impl vuint16x16_impl_generic;
extern const vint32x8_impl   vint32x8_impl_generic;
extern const vuint32x8_impl  vuint32x8_impl_generic;
extern const vint64x4_impl   vint64x4_impl_generic;
extern const vuint64x4_impl  vuint64x4_impl_generic;

// 512-bit
extern const vint8x64_impl   vint8x64_impl_generic;
extern const vuint8x64_impl  vuint8x64_impl_generic;
extern const vint16x32_impl  vint16x32_impl_generic;
extern const vuint16x32_impl vuint16x32_impl_generic;
extern const vint32x16_impl  vint32x16_impl_generic;
extern const vuint32x16_impl vuint32x16_impl_generic;
extern const vint64x8_impl   vint64x8_impl_generic;
extern const vuint64x8_impl  vuint64x8_impl_generic;

#endif /* VEC_IMPL_GENERIC_H_ */
