diff gen/genlib.c @ 45:7955bed1d169 default tip

*: add preliminary floating point support no x86 intrinsics just yet, but I did add altivec since it's (arguably) the simplest :)
author Paper <paper@tflc.us>
date Wed, 30 Apr 2025 18:36:38 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen/genlib.c	Wed Apr 30 18:36:38 2025 -0400
@@ -0,0 +1,294 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+#include "genlib.h"
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) /* element count of x; x must be a real array, not a pointer */
+
+struct strs type_str[] = { /* name fragments indexed by TYPE_*: .l is printed into type/function names, .u into *_DEFINED macro names */
+	[TYPE_INT]   = {"int", "INT"},
+	[TYPE_UINT]  = {"uint", "UINT"},
+	[TYPE_FLOAT] = {"f", "F"},
+};
+
+void gen_print_vtype(int type, int bits, int size)
+{	/* Writes the vector type name "v<type_str[type].l><bits>x<size>" to stdout. */
+	fprintf(stdout, "v%s%dx%d", type_str[type].l, bits, size);
+}
+
+void gen_print_stype(int type, int bits)
+{	/* Writes the element type name "vec_<type_str[type].l><bits>" to stdout. */
+	fprintf(stdout, "vec_%s%d", type_str[type].l, bits);
+}
+
+/* Return-type printer for ops that yield a vector: prints the vector type name. */
+static void vret(int op, int type, int bits, int size)
+{
+	(void)op; /* unused; the signature is shared by every printer stored in ops[] */
+	gen_print_vtype(type, bits, size);
+}
+
+/* Return-type printer for ops that yield nothing: prints "void". */
+static void nret(int op, int type, int bits, int size)
+{
+	(void)op, (void)type, (void)bits, (void)size; /* none of the arguments are needed */
+	fputs("void", stdout);
+}
+
+/* Parameter-list printer for single-operand ops: "<vtype> vec". */
+static void voneparam(int op, int type, int bits, int size)
+{
+	(void)op;
+	gen_print_vtype(type, bits, size);
+	fputs(" vec", stdout);
+}
+
+/* Parameter-list printer for two-operand ops: "<vtype> vec1, <vtype> vec2". */
+static void vtwoparam(int op, int type, int bits, int size)
+{
+	(void)op;
+	gen_print_vtype(type, bits, size);
+	fputs(" vec1, ", stdout);
+	gen_print_vtype(type, bits, size);
+	fputs(" vec2", stdout);
+}
+
+/* Parameter-list printer for shifts: vec1 has the requested type, vec2 is always the TYPE_UINT vector of the same bits/size. */
+static void vshiftparam(int op, int type, int bits, int size)
+{
+	(void)op;
+	gen_print_vtype(type, bits, size);
+	fputs(" vec1, ", stdout);
+	gen_print_vtype(TYPE_UINT, bits, size);
+	fputs(" vec2", stdout);
+}
+
+/* Parameter-list printer for loads: "const <stype> x[<size>]". */
+static void vloadparam(int op, int type, int bits, int size)
+{
+	(void)op;
+	fputs("const ", stdout);
+	gen_print_stype(type, bits);
+	printf(" x[%d]", size);
+}
+
+/* Parameter-list printer for splat: a single element value, "<stype> x". */
+static void vsplatparam(int op, int type, int bits, int size)
+{
+	(void)op, (void)size;
+	gen_print_stype(type, bits);
+	fputs(" x", stdout);
+}
+
+/* Parameter-list printer for stores: "<vtype> vec, <stype> x[<size>]". */
+static void vstoreparam(int op, int type, int bits, int size)
+{
+	(void)op;
+	gen_print_vtype(type, bits, size);
+	fputs(" vec, ", stdout);
+	gen_print_stype(type, bits);
+	printf(" x[%d]", size);
+}
+
+struct op_info ops[] = { /* indexed by OP_*; each entry: name used in *_DEFINED macros, name used in function names, return-type printer, parameter-list printer */
+	[OP_SPLAT] = {"SPLAT", "splat", vret, vsplatparam},
+	[OP_LOAD_ALIGNED] = {"LOAD_ALIGNED", "load_aligned", vret, vloadparam},
+	[OP_LOAD] = {"LOAD", "load", vret, vloadparam},
+	[OP_STORE_ALIGNED] = {"STORE_ALIGNED", "store_aligned", nret, vstoreparam},
+	[OP_STORE] = {"STORE", "store", nret, vstoreparam},
+	[OP_ADD] = {"ADD", "add", vret, vtwoparam},
+	[OP_SUB] = {"SUB", "sub", vret, vtwoparam},
+	[OP_MUL] = {"MUL", "mul", vret, vtwoparam},
+	[OP_DIV] = {"DIV", "div", vret, vtwoparam},
+	[OP_MOD] = {"MOD", "mod", vret, vtwoparam},
+	[OP_AVG] = {"AVG", "avg", vret, vtwoparam},
+	[OP_AND] = {"AND", "and", vret, vtwoparam},
+	[OP_OR] = {"OR", "or", vret, vtwoparam},
+	[OP_XOR] = {"XOR", "xor", vret, vtwoparam},
+	[OP_NOT] = {"NOT", "not", vret, voneparam},
+	[OP_CMPLT] = {"CMPLT", "cmplt", vret, vtwoparam},
+	[OP_CMPEQ] = {"CMPEQ", "cmpeq", vret, vtwoparam},
+	[OP_CMPGT] = {"CMPGT", "cmpgt", vret, vtwoparam},
+	[OP_CMPLE] = {"CMPLE", "cmple", vret, vtwoparam},
+	[OP_CMPGE] = {"CMPGE", "cmpge", vret, vtwoparam},
+	[OP_MIN] = {"MIN", "min", vret, vtwoparam},
+	[OP_MAX] = {"MAX", "max", vret, vtwoparam},
+	[OP_RSHIFT] = {"RSHIFT", "rshift", vret, vshiftparam},
+	[OP_LRSHIFT] = {"LRSHIFT", "lrshift", vret, vshiftparam},
+	[OP_LSHIFT] = {"LSHIFT", "lshift", vret, vshiftparam},
+};
+
+struct op_info *gen_op_info(int op)
+{	/* Returns a pointer to the ops[] entry for `op`; no range check is performed. */
+	return ops + op;
+}
+
+/* Compile-time check: the array bound is -2 (a compile error) unless ops[] has exactly OP_FINAL_ entries. */
+extern int (*genlib_test(void))[(ARRAY_SIZE(ops) == OP_FINAL_) ? 1 : -2];
+
+/* Check predicate that accepts everything: returns 1 for any op/type/bits/size (same signature as the check hook called by gen()). */
+int op_impl_check_always(int op, int type, int bits, int size)
+{
+	(void)op, (void)type, (void)bits, (void)size; /* silenced before the return so the statement is reachable */
+	return 1;
+}
+
+/* Returns 0 for op/type pairs that must not be generated (bitwise ops and shifts on TYPE_FLOAT), 1 otherwise. */
+static inline int verify_op(int op, int type)
+{
+	if (type != TYPE_FLOAT)
+		return 1;
+	switch (op) {
+	case OP_AND:
+	case OP_OR:
+	case OP_XOR:
+	case OP_NOT:
+	case OP_LSHIFT:
+	case OP_RSHIFT:
+	case OP_LRSHIFT:
+		return 0; /* these operations make no sense for floating point */
+	default:
+		return 1;
+	}
+}
+
+/* printf-style template for the banner printed at the top of the generated output; it contains a single %s conversion.
+ * XXX: would it be faster to unroll literally everything instead of defining everything, and then unpacking it all? */
+static const char *header_tmpl =
+	"/**\n"
+	" * vec - a tiny SIMD vector library in C99\n"
+	" * \n"
+	" * Copyright (c) 2024-2025 Paper\n"
+	" * \n"
+	" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
+	" * of this software and associated documentation files (the \"Software\"), to deal\n"
+	" * in the Software without restriction, including without limitation the rights\n"
+	" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
+	" * copies of the Software, and to permit persons to whom the Software is\n"
+	" * furnished to do so, subject to the following conditions:\n"
+	" * \n"
+	" * The above copyright notice and this permission notice shall be included in all\n"
+	" * copies or substantial portions of the Software.\n"
+	" * \n"
+	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
+	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
+	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
+	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
+	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
+	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
+	" * SOFTWARE.\n"
+	"**/\n"
+	"\n"
+	"/* This file is automatically generated! Do not edit it directly!\n"
+	" * Edit the code that generates it in utils/gen%s.c  --paper */\n" /* NOTE(review): this file lives at gen/genlib.c; confirm the utils/ path is still correct */
+	"\n"
+	"/* ------------------------------------------------------------------------ */\n"
+	"/* PREPROCESSOR HELL INCOMING */\n\n";
+
+/* Print the generated implementations to stdout. For every listed vector type and every op that
+ * has a body printer and passes its checks, emits one function inside an
+ * "#if !defined(V<TYPE><bits>x<size>_<OP>_DEFINED) ... #endif" guard that also defines that macro.
+ * op_impl[] holds the per-op pbody/check/ppcheck hooks; name fills the %s in the banner. Returns 0. */
+int gen(struct op_impl op_impl[OP_FINAL_], const char *name)
+{
+	static const struct {
+		int type, bits, size;
+	} types[] = {
+#define INT_TYPE(bits, size) {TYPE_INT, bits, size}, {TYPE_UINT, bits, size}
+		INT_TYPE(8, 2),
+		INT_TYPE(8, 4),
+		INT_TYPE(8, 8),
+		INT_TYPE(8, 16),
+		INT_TYPE(8, 32),
+		INT_TYPE(8, 64),
+
+		INT_TYPE(16, 2),
+		INT_TYPE(16, 4),
+		INT_TYPE(16, 8),
+		INT_TYPE(16, 16),
+		INT_TYPE(16, 32),
+
+		INT_TYPE(32, 2),
+		INT_TYPE(32, 4),
+		INT_TYPE(32, 8),
+		INT_TYPE(32, 16),
+
+		INT_TYPE(64, 2),
+		INT_TYPE(64, 4),
+		INT_TYPE(64, 8),
+#undef INT_TYPE
+
+		/* float */
+		{TYPE_FLOAT, 32, 2},
+		{TYPE_FLOAT, 32, 4},
+		{TYPE_FLOAT, 32, 8},
+		{TYPE_FLOAT, 32, 16},
+
+		/* double */
+		{TYPE_FLOAT, 64, 2},
+		{TYPE_FLOAT, 64, 4},
+		{TYPE_FLOAT, 64, 8},
+	};
+	int op;
+	size_t s;
+
+	printf(header_tmpl, name);
+
+	for (s = 0; s < ARRAY_SIZE(types); s++) {
+		for (op = 0; op < OP_FINAL_; op++) {
+			if (!op_impl[op].pbody)
+				continue; /* no body printer for this op: nothing to emit */
+
+			if (op_impl[op].check && !op_impl[op].check(op, types[s].type, types[s].bits, types[s].size))
+				continue;
+
+			if (!verify_op(op, types[s].type))
+				continue;
+
+			printf("#if !defined(V%s%dx%d_%s_DEFINED)", type_str[types[s].type].u, types[s].bits, types[s].size, ops[op].u);
+			if (op_impl[op].ppcheck) { /* optional extra preprocessor condition, continued on the next line */
+				printf(" \\\n\t && (");
+				op_impl[op].ppcheck(op, types[s].type, types[s].bits, types[s].size);
+				printf(")");
+			}
+
+			puts(""); /* ends the #if line */
+
+			printf("VEC_FUNC_IMPL ");
+			ops[op].pret(op, types[s].type, types[s].bits, types[s].size);
+			printf(" ");
+			gen_print_vtype(types[s].type, types[s].bits, types[s].size);
+			printf("_%s(", ops[op].l);
+			ops[op].pparam(op, types[s].type, types[s].bits, types[s].size);
+			puts(")\n{");
+			op_impl[op].pbody(op, types[s].type, types[s].bits, types[s].size);
+			puts("}");
+
+			printf("# define V%s%dx%d_%s_DEFINED\n", type_str[types[s].type].u, types[s].bits, types[s].size, ops[op].u);
+			puts("#endif");
+		}
+	}
+
+	return 0;
+}