diff utils/genaltivec.c @ 41:c6e0df09b86f default tip

*: performance improvements with old GCC, reimplement altivec
author Paper <paper@tflc.us>
date Mon, 28 Apr 2025 16:31:59 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils/genaltivec.c	Mon Apr 28 16:31:59 2025 -0400
@@ -0,0 +1,374 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+/* Use this file to generate include/vec/impl/ppc/altivec.h !!
+ *
+ * `gcc -o genaltivec genaltivec.c` */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+/* Number of elements in a true array `x`; the result has type size_t.
+ * Only meaningful on arrays, not on pointers. */
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+
+/* ------------------------------------------------------------------------ */
+
+/* #define USE_VSX_EXTENSIONS */
+
+/*
+ * Operations the generator knows about.
+ *
+ * The declaration order matters:
+ *  - print_ops() iterates over [0, OP_FINAL_), so only enumerators declared
+ *    before OP_FINAL_ are ever generated;
+ *  - print_gcc_op() indexes its op_altivec[] table with (op - OP_ADD), so
+ *    the enumerators from OP_ADD through OP_LRSHIFT must stay in the same
+ *    order as that table.
+ */
+enum op {
+	/* return vector, take in an integer */
+	OP_SPLAT = 0,
+
+	/* return vector, take in an array */
+	OP_LOAD_ALIGNED,
+	OP_LOAD,
+
+	/* void, take in vector and array */
+	OP_STORE_ALIGNED,
+	OP_STORE,
+
+	/* return vector, takes in two vectors */
+	OP_ADD,
+	OP_SUB,
+	OP_MUL,
+	OP_AND,
+	OP_OR,
+	OP_XOR,
+	OP_CMPLT,
+	OP_CMPEQ,
+	OP_CMPGT,
+#ifdef USE_VSX_EXTENSIONS
+	OP_CMPLE,
+	OP_CMPGE,
+#endif
+	OP_MIN,
+	OP_MAX,
+	OP_AVG,
+
+	/* return vector, takes in a vector and an explicitly unsigned vector */
+	OP_LSHIFT,
+	OP_LRSHIFT,
+	OP_RSHIFT,
+
+	/* sentinel: one past the last operation that gets generated */
+	OP_FINAL_,
+
+	/* unimplemented, no altivec version :) */
+	OP_NOT,
+};
+
+/* convert op -> string */
+/*
+ * Spelling of each operation, indexed by enum op:
+ *   u - upper case, used in the V..._<OP>_DEFINED guard macros
+ *   l - lower case, used in the generated function names
+ * Entries are listed in the same order as the enumerators are declared.
+ */
+static struct {
+	const char *u;
+	const char *l;
+} op_names[] = {
+	/* splat / load / store */
+	[OP_SPLAT]         = {"SPLAT", "splat"},
+	[OP_LOAD_ALIGNED]  = {"LOAD_ALIGNED", "load_aligned"},
+	[OP_LOAD]          = {"LOAD", "load"},
+	[OP_STORE_ALIGNED] = {"STORE_ALIGNED", "store_aligned"},
+	[OP_STORE]         = {"STORE", "store"},
+
+	/* two-vector operations */
+	[OP_ADD]           = {"ADD", "add"},
+	[OP_SUB]           = {"SUB", "sub"},
+	[OP_MUL]           = {"MUL", "mul"},
+	[OP_AND]           = {"AND", "and"},
+	[OP_OR]            = {"OR", "or"},
+	[OP_XOR]           = {"XOR", "xor"},
+	[OP_CMPLT]         = {"CMPLT", "cmplt"},
+	[OP_CMPEQ]         = {"CMPEQ", "cmpeq"},
+	[OP_CMPGT]         = {"CMPGT", "cmpgt"},
+#ifdef USE_VSX_EXTENSIONS
+	[OP_CMPLE]         = {"CMPLE", "cmple"},
+	[OP_CMPGE]         = {"CMPGE", "cmpge"},
+#endif
+	[OP_MIN]           = {"MIN", "min"},
+	[OP_MAX]           = {"MAX", "max"},
+	[OP_AVG]           = {"AVG", "avg"},
+
+	/* shifts */
+	[OP_LSHIFT]        = {"LSHIFT", "lshift"},
+	[OP_LRSHIFT]       = {"LRSHIFT", "lrshift"},
+	[OP_RSHIFT]        = {"RSHIFT", "rshift"},
+
+	/* declared after OP_FINAL_, so never generated */
+	[OP_NOT]           = {"NOT", "not"},
+};
+
+/* Sign prefix for generated identifiers: empty when `x` (is_signed) is
+ * nonzero, "U"/"u" when it is zero, e.g. VINT8x16 vs. VUINT8x16. */
+#define UPSIGN(x) ((x) ? "" : "U")
+#define LOSIGN(x) ((x) ? "" : "u")
+
+/* Map an element width in bits to the C base type named in AltiVec
+ * "vector <sign> <type>" casts; returns NULL for widths with no mapping. */
+static const char *altivec_elem_type(int bits)
+{
+	switch (bits) {
+	case 8:
+		return "char";
+	case 16:
+		return "short";
+	case 32:
+		return "int";
+	case 64:
+		/* only requested when USE_VSX_EXTENSIONS adds the 64x2 types */
+		return "long long";
+	default:
+		return NULL;
+	}
+}
+
+/*
+ * Print the definition of one generated function,
+ * v[u]int<bits>x<size>_<op>(), to stdout.
+ *
+ *   op         operation to emit; also indexes op_names[]
+ *   is_signed  nonzero for the signed type, zero for the unsigned one
+ *   bits       width of one element in bits
+ *   size       number of elements in the vector
+ *
+ * Layout of the emitted text:
+ *
+ *   #ifdef vec_mul / vec_splats              (OP_MUL / OP_SPLAT only)
+ *   #ifndef V[U]INT<bits>x<size>_<OP>_DEFINED
+ *   VEC_FUNC_IMPL <return type> <name>(<parameters>)
+ *   {
+ *       <body>
+ *   }
+ *   # define V[U]INT<bits>x<size>_<OP>_DEFINED
+ *   #endif
+ *   #endif                                   (OP_MUL / OP_SPLAT only)
+ */
+static void print_gcc_op(enum op op, int is_signed, int bits, int size)
+{
+	const char *sign = LOSIGN(is_signed);
+
+	/* compatibility with ancient gcc */
+	switch (op) {
+	case OP_MUL:
+		puts("#ifdef vec_mul");
+		break;
+	case OP_SPLAT:
+		puts("#ifdef vec_splats");
+		break;
+	default:
+		break;
+	}
+
+	printf("#ifndef V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u);
+
+	printf("VEC_FUNC_IMPL ");
+
+	/* first; the return value */
+	switch (op) {
+	case OP_SPLAT:
+	case OP_LOAD_ALIGNED:
+	case OP_LOAD:
+	case OP_ADD:
+	case OP_SUB:
+	case OP_MUL:
+	case OP_AND:
+	case OP_OR:
+	case OP_XOR:
+	case OP_CMPLT:
+	case OP_CMPEQ:
+	case OP_CMPGT:
+#ifdef USE_VSX_EXTENSIONS
+	case OP_CMPLE:
+	case OP_CMPGE:
+#endif
+	case OP_MIN:
+	case OP_MAX:
+	case OP_AVG:
+	case OP_RSHIFT:
+	case OP_LRSHIFT:
+	case OP_LSHIFT:
+	case OP_NOT:
+		printf("v%sint%dx%d", sign, bits, size);
+		break;
+	case OP_STORE_ALIGNED:
+	case OP_STORE:
+		printf("void");
+		break;
+	default:
+		break;
+	}
+
+	/* whitespace and function name */
+	printf(" v%sint%dx%d_%s(", sign, bits, size, op_names[op].l);
+
+	/* parameters */
+	switch (op) {
+	case OP_SPLAT:
+		printf("vec_%sint%d x", sign, bits);
+		break;
+	case OP_LOAD_ALIGNED:
+	case OP_LOAD:
+		printf("const vec_%sint%d x[%d]", sign, bits, size);
+		break;
+	case OP_STORE_ALIGNED:
+	case OP_STORE:
+		printf("v%sint%dx%d vec, vec_%sint%d arr[%d]", sign, bits, size, sign, bits, size);
+		break;
+	case OP_ADD:
+	case OP_SUB:
+	case OP_MUL:
+	case OP_AND:
+	case OP_OR:
+	case OP_XOR:
+	case OP_CMPLT:
+	case OP_CMPEQ:
+	case OP_CMPGT:
+#ifdef USE_VSX_EXTENSIONS
+	case OP_CMPLE:
+	case OP_CMPGE:
+#endif
+	case OP_MIN:
+	case OP_MAX:
+	case OP_AVG:
+		printf("v%sint%dx%d vec1, v%sint%dx%d vec2", sign, bits, size, sign, bits, size);
+		break;
+	case OP_RSHIFT:
+	case OP_LRSHIFT:
+	case OP_LSHIFT:
+		/* the shift count vector is always the unsigned type */
+		printf("v%sint%dx%d vec1, vuint%dx%d vec2", sign, bits, size, bits, size);
+		break;
+	case OP_NOT:
+		printf("v%sint%dx%d vec", sign, bits, size);
+		break;
+	default:
+		break;
+	}
+
+	puts(")\n{");
+
+	/* function body */
+	switch (op) {
+	case OP_SPLAT:
+		printf("\tv%sint%dx%d vec;\n", sign, bits, size);
+		printf("\tvec.altivec = vec_splats(x);\n");
+		printf("\treturn vec;\n");
+		break;
+	case OP_LOAD_ALIGNED:
+		printf("\tv%sint%dx%d vec;\n", sign, bits, size);
+		puts("\tvec.altivec = vec_ld(0, x);");
+		printf("\treturn vec;\n");
+		break;
+	case OP_LOAD:
+		printf("\tv%sint%dx%d vec;\n", sign, bits, size);
+		/* The second load uses offset 15, not 16: when x is already
+		 * 16-byte aligned this re-reads the same block instead of
+		 * touching the 16 bytes past the end of the array. */
+		puts("\tvec.altivec = vec_perm(vec_ld(0, x), vec_ld(15, x), vec_lvsl(0, x));");
+		printf("\treturn vec;\n");
+		break;
+	case OP_STORE_ALIGNED:
+		puts("\tvec_st(vec.altivec, 0, arr);");
+		break;
+	case OP_STORE:
+		/* ??? */
+		puts("\tmemcpy(arr, &vec, sizeof(vec));");
+		break;
+	case OP_ADD:
+	case OP_SUB:
+	case OP_MUL:
+	case OP_AND:
+	case OP_OR:
+	case OP_XOR:
+	case OP_AVG:
+	case OP_CMPLT:
+	case OP_CMPEQ:
+	case OP_CMPGT:
+#ifdef USE_VSX_EXTENSIONS
+	case OP_CMPLE:
+	case OP_CMPGE:
+#endif
+	case OP_LSHIFT:
+	case OP_LRSHIFT:
+	case OP_RSHIFT:
+	case OP_MIN:
+	case OP_MAX: {
+		/* vec_<name> suffix for each op, indexed by (op - OP_ADD); must
+		 * stay in the same order as the enumerators of enum op.
+		 * OP_RSHIFT has no entry: it is special-cased below. */
+		static const char *op_altivec[OP_LRSHIFT - OP_ADD + 1] = {"add", "sub", "mul", "and", "or", "xor", "cmplt", "cmpeq", "cmpgt",
+#ifdef USE_VSX_EXTENSIONS
+			"cmple",
+			"cmpge",
+#endif
+			"min", "max", "avg", "sl", "sr"};
+		const char *elem = altivec_elem_type(bits);
+
+		printf("\tv%sint%dx%d vec;\n", sign, bits, size);
+		if (op == OP_RSHIFT) {
+			/* arithmetic shift (vec_sra) for signed, logical (vec_sr) for unsigned */
+			printf("\tvec.altivec = vec_sr%s(vec1.altivec, vec2.altivec);\n", (is_signed) ? "a" : "");
+		} else if (elem == NULL) {
+			/* no vector element type known for this width */
+			printf("#error unsupported element width\n");
+		} else {
+			/* the cast converts e.g. the comparison results back to the
+			 * element type stored in the vector struct */
+			printf("\tvec.altivec = (vector %s %s)vec_%s(vec1.altivec, vec2.altivec);\n", (is_signed) ? "signed" : "unsigned", elem, op_altivec[op - OP_ADD]);
+		}
+		printf("\treturn vec;\n");
+		break;
+	}
+	default:
+		/* newline keeps the closing brace off the directive line */
+		printf("#error implement this operation\n");
+		break;
+	}
+
+	/* end function definition */
+	puts("}");
+
+	printf("# define V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u);
+	puts("#endif");
+
+	/* close the extra intrinsic guard opened at the top */
+	switch (op) {
+	case OP_SPLAT:
+	case OP_MUL:
+		puts("#endif");
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * Emit every generated operation (OP_SPLAT up to, but not including,
+ * OP_FINAL_) for one vector type, preceded by a banner comment that names
+ * the type.
+ *
+ *   is_signed  nonzero for the signed type, zero for the unsigned one
+ *   bits       width of one element in bits
+ *   size       number of elements in the vector
+ */
+static inline void print_ops(int is_signed, int bits, int size)
+{
+	int i;
+
+	/* LOSIGN gives "u" only for unsigned types, so the banner matches the
+	 * names of the functions printed below it */
+	printf("\n\n/* v%sint%dx%d */\n\n", LOSIGN(is_signed), bits, size);
+
+	for (i = 0; i < OP_FINAL_; i++)
+		print_gcc_op((enum op)i, is_signed, bits, size);
+}
+
+/* Middle part of the generated header's include guard
+ * (VEC_IMPL_PPC_<name>_H_); depends on whether VSX output is enabled. */
+#ifdef USE_VSX_EXTENSIONS
+# define HEADER_GUARD_NAME "VSX"
+#else
+# define HEADER_GUARD_NAME "ALTIVEC"
+#endif
+
+/* Text printed at the very top of the generated header: the license, a
+ * "do not edit" notice and the opening of the include guard. main() writes
+ * it with puts(), which appends one more newline. */
+static const char *header =
+	"/**\n"
+	" * vec - a tiny SIMD vector library in C99\n"
+	" * \n"
+	" * Copyright (c) 2024-2025 Paper\n"
+	" * \n"
+	" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
+	" * of this software and associated documentation files (the \"Software\"), to deal\n"
+	" * in the Software without restriction, including without limitation the rights\n"
+	" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
+	" * copies of the Software, and to permit persons to whom the Software is\n"
+	" * furnished to do so, subject to the following conditions:\n"
+	" * \n"
+	" * The above copyright notice and this permission notice shall be included in all\n"
+	" * copies or substantial portions of the Software.\n"
+	" * \n"
+	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
+	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
+	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
+	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
+	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
+	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
+	" * SOFTWARE.\n"
+	"**/\n"
+	"\n"
+	"/* This file is automatically generated! Do not edit it directly!\n"
+	" * Edit the code that generates it in utils/genaltivec.c  --paper */\n"
+	"\n"
+	"#ifndef VEC_IMPL_PPC_" HEADER_GUARD_NAME "_H_\n"
+	"#define VEC_IMPL_PPC_" HEADER_GUARD_NAME "_H_\n"
+	"\n";
+
+/* Text printed at the very end of the generated header: closes the include
+ * guard opened by `header`. */
+static const char *footer = 
+	"#endif /* VEC_IMPL_PPC_" HEADER_GUARD_NAME "_H_ */\n";
+
+/*
+ * Write the complete generated header to stdout: the license/guard header,
+ * then the signed and unsigned operations for every vector shape in
+ * defs[], then the closing guard.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if writing to stdout failed (so a
+ * truncated header is not mistaken for a good one).
+ */
+int main(void)
+{
+	/* vector shapes to generate: element width in bits, element count */
+	static const struct {
+		int bits, size;
+	} defs[] = {
+		/* -- 8-bit */
+		{8, 16},
+		/* -- 16-bit */
+		{16, 8},
+
+		/* -- 32-bit */
+		{32, 4},
+
+#ifdef USE_VSX_EXTENSIONS
+		/* -- 64-bit */
+		{64, 2},
+#endif
+	};
+	size_t i;
+
+	puts(header);
+
+	/* size_t index: ARRAY_SIZE() is unsigned, avoid a mixed-sign compare */
+	for (i = 0; i < ARRAY_SIZE(defs); i++) {
+		print_ops(1, defs[i].bits, defs[i].size);
+		print_ops(0, defs[i].bits, defs[i].size);
+	}
+
+	puts(footer);
+
+	/* flush explicitly so buffered write errors are seen before exiting */
+	if (fflush(stdout) != 0 || ferror(stdout))
+		return EXIT_FAILURE;
+
+	return EXIT_SUCCESS;
+}