diff gen/genaltivec.c @ 45:7955bed1d169 default tip

*: add preliminary floating point support; no x86 intrinsics just yet, but I did add altivec since it's (arguably) the simplest :)
author Paper <paper@tflc.us>
date Wed, 30 Apr 2025 18:36:38 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen/genaltivec.c	Wed Apr 30 18:36:38 2025 -0400
@@ -0,0 +1,250 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024-2025 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+#include "genlib.h"
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+
+/* ------------------------------------------------------------------------ */
+
+/* #define USE_VSX_EXTENSIONS */
+/* #define USE_POWER8_EXTENSIONS */
+
+static int altivec_check(int op, int type, int bits, int size)
+{
+	/* Say whether a vector of `size` lanes, each `bits` wide, can be
+	 * generated: the lane width must be one this target handles and the
+	 * lanes together must fill exactly 128 bits. Returns 1 or 0. */
+	int supported = (bits == 8 || bits == 16 || bits == 32);
+
+	(void)op; /* the answer is the same for every operation */
+
+#ifdef USE_VSX_EXTENSIONS
+	supported = supported || (bits == 64);
+# ifndef USE_POWER8_EXTENSIONS
+	/* VSX has double, but not int64 */
+	if ((bits == 64) && (type != TYPE_FLOAT))
+		supported = 0;
+# endif
+#else
+	(void)type; /* only consulted for 64-bit lanes */
+#endif
+
+	return supported && (bits * size == 128);
+}
+
+static int altivec_check_int(int op, int type, int bits, int size)
+{
+	return (type == TYPE_FLOAT) ? 0 : altivec_check(op, type, bits, size); /* non-float only */
+}
+
+static int altivec_check_float(int op, int type, int bits, int size)
+{
+	return (type == TYPE_FLOAT) ? altivec_check(op, type, bits, size) : 0; /* float only */
+}
+
+static void altivec_ppcheck(int op, int type, int bits, int size)
+{
+	/* Print a defined(...) test for the intrinsics that old gcc only
+	 * partly (and brokenly) implemented; every other op prints nothing. */
+	if (op == OP_MUL)
+		printf("defined(vec_mul)");
+	else if (op == OP_SPLAT)
+		printf("defined(vec_splats)");
+}
+
+static void altivec_splat(int op, int type, int bits, int size)
+{
+	/* emitted body: declare the result, vec_splats(x) into it, return it */
+	fputs("\t", stdout);
+	gen_print_vtype(type, bits, size);
+	fputs(" vec;\n", stdout);
+	puts("\tvec.altivec = vec_splats(x);");
+	puts("\treturn vec;");
+}
+
+static void altivec_load(int op, int type, int bits, int size)
+{
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	printf(" vec;\n");
+	/* unaligned load; offset 15 (not 16) keeps an aligned x from reading the next quadword */
+	puts("\tvec.altivec = vec_perm(vec_ld(0, x), vec_ld(15, x), vec_lvsl(0, x));");
+	puts("\treturn vec;");
+}
+
+static void altivec_load_aligned(int op, int type, int bits, int size)
+{
+	/* emitted body: declare the result, vec_ld(0, x) into it, return it */
+	fputs("\t", stdout);
+	gen_print_vtype(type, bits, size);
+	fputs(" vec;\n", stdout);
+	puts("\tvec.altivec = vec_ld(0, x);");
+	puts("\treturn vec;");
+}
+
+static void altivec_store_aligned(int op, int type, int bits, int size)
+{
+	printf("%s\n", "\tvec_st(vec.altivec, 0, x);"); /* emitted body: store vec to x */
+}
+
+/* there is no unaligned store implementation (yet?) */
+
+static void altivec_print_native_type(int type, int bits)
+{
+	/* Print the native vector type matching `type` and `bits`, for
+	 * example "vector unsigned short" or "vector float". Whatever is
+	 * not recognised is left out: an unknown type prints just
+	 * "vector ", an unknown width omits the element type. */
+	const char *sign = "";
+	const char *elem = "";
+
+	if (type == TYPE_INT || type == TYPE_UINT) {
+		/* integers: signedness keyword, then the C type of that width */
+		sign = (type == TYPE_INT) ? "signed " : "unsigned ";
+
+		if (bits == 8)
+			elem = "char";
+		else if (bits == 16)
+			elem = "short";
+		else if (bits == 32)
+			elem = "int";
+		else if (bits == 64)
+			elem = "long long";
+	} else if (type == TYPE_FLOAT) {
+		/* floating point takes no signedness keyword */
+		if (bits == 32)
+			elem = "float";
+		else if (bits == 64)
+			elem = "double";
+	}
+
+	/* WITH DIRECTION AND MAGNITUDE! */
+	printf("vector ");
+
+	/* empty strings print nothing, which covers the unknown cases */
+	fputs(sign, stdout);
+	fputs(elem, stdout);
+}
+
+static void altivec_2op(int op, int type, int bits, int size)
+{
+	/* Emit the body of a two-operand function: declare the result,
+	 * apply vec_<name>() to vec1 and vec2, and return it.
+	 * `names` maps an op to <name>. OP_RSHIFT has no entry; it is handled
+	 * separately below (vec_sra for TYPE_INT, vec_sr otherwise). */
+	static const char *names[] = {
+		/* arithmetic */
+		[OP_ADD] = "add", [OP_SUB] = "sub", [OP_MUL] = "mul",
+		[OP_DIV] = "div", [OP_MOD] = "mod", [OP_AVG] = "avg",
+		/* bitwise */
+		[OP_AND] = "and", [OP_OR] = "or", [OP_XOR] = "xor",
+		/* comparison */
+		[OP_CMPLT] = "cmplt", [OP_CMPEQ] = "cmpeq", [OP_CMPGT] = "cmpgt",
+		[OP_CMPLE] = "cmple", [OP_CMPGE] = "cmpge",
+		/* min/max */
+		[OP_MIN] = "min", [OP_MAX] = "max",
+		/* shifts */
+		[OP_LSHIFT] = "sl", [OP_LRSHIFT] = "sr",
+	};
+	const int is_signed = (type == TYPE_INT);
+
+	fputs("\t", stdout);
+	gen_print_vtype(type, bits, size);
+	fputs(" vec;\n", stdout);
+
+	if (op != OP_RSHIFT) {
+		/* cast presumably because some vec_*() return another vector type */
+		printf("\tvec.altivec = (");
+		altivec_print_native_type(type, bits);
+		printf(")vec_%s(vec1.altivec, vec2.altivec);\n", names[op]);
+	} else {
+		printf("\tvec.altivec = vec_sr%s(vec1.altivec, vec2.altivec);\n", is_signed ? "a" : "");
+	}
+
+	puts("\treturn vec;");
+}
+
+/* ------------------------------------------------------------------------ */
+
+static struct op_impl op_impl[OP_FINAL_] = {
+	/* NOTE(review): the middle member is NULL everywhere, so altivec_ppcheck()
+	 * is never referenced -- presumably OP_MUL/OP_SPLAT want it; confirm. */
+	[OP_SPLAT]         = { altivec_check, NULL, altivec_splat },
+	[OP_LOAD]          = { altivec_check, NULL, altivec_load },
+	[OP_LOAD_ALIGNED]  = { altivec_check, NULL, altivec_load_aligned },
+	[OP_STORE_ALIGNED] = { altivec_check, NULL, altivec_store_aligned },
+
+	/* arithmetic */
+	[OP_ADD]           = { altivec_check, NULL, altivec_2op },
+	[OP_SUB]           = { altivec_check, NULL, altivec_2op },
+	[OP_MUL]           = { altivec_check, NULL, altivec_2op },
+	[OP_AVG]           = { altivec_check_int, NULL, altivec_2op },
+#ifdef USE_VSX_EXTENSIONS
+	/* GCC fails to compile integer division, so limit to floats */
+	[OP_DIV]           = { altivec_check_float, NULL, altivec_2op },
+#endif
+#if 0
+	/* This is Power10. I don't have any Power10 hardware :)
+	 * (well, I also don't have any VSX hardware. whatever) */
+	[OP_MOD]           = { altivec_check_int, NULL, altivec_2op },
+#endif
+
+	/* bitwise */
+	[OP_AND]           = { altivec_check, NULL, altivec_2op },
+	[OP_OR]            = { altivec_check, NULL, altivec_2op },
+	[OP_XOR]           = { altivec_check, NULL, altivec_2op },
+
+	/* min/max */
+	[OP_MIN]           = { altivec_check, NULL, altivec_2op },
+	[OP_MAX]           = { altivec_check, NULL, altivec_2op },
+
+	/* bitshift */
+	[OP_LSHIFT]        = { altivec_check, NULL, altivec_2op },
+	[OP_LRSHIFT]       = { altivec_check, NULL, altivec_2op },
+	[OP_RSHIFT]        = { altivec_check, NULL, altivec_2op },
+
+	/* comparison */
+	[OP_CMPLT]         = { altivec_check, NULL, altivec_2op },
+	[OP_CMPEQ]         = { altivec_check, NULL, altivec_2op },
+	[OP_CMPGT]         = { altivec_check, NULL, altivec_2op },
+#ifdef USE_VSX_EXTENSIONS
+	[OP_CMPLE]         = { altivec_check, NULL, altivec_2op },
+	[OP_CMPGE]         = { altivec_check, NULL, altivec_2op },
+#endif
+};
+
+
+/* name handed to gen(): POWER8 wins over VSX, VSX over plain AltiVec */
+#if defined(USE_POWER8_EXTENSIONS)
+# define ALTIVEC_GEN_NAME "power8"
+#elif defined(USE_VSX_EXTENSIONS)
+# define ALTIVEC_GEN_NAME "vsx"
+#else
+# define ALTIVEC_GEN_NAME "altivec"
+#endif
+int main(void)
+{
+	gen(op_impl, ALTIVEC_GEN_NAME);
+}