vec: gen/gengeneric.c comparison

comparison gen/gengeneric.c @ 45:7955bed1d169 default tip

*: add preliminary floating point support no x86 intrinsics just yet, but I did add altivec since it's (arguably) the simplest :)

author	Paper <paper@tflc.us>
date	Wed, 30 Apr 2025 18:36:38 -0400
parents
children

comparison

equal deleted inserted replaced

-:b0a3f0248ecc
+:7955bed1d169
+/**
+* vec - a tiny SIMD vector library in C99
+*
+* Copyright (c) 2024-2025 Paper
+*
+* Permission is hereby granted, free of charge, to any person obtaining a copy
+* of this software and associated documentation files (the "Software"), to deal
+* in the Software without restriction, including without limitation the rights
+* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the Software is
+* furnished to do so, subject to the following conditions:
+*
+* The above copyright notice and this permission notice shall be included in all
+* copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+* SOFTWARE.
+**/
+#include "genlib.h"
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+/*
+ * Emits the body for the element-wise binary operations that map directly
+ * onto a single C operator (+, -, *, &, |, ^): one assignment per lane of
+ * `vec1`, followed by a return of `vec1`.
+ *
+ * `op` selects the operator text; `size` is the number of lanes printed.
+ * `type` and `bits` are not used here.
+ */
+static void op_builtin_pbody(int op, int type, int bits, int size)
+{
+	const char *symbols[OP_FINAL_] = {
+		[OP_ADD] = "+",
+		[OP_SUB] = "-",
+		[OP_MUL] = "*",
+		[OP_AND] = "&",
+		[OP_OR]  = "|",
+		[OP_XOR] = "^",
+	};
+	int lane = 0;
+
+	while (lane < size) {
+		printf("\tvec1.generic[%d] = (vec1.generic[%d] %s vec2.generic[%d]);\n",
+			lane, lane, symbols[op], lane);
+		lane++;
+	}
+
+	puts("\treturn vec1;");
+	(void)bits;
+}
+/*
+ * Emits the body of the element-wise average operation.
+ *
+ * - TYPE_INT:   declares four scratch variables of the scalar type, then for
+ *               each lane halves both inputs and adds a correction computed
+ *               from the two remainders, so the sum is never formed directly.
+ * - TYPE_UINT:  per lane, (a >> 1) + (b >> 1) + ((a | b) & 1).
+ * - TYPE_FLOAT: per lane, (a + b) / 2.
+ *
+ * `size` is the number of lanes printed; `bits` is only used to print the
+ * scalar type in the signed case; `op` is not used.
+ */
+static void op_builtin_avg_pbody(int op, int type, int bits, int size)
+{
+	int i;
+
+	switch (type) {
+	case TYPE_INT:
+		printf("\t");
+		gen_print_stype(type, bits);
+		printf(" x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;\n");
+		for (i = 0; i < size; i++)
+			/* A literal '%' in the emitted code must be written "%%":
+			 * a bare "% 2" is parsed by printf as a conversion
+			 * specification, which is undefined behaviour. */
+			printf(
+				"\tx_d_rem = (vec1.generic[%d] %% 2);\n"
+				"\ty_d_rem = (vec2.generic[%d] %% 2);\n"
+				"\trem_d_quot = ((x_d_rem + y_d_rem) / 2);\n"
+				"\trem_d_rem = ((x_d_rem + y_d_rem) %% 2);\n"
+				"\n"
+				"\tvec1.generic[%d] = ((vec1.generic[%d] / 2) + (vec2.generic[%d] / 2)) + (rem_d_quot) + (rem_d_rem == 1);\n"
+			, i, i, i, i, i);
+		break;
+	case TYPE_UINT:
+		/* Leading tab added so the emitted statement is indented like
+		 * every other statement this generator prints. */
+		for (i = 0; i < size; i++)
+			printf("\tvec1.generic[%d] = (vec1.generic[%d] >> 1) + (vec2.generic[%d] >> 1) + ((vec1.generic[%d] | vec2.generic[%d]) & 1);\n", i, i, i, i, i);
+		break;
+	case TYPE_FLOAT:
+		/* this is probably fine. */
+		for (i = 0; i < size; i++)
+			printf("\tvec1.generic[%d] = (vec1.generic[%d] + vec2.generic[%d]) / 2;\n", i, i, i);
+		break;
+	default:
+		/* Other types: emit only the return statement below. */
+		break;
+	}
+	printf("\treturn vec1;\n");
+	(void)op;
+}
+/*
+ * Emits the bitwise-NOT body: one `~` assignment per lane of `vec`,
+ * followed by a return of `vec`. Only `size` (the lane count) is used.
+ */
+static void op_builtin_not_pbody(int op, int type, int bits, int size)
+{
+	int lane = 0;
+
+	while (lane < size) {
+		printf("\tvec.generic[%d] = ~vec.generic[%d];\n", lane, lane);
+		++lane;
+	}
+
+	puts("\treturn vec;");
+}
+/*
+ * Emits the body of the element-wise shift operations
+ * (OP_LSHIFT, OP_RSHIFT, OP_LRSHIFT).
+ *
+ * - TYPE_UINT: all three map straight onto the C `<<=` / `>>=` operators.
+ * - TYPE_INT, OP_LSHIFT / OP_LRSHIFT: each lane is moved through a union of
+ *   the signed and unsigned scalar types so the shift itself is performed on
+ *   the unsigned member.
+ * - TYPE_INT, OP_RSHIFT: emitted as a sign-aware expression (see below).
+ *   NOTE(review): this assumes OP_RSHIFT is the arithmetic (sign-preserving)
+ *   shift and OP_LRSHIFT the logical one -- confirm against genlib.h.
+ *
+ * Any other type emits only the trailing return statement.
+ * `size` is the number of lanes printed; `bits` selects the scalar types
+ * printed for the union.
+ */
+static void op_builtin_shift_pbody(int op, int type, int bits, int size)
+{
+	int i;
+
+	switch (type) {
+	case TYPE_UINT: {
+		const char *ops[] = {
+			[OP_LSHIFT] = "<<",
+			[OP_RSHIFT] = ">>",
+			[OP_LRSHIFT] = ">>",
+		};
+		/* The format has three conversions (%d, %s, %d); the lane index
+		 * must be supplied again for the vec2 subscript. */
+		for (i = 0; i < size; i++)
+			printf("\tvec1.generic[%d] %s= vec2.generic[%d];\n", i, ops[op], i);
+		break;
+	}
+	case TYPE_INT: {
+		switch (op) {
+		case OP_LSHIFT:
+		case OP_LRSHIFT: {
+			const char *ops[] = {
+				[OP_LSHIFT] = "<<",
+				[OP_LRSHIFT] = ">>",
+			};
+			printf("\tunion { ");
+			gen_print_stype(TYPE_UINT, bits);
+			printf(" u; ");
+			gen_print_stype(TYPE_INT, bits);
+			puts(" s; } x;\n");
+			for (i = 0; i < size; i++)
+				printf(
+					"\tx.s = vec1.generic[%d];\n"
+					"\tx.u %s= vec2.generic[%d];\n"
+					"\tvec1.generic[%d] = x.s;\n",
+				i, ops[op], i, i);
+			break;
+		}
+		case OP_RSHIFT:
+			/* Right-shifting a negative signed value is
+			 * implementation-defined in C. For a negative lane, ~x is
+			 * non-negative, so shifting it is well defined, and
+			 * complementing the result again yields the sign-extended
+			 * value. Non-negative lanes are shifted directly. */
+			for (i = 0; i < size; i++)
+				printf("\tvec1.generic[%d] = (vec1.generic[%d] < 0) ? ~((~vec1.generic[%d]) >> vec2.generic[%d]) : (vec1.generic[%d] >> vec2.generic[%d]);\n", i, i, i, i, i, i);
+			break;
+		default:
+			break;
+		}
+		break;
+	}
+	default:
+		break;
+	}
+	puts("\treturn vec1;");
+}
+/*
+ * Emits the body for division and modulo. Every emitted lane is guarded so
+ * that a zero divisor yields 0 instead of performing the operation.
+ *
+ * Floating-point modulo is emitted as a call to fmod(); every other
+ * combination uses the C operator selected by `op`. `size` is the number of
+ * lanes printed; `bits` is not used.
+ */
+static void op_builtin_nonzero_pbody(int op, int type, int bits, int size)
+{
+	const char *symbols[OP_FINAL_] = {
+		[OP_DIV] = "/",
+		[OP_MOD] = "%",
+	};
+	const int use_fmod = (op == OP_MOD && type == TYPE_FLOAT);
+	int lane;
+
+	for (lane = 0; lane < size; lane++) {
+		if (use_fmod)
+			printf("\tvec1.generic[%d] = (vec2.generic[%d] ? fmod(vec1.generic[%d], vec2.generic[%d]) : 0);\n",
+				lane, lane, lane, lane);
+		else
+			printf("\tvec1.generic[%d] = (vec2.generic[%d] ? (vec1.generic[%d] %s vec2.generic[%d]) : 0);\n",
+				lane, lane, lane, symbols[op], lane);
+	}
+
+	puts("\treturn vec1;");
+	(void)bits;
+}
+/*
+ * Emits the body for the comparison operations. For each lane the emitted
+ * code memset()s the lane's bytes to 0xFF when the comparison holds and to 0
+ * otherwise, then returns `vec1`.
+ *
+ * `op` selects the relational operator, `bits / 8` is the byte count passed
+ * to memset, and `size` is the number of lanes printed.
+ */
+static void op_cmp_pbody(int op, int type, int bits, int size)
+{
+	const char *relations[OP_FINAL_] = {
+		[OP_CMPLT] = "<",
+		[OP_CMPLE] = "<=",
+		[OP_CMPEQ] = "==",
+		[OP_CMPGE] = ">=",
+		[OP_CMPGT] = ">",
+	};
+	const int lane_bytes = bits / 8;
+	int lane = 0;
+
+	/* Filling the lane bytewise works the same for int, uint and float. */
+	while (lane < size) {
+		printf("\tmemset(&vec1.generic[%d], (vec1.generic[%d] %s vec2.generic[%d]) ? 0xFF : 0, %d);\n",
+			lane, lane, relations[op], lane, lane_bytes);
+		lane++;
+	}
+
+	puts("\treturn vec1;");
+}
+/*
+ * Emits the body for min/max: each lane of `vec1` is replaced by whichever
+ * of the two inputs wins the comparison ("<" for OP_MIN, ">" for OP_MAX),
+ * and `vec1` is returned. `size` is the number of lanes printed.
+ */
+static void op_minmax_pbody(int op, int type, int bits, int size)
+{
+	const char *relations[OP_FINAL_] = {
+		[OP_MIN] = "<",
+		[OP_MAX] = ">",
+	};
+	int lane = 0;
+
+	while (lane < size) {
+		printf("\tvec1.generic[%d] = (vec1.generic[%d] %s vec2.generic[%d]) ? (vec1.generic[%d]) : (vec2.generic[%d]);\n",
+			lane, lane, relations[op], lane, lane, lane);
+		lane++;
+	}
+
+	puts("\treturn vec1;");
+}
+/*
+ * Emits the splat body: declares a local vector `vec` of the requested
+ * type, assigns the scalar `x` to each of its `size` lanes, and returns it.
+ */
+static void op_splat_pbody(int op, int type, int bits, int size)
+{
+	int lane;
+
+	/* Declaration line: "<tab><vector type> vec;" */
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	puts(" vec;");
+
+	for (lane = 0; lane < size; ++lane)
+		printf("\tvec.generic[%d] = x;\n", lane);
+
+	puts("\treturn vec;");
+}
+/*
+ * Emits the load body: declares a local vector `vec`, memcpy()s
+ * (bits / 8) * size bytes from `x` into it, and returns it.
+ */
+static void op_load_pbody(int op, int type, int bits, int size)
+{
+	const int total_bytes = (bits / 8) * size;
+
+	/* Declaration line: "<tab><vector type> vec;" */
+	printf("\t");
+	gen_print_vtype(type, bits, size);
+	puts(" vec;");
+
+	printf("\tmemcpy(vec.generic, x, %d);\n", total_bytes);
+	puts("\treturn vec;");
+}
+/*
+ * Emits the store body: a single memcpy() of (bits / 8) * size bytes from
+ * the vector's storage into `x`. No return statement is emitted.
+ */
+static void op_store_pbody(int op, int type, int bits, int size)
+{
+	const int total_bytes = (bits / 8) * size;
+
+	printf("\tmemcpy(x, vec.generic, %d);\n", total_bytes);
+}
+/* ------------------------------------------------------------------------ */
+static struct op_impl op_impl[OP_FINAL_] = {
+	[OP_SPLAT] = {NULL, NULL, op_splat_pbody},
+	[OP_LOAD_ALIGNED] = {NULL, NULL, op_load_pbody},
+	[OP_LOAD] = {NULL, NULL, op_load_pbody},
+	[OP_STORE_ALIGNED] = {NULL, NULL, op_store_pbody},
+	[OP_STORE] = {NULL, NULL, op_store_pbody},
+	/* arithmetic */
+	[OP_ADD] = {NULL, NULL, op_builtin_pbody},
+	[OP_SUB] = {NULL, NULL, op_builtin_pbody},
+	[OP_MUL] = {NULL, NULL, op_builtin_pbody},
+	[OP_DIV] = {NULL, NULL, op_builtin_nonzero_pbody},
+	[OP_MOD] = {NULL, NULL, op_builtin_nonzero_pbody},
+	[OP_AVG] = {NULL, NULL, op_builtin_avg_pbody},
+	/* bitwise */
+	[OP_AND] = {NULL, NULL, op_builtin_pbody},
+	[OP_OR] = {NULL, NULL, op_builtin_pbody},
+	[OP_XOR] = {NULL, NULL, op_builtin_pbody},
+	[OP_NOT] = {NULL, NULL, op_builtin_not_pbody},
+	/* min/max */
+	[OP_MIN] = {NULL, NULL, op_minmax_pbody},
+	[OP_MAX] = {NULL, NULL, op_minmax_pbody},
+	/* bitshift */
+	[OP_LSHIFT] = {NULL, NULL, op_builtin_shift_pbody},
+	[OP_LRSHIFT] = {NULL, NULL, op_builtin_shift_pbody},
+	[OP_RSHIFT] = {NULL, NULL, op_builtin_shift_pbody},
+	/* comparison */
+	[OP_CMPLT] = {NULL, NULL, op_cmp_pbody},
+	[OP_CMPLE] = {NULL, NULL, op_cmp_pbody},
+	[OP_CMPEQ] = {NULL, NULL, op_cmp_pbody},
+	[OP_CMPGE] = {NULL, NULL, op_cmp_pbody},
+	[OP_CMPGT] = {NULL, NULL, op_cmp_pbody},
+};
+/*
+ * Program entry point: passes the op_impl table and the name string
+ * "generic" to gen(), then exits with status 0.
+ */
+int main(void)
+{
+	gen(op_impl, "generic");
+
+	return 0;
+}

Mercurial > vec

comparison gen/gengeneric.c @ 45:7955bed1d169 default tip