/*
 * CREATE_TEST_EX - emit a static checker for one two-operand vector operation.
 *
 * Expands to `static int test_arith_v<type><bits>x<size>_<op>(a, b)`, which
 * runs `v<type><bits>x<size>_<op>(a, b)`, stores both inputs and the result
 * into the scratch arrays orig_a / orig_b / orig_c, and then compares every
 * lane of the result against the scalar reference expression `equiv`.
 *
 *   type, ctype               lower/upper-case stems of the first operand and
 *                             result type (pasted into v<type>..., vec_<type><bits>
 *                             and V<ctype>..._ALIGNED_ARRAY)
 *   print                     string literal spliced after '%' in the message
 *   bits, size                pasted into the type names; `size` is also the
 *                             number of lanes checked
 *   op                        operation name suffix
 *   equiv                     reference expression; it is evaluated with `i`,
 *                             `orig_a`, `orig_b` (and `orig_c`) in scope and is
 *                             also stringified into the failure message
 *   secondtype, secondctype   stems of the second operand type
 *
 * The generated function returns 0 when all lanes match. On the first
 * mismatch it prints a diagnostic plus both operands and the result to
 * stderr and returns 1.
 */
#define CREATE_TEST_EX(type, ctype, print, bits, size, op, equiv, secondtype, secondctype) \
	static int test_arith_v##type##bits##x##size##_##op(v##type##bits##x##size a, v##secondtype##bits##x##size b) \
	{ \
		V##ctype##bits##x##size##_ALIGNED_ARRAY(orig_a); \
		V##secondctype##bits##x##size##_ALIGNED_ARRAY(orig_b); \
		V##ctype##bits##x##size##_ALIGNED_ARRAY(orig_c); \
	\
		v##type##bits##x##size result = v##type##bits##x##size##_##op(a, b); \
	\
		/* dump operands and result so they can be inspected lane by lane */ \
		v##type##bits##x##size##_store_aligned(a, orig_a); \
		v##secondtype##bits##x##size##_store_aligned(b, orig_b); \
		v##type##bits##x##size##_store_aligned(result, orig_c); \
	\
		for (int i = 0; i < size; i++) { \
			/* narrow the reference value to the lane type before comparing */ \
			const vec_##type##bits expected = (vec_##type##bits)(equiv); \
	\
			if (expected == orig_c[i]) { \
				continue; \
			} \
	\
			fprintf(stderr, "v" #type #bits "x" #size "_" #op " test FAILED at index %d: (%s) [%" print "] does not equal result [%" print "]!\n", i, #equiv, expected, orig_c[i]); \
			print_v##type##bits##x##size(stderr,a); \
			print_v##secondtype##bits##x##size(stderr,b); \
			print_v##type##bits##x##size(stderr,result); \
			fprintf(stderr, "\n"); \
			return 1; \
		} \
	\
		return 0; \
	}

/*
 * CREATE_TEST_ONEPARAM - emit a static checker for one single-operand
 * vector operation.
 *
 * Expands to `static int test_arith_v<type><bits>x<size>_<op>(a)`, which runs
 * `v<type><bits>x<size>_<op>(a)`, stores the input and the result into the
 * scratch arrays orig_a / orig_c, and compares every lane of the result
 * against the scalar reference expression `equiv` (evaluated with `i` and
 * `orig_a` in scope, and stringified into the failure message).
 *
 * The generated function returns 0 when all lanes match. On the first
 * mismatch it prints a diagnostic plus the operand and the result to stderr
 * and returns 1.
 */
#define CREATE_TEST_ONEPARAM(type, ctype, print, bits, size, op, equiv) \
	static int test_arith_v##type##bits##x##size##_##op(v##type##bits##x##size a) \
	{ \
		V##ctype##bits##x##size##_ALIGNED_ARRAY(orig_a); \
		V##ctype##bits##x##size##_ALIGNED_ARRAY(orig_c); \
	\
		v##type##bits##x##size result = v##type##bits##x##size##_##op(a); \
	\
		/* dump operand and result so they can be inspected lane by lane */ \
		v##type##bits##x##size##_store_aligned(a, orig_a); \
		v##type##bits##x##size##_store_aligned(result, orig_c); \
	\
		for (int i = 0; i < size; i++) { \
			/* narrow the reference value to the lane type before comparing */ \
			const vec_##type##bits expected = (vec_##type##bits)(equiv); \
	\
			if (expected == orig_c[i]) { \
				continue; \
			} \
	\
			fprintf(stderr, "v" #type #bits "x" #size "_" #op " test FAILED at index %d: (%s) [%" print "] does not equal result [%" print "]!\n", i, #equiv, expected, orig_c[i]); \
			print_v##type##bits##x##size(stderr,a); \
			print_v##type##bits##x##size(stderr,result); \
			fprintf(stderr, "\n"); \
			return 1; \
		} \
	\
		return 0; \
	}

/*
 * CREATE_TEST - two-operand test where both operands share one vector type.
 * Forwards to CREATE_TEST_EX, passing `type`/`ctype` again as the second
 * operand's type stems.
 */
#define CREATE_TEST(type, ctype, print, bits, size, op, equiv) \
	CREATE_TEST_EX(type, ctype, print, bits, size, op, equiv, type, ctype)

/*
 * CREATE_TEST_SHIFT - two-operand test whose second operand is always the
 * unsigned vector type (stems `uint`/`UINT`) of the same bits and size,
 * whatever the first operand's type is. Forwards to CREATE_TEST_EX.
 */
#define CREATE_TEST_SHIFT(type, ctype, print, bits, size, op, equiv) \
	CREATE_TEST_EX(type, ctype, print, bits, size, op, equiv, uint, UINT)

/*
 * CREATE_TESTS_INT - generate the full set of integer arithmetic checkers
 * for one vector type (`v<type><bits>x<size>`).
 *
 *   type, ctype   lower/upper-case type stems forwarded to CREATE_TEST
 *   sign          pasted into the scalar helper names: vec_im<sign>avg,
 *                 vec_<sign>rshift, vec_<sign>lshift (empty for the signed
 *                 family, `u` for the unsigned one)
 *   print         printf conversion (without '%') for one lane
 *   bits, size    element width and lane count
 *
 * Each line pairs an operation name with the scalar expression the result
 * lane is expected to equal. CREATE_TEST_EX casts that expression to
 * vec_<type><bits> before comparing.
 *
 * add/sub/mul are evaluated in `unsigned long long`, so the reference value
 * wraps with defined behaviour. Done in `int` (8/16-bit lanes after
 * promotion) or in the signed lane type (32/64-bit), an overflowing
 * operation, for example 0xFFFF * 0xFFFF on 16-bit unsigned lanes, would be
 * undefined. The low `bits` bits of the wrapped value are the same either
 * way, and the narrowing cast keeps exactly those.
 *
 * div and mod expect 0 when the divisor lane is 0. The shift tests take an
 * unsigned vector as their second operand (CREATE_TEST_SHIFT). lrshift
 * always uses vec_urshift on the value cast to vec_uint<bits>.
 *
 * NOTE(review): for signed lanes as wide as `int` or wider, "minimum value
 * divided by -1" is still undefined in the div/mod reference expressions;
 * confirm the test tables never produce that pair.
 */
#define CREATE_TESTS_INT(type, ctype, sign, print, bits, size) \
	CREATE_TEST(type, ctype, print, bits, size, add, (unsigned long long)orig_a[i] + (unsigned long long)orig_b[i]) \
	CREATE_TEST(type, ctype, print, bits, size, sub, (unsigned long long)orig_a[i] - (unsigned long long)orig_b[i]) \
	CREATE_TEST(type, ctype, print, bits, size, mul, (unsigned long long)orig_a[i] * (unsigned long long)orig_b[i]) \
	CREATE_TEST(type, ctype, print, bits, size, div, (orig_b[i]) ? (orig_a[i] / orig_b[i]) : 0) \
	CREATE_TEST(type, ctype, print, bits, size, mod, (orig_b[i]) ? (orig_a[i] % orig_b[i]) : 0) \
	CREATE_TEST(type, ctype, print, bits, size, and, orig_a[i] & orig_b[i]) \
	CREATE_TEST(type, ctype, print, bits, size, or,  orig_a[i] | orig_b[i]) \
	CREATE_TEST(type, ctype, print, bits, size, xor, orig_a[i] ^ orig_b[i]) \
	CREATE_TEST(type, ctype, print, bits, size, avg, (vec_##type##bits)vec_im##sign##avg(orig_a[i], orig_b[i])) \
	CREATE_TEST_SHIFT(type, ctype, print, bits, size, rshift, vec_##sign##rshift(orig_a[i], orig_b[i])) \
	CREATE_TEST_SHIFT(type, ctype, print, bits, size, lshift, vec_##sign##lshift(orig_a[i], orig_b[i])) \
	CREATE_TEST_SHIFT(type, ctype, print, bits, size, lrshift, vec_urshift((vec_uint##bits)orig_a[i], orig_b[i])) \
	CREATE_TEST(type, ctype, print, bits, size, min, (orig_a[i] < orig_b[i]) ? orig_a[i] : orig_b[i]) \
	CREATE_TEST(type, ctype, print, bits, size, max, (orig_a[i] > orig_b[i]) ? orig_a[i] : orig_b[i])

/*
 * CREATE_TESTS_FLOAT - generate the floating-point arithmetic checkers for
 * `vf<bits>x<size>`.
 *
 * Two-operand tests: add, sub, mul, div, mod, avg, min, max.
 * One-operand tests: round, ceil, floor, trunc.
 *
 * Each line pairs an operation name with the scalar expression a result lane
 * must equal; lanes are printed with "%f". div and mod expect 0 when the
 * divisor lane compares equal to zero. fmod/round/ceil/floor/trunc are
 * presumably the <math.h> functions (no declaration is visible here).
 *
 * These expressions are stringified into the failure message by
 * CREATE_TEST_EX / CREATE_TEST_ONEPARAM, so editing one also changes the
 * text that is printed. The comparison done there is an exact `!=`, with no
 * tolerance.
 */
#define CREATE_TESTS_FLOAT(bits, size) \
	CREATE_TEST(f, F, "f", bits, size, add, orig_a[i] + orig_b[i]) \
	CREATE_TEST(f, F, "f", bits, size, sub, orig_a[i] - orig_b[i]) \
	CREATE_TEST(f, F, "f", bits, size, mul, orig_a[i] * orig_b[i]) \
	CREATE_TEST(f, F, "f", bits, size, div, (orig_b[i]) ? (orig_a[i] / orig_b[i]) : 0) \
	CREATE_TEST(f, F, "f", bits, size, mod, (orig_b[i]) ? (fmod(orig_a[i], orig_b[i])) : 0) \
	CREATE_TEST(f, F, "f", bits, size, avg, (orig_a[i] + orig_b[i]) / 2) \
	CREATE_TEST(f, F, "f", bits, size, min, (orig_a[i] < orig_b[i]) ? orig_a[i] : orig_b[i]) \
	CREATE_TEST(f, F, "f", bits, size, max, (orig_a[i] > orig_b[i]) ? orig_a[i] : orig_b[i]) \
	CREATE_TEST_ONEPARAM(f, F, "f", bits, size, round, round(orig_a[i])) \
	CREATE_TEST_ONEPARAM(f, F, "f", bits, size, ceil,  ceil(orig_a[i])) \
	CREATE_TEST_ONEPARAM(f, F, "f", bits, size, floor, floor(orig_a[i])) \
	CREATE_TEST_ONEPARAM(f, F, "f", bits, size, trunc, trunc(orig_a[i]))

/*
 * CREATE_TESTS - generate both integer families for one bits/size pair:
 * the signed one (stems int/INT, empty `sign`) and the unsigned one (stems
 * uint/UINT, `sign` = u). The print argument is pasted together as
 * PRId<bits> / PRIu<bits>, i.e. the <inttypes.h>-style format macro names.
 */
#define CREATE_TESTS(bits, size) \
	CREATE_TESTS_INT(int,  INT,  /* nothing */, PRI##d##bits, bits, size) \
	CREATE_TESTS_INT(uint, UINT, u,             PRI##u##bits, bits, size)

/*
 * Instantiate the integer checkers. Groups are ordered by total vector
 * width, i.e. element bits multiplied by lane count.
 */

/* 16 bits total */
CREATE_TESTS(8, 2)

/* 32 bits total */
CREATE_TESTS(8, 4)
CREATE_TESTS(16, 2)

/* 64 bits total */
CREATE_TESTS(8, 8)
CREATE_TESTS(16, 4)
CREATE_TESTS(32, 2)

/* 128 bits total */
CREATE_TESTS(8, 16)
CREATE_TESTS(16, 8)
CREATE_TESTS(32, 4)
CREATE_TESTS(64, 2)

/* 256 bits total */
CREATE_TESTS(8, 32)
CREATE_TESTS(16, 16)
CREATE_TESTS(32, 8)
CREATE_TESTS(64, 4)

/* 512 bits total */
CREATE_TESTS(8, 64)
CREATE_TESTS(16, 32)
CREATE_TESTS(32, 16)
CREATE_TESTS(64, 8)

/* Floating-point checkers: 32-bit lanes, 64 to 512 bits total */
CREATE_TESTS_FLOAT(32, 2)
CREATE_TESTS_FLOAT(32, 4)
CREATE_TESTS_FLOAT(32, 8)
CREATE_TESTS_FLOAT(32, 16)

/* Floating-point checkers: 64-bit lanes, 128 to 512 bits total */
CREATE_TESTS_FLOAT(64, 2)
CREATE_TESTS_FLOAT(64, 4)
CREATE_TESTS_FLOAT(64, 8)

/*
 * The generator macros are only needed to stamp out the checkers above.
 * Remove all of them, including CREATE_TEST_EX and CREATE_TEST_ONEPARAM,
 * so none of these generic names stays defined for code that follows.
 */
#undef CREATE_TESTS_INT
#undef CREATE_TESTS_FLOAT
#undef CREATE_TESTS
#undef CREATE_TEST
#undef CREATE_TEST_SHIFT
#undef CREATE_TEST_ONEPARAM
#undef CREATE_TEST_EX

/*
 * Run every generated arithmetic checker and report the combined outcome.
 *
 * For each integer vector type, every vector produced by vtest<...>(index)
 * is paired with every other such vector for the two-operand operations,
 * and then with a splat of every shift count in [0, bits) for the shift
 * operations. For each floating-point vector type the two-operand
 * operations are run the same way, followed by the one-operand rounding
 * operations on the left-hand vector.
 *
 * Returns 0 if every checker returned 0; otherwise the bitwise OR of the
 * checker results (non-zero).
 */
static int test_arith(void)
{
	int ret = 0;

/* Call one generated checker and fold its status into `ret`. */
#define RUN_ARITH_OP(type, bits, size, op, ...) \
	ret |= test_arith_v##type##bits##x##size##_##op(__VA_ARGS__);

/* Integer family: all-pairs two-operand checks, then shift checks. */
#define RUN_TESTS_SIGN(shorttype, type, bits, size) \
	for (size_t lhs_idx = 0U; lhs_idx < ARRAY_SIZE(testval##shorttype##bits); lhs_idx++) { \
		const v##type##bits##x##size lhs = vtest##shorttype##bits##x##size(lhs_idx); \
		for (size_t rhs_idx = 0U; rhs_idx < ARRAY_SIZE(testval##shorttype##bits); rhs_idx++) { \
			const v##type##bits##x##size rhs = vtest##shorttype##bits##x##size(rhs_idx); \
			RUN_ARITH_OP(type, bits, size, add, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, sub, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, mul, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, div, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, mod, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, and, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, or, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, xor, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, avg, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, min, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, max, lhs, rhs) \
		} \
	} \
	\
	for (size_t lhs_idx = 0U; lhs_idx < ARRAY_SIZE(testval##shorttype##bits); lhs_idx++) { \
		const v##type##bits##x##size lhs = vtest##shorttype##bits##x##size(lhs_idx); \
		/* shift counts stay below the element width */ \
		for (uint32_t count = 0U; count < bits; count++) { \
			const vuint##bits##x##size counts = vuint##bits##x##size##_splat(count); \
			RUN_ARITH_OP(type, bits, size, rshift, lhs, counts) \
			RUN_ARITH_OP(type, bits, size, lshift, lhs, counts) \
			RUN_ARITH_OP(type, bits, size, lrshift, lhs, counts) \
		} \
	}

/* Signed and unsigned integer families for one bits/size pair. */
#define RUN_TESTS(bits, size) \
	RUN_TESTS_SIGN( , int,  bits, size) \
	RUN_TESTS_SIGN(u, uint, bits, size)

/* Floating-point family: all-pairs two-operand checks, then rounding checks. */
#define RUN_TESTS_FLOAT(shorttype, type, bits, size) \
	for (size_t lhs_idx = 0U; lhs_idx < ARRAY_SIZE(testval##shorttype##bits); lhs_idx++) { \
		const v##type##bits##x##size lhs = vtest##shorttype##bits##x##size(lhs_idx); \
		for (size_t rhs_idx = 0U; rhs_idx < ARRAY_SIZE(testval##shorttype##bits); rhs_idx++) { \
			const v##type##bits##x##size rhs = vtest##shorttype##bits##x##size(rhs_idx); \
			RUN_ARITH_OP(type, bits, size, add, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, sub, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, mul, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, div, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, mod, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, avg, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, min, lhs, rhs) \
			RUN_ARITH_OP(type, bits, size, max, lhs, rhs) \
		} \
		RUN_ARITH_OP(type, bits, size, floor, lhs) \
		RUN_ARITH_OP(type, bits, size, ceil, lhs) \
		RUN_ARITH_OP(type, bits, size, round, lhs) \
		RUN_ARITH_OP(type, bits, size, trunc, lhs) \
	}

	RUN_TESTS(8, 2)

	RUN_TESTS(8, 4)
	RUN_TESTS(16, 2)

	RUN_TESTS(8, 8)
	RUN_TESTS(16, 4)
	RUN_TESTS(32, 2)

	RUN_TESTS(8, 16)
	RUN_TESTS(16, 8)
	RUN_TESTS(32, 4)
	RUN_TESTS(64, 2)

	RUN_TESTS(8, 32)
	RUN_TESTS(16, 16)
	RUN_TESTS(32, 8)
	RUN_TESTS(64, 4)

	RUN_TESTS(8, 64)
	RUN_TESTS(16, 32)
	RUN_TESTS(32, 16)
	RUN_TESTS(64, 8)

	RUN_TESTS_FLOAT(f, f, 32, 2)
	RUN_TESTS_FLOAT(f, f, 32, 4)
	RUN_TESTS_FLOAT(f, f, 32, 8)
	RUN_TESTS_FLOAT(f, f, 32, 16)

	RUN_TESTS_FLOAT(f, f, 64, 2)
	RUN_TESTS_FLOAT(f, f, 64, 4)
	RUN_TESTS_FLOAT(f, f, 64, 8)

#undef RUN_TESTS_FLOAT
#undef RUN_TESTS_SIGN
#undef RUN_TESTS
#undef RUN_ARITH_OP

	return ret;
}
