/*
 * Round-trips an index ramp (0, 1, 2, ...) through an aligned load and an
 * aligned store for every vector type listed below, and checks that the
 * arrays produced by the *_ALIGNED_ARRAY helpers pass *_PTR_ALIGNED.
 *
 * Returns 0 when every check passed, 1 if at least one of them failed.
 */
static int test_align(void)
{
	int ret = 0;

/*
 * type/ctype: lower-/upper-case type prefix (int/INT, uint/UINT, f/F)
 * bits:       width of one element, in bits
 * size:       number of elements in the vector
 *
 * There is deliberately no semicolon after `while (0)`: the caller writes
 * it, so an invocation behaves like one ordinary statement.
 */
#define RUN_TEST(type, ctype, bits, size) \
	do { \
		int i; \
		/* allocate the aligned input array */ \
		V##ctype##bits##x##size##_ALIGNED_ARRAY(vec_arr); \
	\
		/* fill it with the element indices */ \
		for (i = 0; i < size; i++) \
			vec_arr[i] = i; \
	\
		/* use the aligned load so both aligned entry points are exercised */ \
		v##type##bits##x##size vec = v##type##bits##x##size##_load_aligned(vec_arr); \
	\
		/* now allocate an aligned output array */ \
		V##ctype##bits##x##size##_ALIGNED_ARRAY(vec_arr_out); \
	\
		/* store the vector back out */ \
		v##type##bits##x##size##_store_aligned(vec, vec_arr_out); \
	\
		/* the round trip must reproduce the input byte for byte */ \
		ret |= !!memcmp(vec_arr, vec_arr_out, size * sizeof(*vec_arr)); \
	\
		/* and both arrays must really be aligned */ \
		ret |= !V##ctype##bits##x##size##_PTR_ALIGNED(vec_arr); \
		ret |= !V##ctype##bits##x##size##_PTR_ALIGNED(vec_arr_out); \
	} while (0)

/* run the signed and the unsigned integer variant of one vector shape */
#define RUN_TESTS(bits, size) \
	RUN_TEST(int, INT, bits, size); \
	RUN_TEST(uint, UINT, bits, size)

	/* 16-bit vectors */
	RUN_TESTS(8, 2);

	/* 32-bit vectors */
	RUN_TESTS(8, 4);
	RUN_TESTS(16, 2);

	/* 64-bit vectors */
	RUN_TESTS(8, 8);
	RUN_TESTS(16, 4);
	RUN_TESTS(32, 2);

	/* 128-bit vectors */
	RUN_TESTS(8, 16);
	RUN_TESTS(16, 8);
	RUN_TESTS(32, 4);
	RUN_TESTS(64, 2);

	/* 256-bit vectors */
	RUN_TESTS(8, 32);
	RUN_TESTS(16, 16);
	RUN_TESTS(32, 8);
	RUN_TESTS(64, 4);

	/* 512-bit vectors */
	RUN_TESTS(8, 64);
	RUN_TESTS(16, 32);
	RUN_TESTS(32, 16);
	RUN_TESTS(64, 8);

#undef RUN_TESTS

	/* floating point */
	RUN_TEST(f, F, 32, 2);
	RUN_TEST(f, F, 32, 4);
	RUN_TEST(f, F, 32, 8);
	RUN_TEST(f, F, 32, 16);

	RUN_TEST(f, F, 64, 2);
	RUN_TEST(f, F, 64, 4);
	RUN_TEST(f, F, 64, 8);

#undef RUN_TEST

	return ret;
}
