diff test/test_benchmark_vec.c @ 45:7955bed1d169 default tip

*: add preliminary floating point support; no x86 intrinsics just yet, but I did add altivec since it's (arguably) the simplest :)
author Paper <paper@tflc.us>
date Wed, 30 Apr 2025 18:36:38 -0400
parents c6e0df09b86f
children
line wrap: on
line diff
--- a/test/test_benchmark_vec.c	Tue Apr 29 16:54:13 2025 -0400
+++ b/test/test_benchmark_vec.c	Wed Apr 30 18:36:38 2025 -0400
@@ -1,43 +1,97 @@
 #include "vec/vec.h"
 
-extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl,
-	uint32_t length, int32_t *pmin, int32_t *pmax)
-{
-	int32_t smin = INT32_MAX, smax = INT32_MIN;
-	uint32_t len32;
-	int i;
-	vint16x8 min = vint16x8_splat(*pmin);
-	vint16x8 max = vint16x8_splat(*pmax);
-	VINT16x8_ALIGNED_ARRAY(mins);
-	VINT16x8_ALIGNED_ARRAY(maxs);
-
-	len32 = length / 8;
-	while (len32--) {
-		vint16x8 vec = vint16x8_load_aligned(smpl);
-
-		min = vint16x8_min(vec, min);
-		max = vint16x8_max(vec, max);
-
-		smpl += 8;
+#define DEFINE_MINMAX_BENCHMARK(TYPE,CTYPE,BITS,SIZE,MAX,MIN) \
+	extern void test_benchmark_sample_minmax_##TYPE##BITS##x##SIZE##_impl(vec_##TYPE##BITS *smpl, \
+		uint32_t length, vec_##TYPE##BITS *pmin, vec_##TYPE##BITS *pmax) \
+	{ \
+		vec_##TYPE##BITS smin = MAX, smax = MIN; \
+		uint32_t len32; \
+		int i; \
+		v##TYPE##BITS##x##SIZE min = v##TYPE##BITS##x##SIZE##_splat(*pmin); \
+		v##TYPE##BITS##x##SIZE max = v##TYPE##BITS##x##SIZE##_splat(*pmax); \
+		V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(mins); \
+		V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(maxs); \
+		/* vector pass: the running min and max start from the caller's *pmin and *pmax and absorb SIZE elements per iteration */ \
+		len32 = length / SIZE; \
+		while (len32--) { \
+			v##TYPE##BITS##x##SIZE vec = v##TYPE##BITS##x##SIZE##_load_aligned(smpl); \
+			/* NOTE(review): the load above is the aligned variant -- presumably callers pass a suitably aligned smpl; confirm */ \
+			min = v##TYPE##BITS##x##SIZE##_min(vec, min); \
+			max = v##TYPE##BITS##x##SIZE##_max(vec, max); \
+	\
+			smpl += SIZE; \
+		} \
+		/* spill both vector accumulators into mins and maxs so the individual lanes can be indexed below */ \
+		v##TYPE##BITS##x##SIZE##_store_aligned(min, mins); \
+		v##TYPE##BITS##x##SIZE##_store_aligned(max, maxs); \
+	\
+		/* reduce the SIZE lanes to one scalar minimum and one scalar maximum (smin and smax were seeded with MAX and MIN) */ \
+		for (i = 0; i < SIZE; i++) { \
+			if (mins[i] < smin) smin = mins[i]; \
+			if (maxs[i] > smax) smax = maxs[i]; \
+		} \
+		/* scalar pass over the length % SIZE trailing elements the vector loop did not consume */ \
+		len32 = length % SIZE; \
+		while (len32--) { \
+			if (*smpl < smin) smin = *smpl; \
+			if (*smpl > smax) smax = *smpl; \
+	\
+			smpl++; \
+		} \
+		/* write the results back through the same pointers the starting values were read from */ \
+		*pmin = smin; \
+		*pmax = smax; \
 	}
 
-	vint16x8_store_aligned(min, mins);
-	vint16x8_store_aligned(max, maxs);
+DEFINE_MINMAX_BENCHMARK(int,INT,8,2,INT8_MAX,INT8_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,8,4,INT8_MAX,INT8_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,8,8,INT8_MAX,INT8_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,8,16,INT8_MAX,INT8_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,8,32,INT8_MAX,INT8_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,8,64,INT8_MAX,INT8_MIN)
 
-	/* get the lowest minimum of what we have left */
-	for (i = 0; i < 8; i++) {
-		if (mins[i] < smin) smin = mins[i];
-		if (maxs[i] > smax) smax = maxs[i];
-	}
+DEFINE_MINMAX_BENCHMARK(int,INT,16,2,INT16_MAX,INT16_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,16,4,INT16_MAX,INT16_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,16,8,INT16_MAX,INT16_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,16,16,INT16_MAX,INT16_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,16,32,INT16_MAX,INT16_MIN)
+
+DEFINE_MINMAX_BENCHMARK(int,INT,32,2,INT32_MAX,INT32_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,32,4,INT32_MAX,INT32_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,32,8,INT32_MAX,INT32_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,32,16,INT32_MAX,INT32_MIN)
+
+DEFINE_MINMAX_BENCHMARK(int,INT,64,2,INT64_MAX,INT64_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,64,4,INT64_MAX,INT64_MIN)
+DEFINE_MINMAX_BENCHMARK(int,INT,64,8,INT64_MAX,INT64_MIN)
 
-	len32 = length % 8;
-	while (len32--) {
-		if (*smpl < smin) smin = *smpl;
-		if (*smpl > smax) smax = *smpl;
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,2,UINT8_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,4,UINT8_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,8,UINT8_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,16,UINT8_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,32,UINT8_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,8,64,UINT8_MAX,0)
+
+DEFINE_MINMAX_BENCHMARK(uint,UINT,16,2,UINT16_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,16,4,UINT16_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,16,8,UINT16_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,16,16,UINT16_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,16,32,UINT16_MAX,0)
 
-		smpl++;
-	}
+DEFINE_MINMAX_BENCHMARK(uint,UINT,32,2,UINT32_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,32,4,UINT32_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,32,8,UINT32_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,32,16,UINT32_MAX,0)
+
+DEFINE_MINMAX_BENCHMARK(uint,UINT,64,2,UINT64_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,64,4,UINT64_MAX,0)
+DEFINE_MINMAX_BENCHMARK(uint,UINT,64,8,UINT64_MAX,0)
 
-	*pmin = smin;
-	*pmax = smax;
-}
+DEFINE_MINMAX_BENCHMARK(f,F,32,2,1.0f,-1.0f) /* NOTE(review): float variants seed smin/smax with 1.0 and -1.0, not type extremes -- presumably samples lie in [-1, 1]; confirm */
+DEFINE_MINMAX_BENCHMARK(f,F,32,4,1.0f,-1.0f)
+DEFINE_MINMAX_BENCHMARK(f,F,32,8,1.0f,-1.0f)
+DEFINE_MINMAX_BENCHMARK(f,F,32,16,1.0f,-1.0f)
+
+DEFINE_MINMAX_BENCHMARK(f,F,64,2,1.0,-1.0)
+DEFINE_MINMAX_BENCHMARK(f,F,64,4,1.0,-1.0)
+DEFINE_MINMAX_BENCHMARK(f,F,64,8,1.0,-1.0)