diff test/test_benchmark_vec.c @ 41:c6e0df09b86f

*: performance improvements with old GCC, reimplement altivec
author Paper <paper@tflc.us>
date Mon, 28 Apr 2025 16:31:59 -0400
parents 4b5a557aa64f
children
line wrap: on
line diff
--- a/test/test_benchmark_vec.c	Sun Apr 27 02:49:53 2025 -0400
+++ b/test/test_benchmark_vec.c	Mon Apr 28 16:31:59 2025 -0400
@@ -6,31 +6,31 @@
 	int32_t smin = INT32_MAX, smax = INT32_MIN;
 	uint32_t len32;
 	int i;
-	vint16x32 min = vint16x32_splat(*pmin);
-	vint16x32 max = vint16x32_splat(*pmax);
-	VINT16x32_ALIGNED_ARRAY(mins);
-	VINT16x32_ALIGNED_ARRAY(maxs);
+	vint16x8 min = vint16x8_splat(*pmin);
+	vint16x8 max = vint16x8_splat(*pmax);
+	VINT16x8_ALIGNED_ARRAY(mins);
+	VINT16x8_ALIGNED_ARRAY(maxs);
 
-	len32 = length / 32;
+	len32 = length / 8;
 	while (len32--) {
-		vint16x32 vec = vint16x32_load_aligned(smpl);
+		vint16x8 vec = vint16x8_load_aligned(smpl);
 
-		min = vint16x32_min(vec, min);
-		max = vint16x32_max(vec, max);
+		min = vint16x8_min(vec, min);
+		max = vint16x8_max(vec, max);
 
-		smpl += 32;
+		smpl += 8;
 	}
 
-	vint16x32_store_aligned(min, mins);
-	vint16x32_store_aligned(max, maxs);
+	vint16x8_store_aligned(min, mins);
+	vint16x8_store_aligned(max, maxs);
 
 	/* get the lowest minimum of what we have left */
-	for (i = 0; i < 32; i++) {
+	for (i = 0; i < 8; i++) {
 		if (mins[i] < smin) smin = mins[i];
 		if (maxs[i] > smax) smax = maxs[i];
 	}
 
-	len32 = length % 32;
+	len32 = length % 8;
 	while (len32--) {
 		if (*smpl < smin) smin = *smpl;
 		if (*smpl > smax) smax = *smpl;