view test/test_benchmark_vec.c @ 45:7955bed1d169 default tip

*: add preliminary floating point support. No x86 intrinsics just yet, but I did add altivec since it's (arguably) the simplest :)
author Paper <paper@tflc.us>
date Wed, 30 Apr 2025 18:36:38 -0400
parents c6e0df09b86f
children
line wrap: on
line source

#include "vec/vec.h"

/*
 * DEFINE_MINMAX_BENCHMARK(TYPE, CTYPE, BITS, SIZE, MAX, MIN)
 *
 * Defines test_benchmark_sample_minmax_<TYPE><BITS>x<SIZE>_impl(), which
 * folds `length` samples starting at `smpl` into the running extremes held
 * in *pmin and *pmax.
 *
 *   TYPE, CTYPE  lower-/upper-case type prefix pasted into the vec_*, v*x*_*
 *                and V*x*_ALIGNED_ARRAY identifiers
 *   BITS         element width, pasted into the same identifiers
 *   SIZE         number of elements per vector
 *   MAX, MIN     no longer referenced; kept in the parameter list so that
 *                existing invocations keep expanding unchanged
 *
 * The first length / SIZE groups of SIZE samples are read with the
 * *_load_aligned routine, so `smpl` must satisfy whatever alignment that
 * routine requires. The remaining length % SIZE samples are compared one
 * at a time.
 *
 * The scalar accumulators are seeded from *pmin / *pmax, exactly like the
 * vector accumulators. Seeding them from fixed limits instead (for example
 * 1.0 / -1.0 in the float instantiations) would clamp the result whenever
 * the data or the caller's running extremes lie outside those limits, and
 * would overwrite *pmin / *pmax even when `length` is 0.
 *
 * Assuming the *_min / *_max routines are lane-wise, on return *pmin is the
 * smaller of its previous value and the smallest sample, and *pmax is the
 * larger of its previous value and the largest sample.
 */
#define DEFINE_MINMAX_BENCHMARK(TYPE,CTYPE,BITS,SIZE,MAX,MIN) \
	extern void test_benchmark_sample_minmax_##TYPE##BITS##x##SIZE##_impl(vec_##TYPE##BITS *smpl, \
		uint32_t length, vec_##TYPE##BITS *pmin, vec_##TYPE##BITS *pmax) \
	{ \
		/* start from the caller's running extremes, not from fixed limits */ \
		vec_##TYPE##BITS smin = *pmin, smax = *pmax; \
		uint32_t len32; \
		int i; \
		v##TYPE##BITS##x##SIZE min = v##TYPE##BITS##x##SIZE##_splat(*pmin); \
		v##TYPE##BITS##x##SIZE max = v##TYPE##BITS##x##SIZE##_splat(*pmax); \
		V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(mins); \
		V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(maxs); \
	\
		/* vector pass over every complete group of SIZE samples */ \
		len32 = length / SIZE; \
		while (len32--) { \
			v##TYPE##BITS##x##SIZE vec = v##TYPE##BITS##x##SIZE##_load_aligned(smpl); \
	\
			min = v##TYPE##BITS##x##SIZE##_min(vec, min); \
			max = v##TYPE##BITS##x##SIZE##_max(vec, max); \
	\
			smpl += SIZE; \
		} \
	\
		v##TYPE##BITS##x##SIZE##_store_aligned(min, mins); \
		v##TYPE##BITS##x##SIZE##_store_aligned(max, maxs); \
	\
		/* reduce the per-element vector results to one scalar min and max */ \
		for (i = 0; i < SIZE; i++) { \
			if (mins[i] < smin) smin = mins[i]; \
			if (maxs[i] > smax) smax = maxs[i]; \
		} \
	\
		/* scalar pass over the samples that did not fill a whole vector */ \
		len32 = length % SIZE; \
		while (len32--) { \
			if (*smpl < smin) smin = *smpl; \
			if (*smpl > smax) smax = *smpl; \
	\
			smpl++; \
		} \
	\
		*pmin = smin; \
		*pmax = smax; \
	}

/*
 * Instantiate the min/max benchmark kernel for every element type and
 * vector size exercised by the benchmark.
 *
 * The shorthands below pin the type prefixes, the element width and the
 * MAX/MIN arguments for one element family, so each line only has to name
 * what actually varies. They are removed again once the list is complete.
 */
#define MINMAX_SIGNED(BITS, SIZE) \
	DEFINE_MINMAX_BENCHMARK(int, INT, BITS, SIZE, INT##BITS##_MAX, INT##BITS##_MIN)
#define MINMAX_UNSIGNED(BITS, SIZE) \
	DEFINE_MINMAX_BENCHMARK(uint, UINT, BITS, SIZE, UINT##BITS##_MAX, 0)
#define MINMAX_F32(SIZE) \
	DEFINE_MINMAX_BENCHMARK(f, F, 32, SIZE, 1.0f, -1.0f)
#define MINMAX_F64(SIZE) \
	DEFINE_MINMAX_BENCHMARK(f, F, 64, SIZE, 1.0, -1.0)

/* signed integers */
MINMAX_SIGNED(8, 2)
MINMAX_SIGNED(8, 4)
MINMAX_SIGNED(8, 8)
MINMAX_SIGNED(8, 16)
MINMAX_SIGNED(8, 32)
MINMAX_SIGNED(8, 64)

MINMAX_SIGNED(16, 2)
MINMAX_SIGNED(16, 4)
MINMAX_SIGNED(16, 8)
MINMAX_SIGNED(16, 16)
MINMAX_SIGNED(16, 32)

MINMAX_SIGNED(32, 2)
MINMAX_SIGNED(32, 4)
MINMAX_SIGNED(32, 8)
MINMAX_SIGNED(32, 16)

MINMAX_SIGNED(64, 2)
MINMAX_SIGNED(64, 4)
MINMAX_SIGNED(64, 8)

/* unsigned integers */
MINMAX_UNSIGNED(8, 2)
MINMAX_UNSIGNED(8, 4)
MINMAX_UNSIGNED(8, 8)
MINMAX_UNSIGNED(8, 16)
MINMAX_UNSIGNED(8, 32)
MINMAX_UNSIGNED(8, 64)

MINMAX_UNSIGNED(16, 2)
MINMAX_UNSIGNED(16, 4)
MINMAX_UNSIGNED(16, 8)
MINMAX_UNSIGNED(16, 16)
MINMAX_UNSIGNED(16, 32)

MINMAX_UNSIGNED(32, 2)
MINMAX_UNSIGNED(32, 4)
MINMAX_UNSIGNED(32, 8)
MINMAX_UNSIGNED(32, 16)

MINMAX_UNSIGNED(64, 2)
MINMAX_UNSIGNED(64, 4)
MINMAX_UNSIGNED(64, 8)

/* single-precision floating point */
MINMAX_F32(2)
MINMAX_F32(4)
MINMAX_F32(8)
MINMAX_F32(16)

/* double-precision floating point */
MINMAX_F64(2)
MINMAX_F64(4)
MINMAX_F64(8)

#undef MINMAX_SIGNED
#undef MINMAX_UNSIGNED
#undef MINMAX_F32
#undef MINMAX_F64