Mercurial > vec
diff test/test_benchmark_vec.c @ 45:7955bed1d169 default tip
*: add preliminary floating-point support
No x86 intrinsics just yet, but I did add AltiVec since it's
(arguably) the simplest :)
author | Paper <paper@tflc.us> |
---|---|
date | Wed, 30 Apr 2025 18:36:38 -0400 |
parents | c6e0df09b86f |
children |
line wrap: on
line diff
--- a/test/test_benchmark_vec.c Tue Apr 29 16:54:13 2025 -0400 +++ b/test/test_benchmark_vec.c Wed Apr 30 18:36:38 2025 -0400 @@ -1,43 +1,97 @@ #include "vec/vec.h" -extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl, - uint32_t length, int32_t *pmin, int32_t *pmax) -{ - int32_t smin = INT32_MAX, smax = INT32_MIN; - uint32_t len32; - int i; - vint16x8 min = vint16x8_splat(*pmin); - vint16x8 max = vint16x8_splat(*pmax); - VINT16x8_ALIGNED_ARRAY(mins); - VINT16x8_ALIGNED_ARRAY(maxs); - - len32 = length / 8; - while (len32--) { - vint16x8 vec = vint16x8_load_aligned(smpl); - - min = vint16x8_min(vec, min); - max = vint16x8_max(vec, max); - - smpl += 8; +#define DEFINE_MINMAX_BENCHMARK(TYPE,CTYPE,BITS,SIZE,MAX,MIN) \ + extern void test_benchmark_sample_minmax_##TYPE##BITS##x##SIZE##_impl(vec_##TYPE##BITS *smpl, \ + uint32_t length, vec_##TYPE##BITS *pmin, vec_##TYPE##BITS *pmax) \ + { \ + vec_##TYPE##BITS smin = MAX, smax = MIN; \ + uint32_t len32; \ + int i; \ + v##TYPE##BITS##x##SIZE min = v##TYPE##BITS##x##SIZE##_splat(*pmin); \ + v##TYPE##BITS##x##SIZE max = v##TYPE##BITS##x##SIZE##_splat(*pmax); \ + V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(mins); \ + V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(maxs); \ + \ + len32 = length / SIZE; \ + while (len32--) { \ + v##TYPE##BITS##x##SIZE vec = v##TYPE##BITS##x##SIZE##_load_aligned(smpl); \ + \ + min = v##TYPE##BITS##x##SIZE##_min(vec, min); \ + max = v##TYPE##BITS##x##SIZE##_max(vec, max); \ + \ + smpl += SIZE; \ + } \ + \ + v##TYPE##BITS##x##SIZE##_store_aligned(min, mins); \ + v##TYPE##BITS##x##SIZE##_store_aligned(max, maxs); \ + \ + /* get the lowest minimum of what we have left */ \ + for (i = 0; i < SIZE; i++) { \ + if (mins[i] < smin) smin = mins[i]; \ + if (maxs[i] > smax) smax = maxs[i]; \ + } \ + \ + len32 = length % SIZE; \ + while (len32--) { \ + if (*smpl < smin) smin = *smpl; \ + if (*smpl > smax) smax = *smpl; \ + \ + smpl++; \ + } \ + \ + *pmin = smin; \ + *pmax = smax; \ } - 
vint16x8_store_aligned(min, mins); - vint16x8_store_aligned(max, maxs); +DEFINE_MINMAX_BENCHMARK(int,INT,8,2,INT8_MAX,INT8_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,8,4,INT8_MAX,INT8_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,8,8,INT8_MAX,INT8_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,8,16,INT8_MAX,INT8_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,8,32,INT8_MAX,INT8_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,8,64,INT8_MAX,INT8_MIN) - /* get the lowest minimum of what we have left */ - for (i = 0; i < 8; i++) { - if (mins[i] < smin) smin = mins[i]; - if (maxs[i] > smax) smax = maxs[i]; - } +DEFINE_MINMAX_BENCHMARK(int,INT,16,2,INT16_MAX,INT16_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,16,4,INT16_MAX,INT16_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,16,8,INT16_MAX,INT16_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,16,16,INT16_MAX,INT16_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,16,32,INT16_MAX,INT16_MIN) + +DEFINE_MINMAX_BENCHMARK(int,INT,32,2,INT32_MAX,INT32_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,32,4,INT32_MAX,INT32_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,32,8,INT32_MAX,INT32_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,32,16,INT32_MAX,INT32_MIN) + +DEFINE_MINMAX_BENCHMARK(int,INT,64,2,INT64_MAX,INT64_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,64,4,INT64_MAX,INT64_MIN) +DEFINE_MINMAX_BENCHMARK(int,INT,64,8,INT64_MAX,INT64_MIN) - len32 = length % 8; - while (len32--) { - if (*smpl < smin) smin = *smpl; - if (*smpl > smax) smax = *smpl; +DEFINE_MINMAX_BENCHMARK(uint,UINT,8,2,UINT8_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,8,4,UINT8_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,8,8,UINT8_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,8,16,UINT8_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,8,32,UINT8_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,8,64,UINT8_MAX,0) + +DEFINE_MINMAX_BENCHMARK(uint,UINT,16,2,UINT16_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,16,4,UINT16_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,16,8,UINT16_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,16,16,UINT16_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,16,32,UINT16_MAX,0) - smpl++; - } 
+DEFINE_MINMAX_BENCHMARK(uint,UINT,32,2,UINT32_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,32,4,UINT32_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,32,8,UINT32_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,32,16,UINT32_MAX,0) + +DEFINE_MINMAX_BENCHMARK(uint,UINT,64,2,UINT64_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,64,4,UINT64_MAX,0) +DEFINE_MINMAX_BENCHMARK(uint,UINT,64,8,UINT64_MAX,0) - *pmin = smin; - *pmax = smax; -} +DEFINE_MINMAX_BENCHMARK(f,F,32,2,1.0f,-1.0f) +DEFINE_MINMAX_BENCHMARK(f,F,32,4,1.0f,-1.0f) +DEFINE_MINMAX_BENCHMARK(f,F,32,8,1.0f,-1.0f) +DEFINE_MINMAX_BENCHMARK(f,F,32,16,1.0f,-1.0f) + +DEFINE_MINMAX_BENCHMARK(f,F,64,2,1.0,-1.0) +DEFINE_MINMAX_BENCHMARK(f,F,64,4,1.0,-1.0) +DEFINE_MINMAX_BENCHMARK(f,F,64,8,1.0,-1.0)