Mercurial > vec
comparison test/test_benchmark_vec.c @ 45:7955bed1d169
*: add preliminary floating point support
no x86 intrinsics just yet, but I did add altivec since it's
(arguably) the simplest :)
| author | Paper <paper@tflc.us> |
|---|---|
| date | Wed, 30 Apr 2025 18:36:38 -0400 |
| parents | c6e0df09b86f |
| children |
comparison
equal
deleted
inserted
replaced
| 44:b0a3f0248ecc | 45:7955bed1d169 |
|---|---|
| 1 #include "vec/vec.h" | 1 #include "vec/vec.h" |
| 2 | 2 |
| 3 extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl, | 3 #define DEFINE_MINMAX_BENCHMARK(TYPE,CTYPE,BITS,SIZE,MAX,MIN) \ |
| 4 uint32_t length, int32_t *pmin, int32_t *pmax) | 4 extern void test_benchmark_sample_minmax_##TYPE##BITS##x##SIZE##_impl(vec_##TYPE##BITS *smpl, \ |
| 5 { | 5 uint32_t length, vec_##TYPE##BITS *pmin, vec_##TYPE##BITS *pmax) \ |
| 6 int32_t smin = INT32_MAX, smax = INT32_MIN; | 6 { \ |
| 7 uint32_t len32; | 7 vec_##TYPE##BITS smin = MAX, smax = MIN; \ |
| 8 int i; | 8 uint32_t len32; \ |
| 9 vint16x8 min = vint16x8_splat(*pmin); | 9 int i; \ |
| 10 vint16x8 max = vint16x8_splat(*pmax); | 10 v##TYPE##BITS##x##SIZE min = v##TYPE##BITS##x##SIZE##_splat(*pmin); \ |
| 11 VINT16x8_ALIGNED_ARRAY(mins); | 11 v##TYPE##BITS##x##SIZE max = v##TYPE##BITS##x##SIZE##_splat(*pmax); \ |
| 12 VINT16x8_ALIGNED_ARRAY(maxs); | 12 V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(mins); \ |
| 13 | 13 V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(maxs); \ |
| 14 len32 = length / 8; | 14 \ |
| 15 while (len32--) { | 15 len32 = length / SIZE; \ |
| 16 vint16x8 vec = vint16x8_load_aligned(smpl); | 16 while (len32--) { \ |
| 17 | 17 v##TYPE##BITS##x##SIZE vec = v##TYPE##BITS##x##SIZE##_load_aligned(smpl); \ |
| 18 min = vint16x8_min(vec, min); | 18 \ |
| 19 max = vint16x8_max(vec, max); | 19 min = v##TYPE##BITS##x##SIZE##_min(vec, min); \ |
| 20 | 20 max = v##TYPE##BITS##x##SIZE##_max(vec, max); \ |
| 21 smpl += 8; | 21 \ |
| | 22 smpl += SIZE; \ |
| | 23 } \ |
| | 24 \ |
| | 25 v##TYPE##BITS##x##SIZE##_store_aligned(min, mins); \ |
| | 26 v##TYPE##BITS##x##SIZE##_store_aligned(max, maxs); \ |
| | 27 \ |
| | 28 /* get the lowest minimum of what we have left */ \ |
| | 29 for (i = 0; i < SIZE; i++) { \ |
| | 30 if (mins[i] < smin) smin = mins[i]; \ |
| | 31 if (maxs[i] > smax) smax = maxs[i]; \ |
| | 32 } \ |
| | 33 \ |
| | 34 len32 = length % SIZE; \ |
| | 35 while (len32--) { \ |
| | 36 if (*smpl < smin) smin = *smpl; \ |
| | 37 if (*smpl > smax) smax = *smpl; \ |
| | 38 \ |
| | 39 smpl++; \ |
| | 40 } \ |
| | 41 \ |
| | 42 *pmin = smin; \ |
| | 43 *pmax = smax; \ |
| 22 } | 44 } |
| 23 | 45 |
| 24 vint16x8_store_aligned(min, mins); | 46 DEFINE_MINMAX_BENCHMARK(int,INT,8,2,INT8_MAX,INT8_MIN) |
| 25 vint16x8_store_aligned(max, maxs); | 47 DEFINE_MINMAX_BENCHMARK(int,INT,8,4,INT8_MAX,INT8_MIN) |
| | 48 DEFINE_MINMAX_BENCHMARK(int,INT,8,8,INT8_MAX,INT8_MIN) |
| | 49 DEFINE_MINMAX_BENCHMARK(int,INT,8,16,INT8_MAX,INT8_MIN) |
| | 50 DEFINE_MINMAX_BENCHMARK(int,INT,8,32,INT8_MAX,INT8_MIN) |
| | 51 DEFINE_MINMAX_BENCHMARK(int,INT,8,64,INT8_MAX,INT8_MIN) |
| 26 | 52 |
| 27 /* get the lowest minimum of what we have left */ | 53 DEFINE_MINMAX_BENCHMARK(int,INT,16,2,INT16_MAX,INT16_MIN) |
| 28 for (i = 0; i < 8; i++) { | 54 DEFINE_MINMAX_BENCHMARK(int,INT,16,4,INT16_MAX,INT16_MIN) |
| 29 if (mins[i] < smin) smin = mins[i]; | 55 DEFINE_MINMAX_BENCHMARK(int,INT,16,8,INT16_MAX,INT16_MIN) |
| 30 if (maxs[i] > smax) smax = maxs[i]; | 56 DEFINE_MINMAX_BENCHMARK(int,INT,16,16,INT16_MAX,INT16_MIN) |
| 31 } | 57 DEFINE_MINMAX_BENCHMARK(int,INT,16,32,INT16_MAX,INT16_MIN) |
| 32 | 58 |
| 33 len32 = length % 8; | 59 DEFINE_MINMAX_BENCHMARK(int,INT,32,2,INT32_MAX,INT32_MIN) |
| 34 while (len32--) { | 60 DEFINE_MINMAX_BENCHMARK(int,INT,32,4,INT32_MAX,INT32_MIN) |
| 35 if (*smpl < smin) smin = *smpl; | 61 DEFINE_MINMAX_BENCHMARK(int,INT,32,8,INT32_MAX,INT32_MIN) |
| 36 if (*smpl > smax) smax = *smpl; | 62 DEFINE_MINMAX_BENCHMARK(int,INT,32,16,INT32_MAX,INT32_MIN) |
| 37 | 63 |
| 38 smpl++; | 64 DEFINE_MINMAX_BENCHMARK(int,INT,64,2,INT64_MAX,INT64_MIN) |
| 39 } | 65 DEFINE_MINMAX_BENCHMARK(int,INT,64,4,INT64_MAX,INT64_MIN) |
| | 66 DEFINE_MINMAX_BENCHMARK(int,INT,64,8,INT64_MAX,INT64_MIN) |
| 40 | 67 |
| 41 *pmin = smin; | 68 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,2,UINT8_MAX,0) |
| 42 *pmax = smax; | 69 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,4,UINT8_MAX,0) |
| 43 } | 70 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,8,UINT8_MAX,0) |
| | 71 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,16,UINT8_MAX,0) |
| | 72 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,32,UINT8_MAX,0) |
| | 73 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,64,UINT8_MAX,0) |
| | 74 |
| | 75 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,2,UINT16_MAX,0) |
| | 76 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,4,UINT16_MAX,0) |
| | 77 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,8,UINT16_MAX,0) |
| | 78 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,16,UINT16_MAX,0) |
| | 79 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,32,UINT16_MAX,0) |
| | 80 |
| | 81 DEFINE_MINMAX_BENCHMARK(uint,UINT,32,2,UINT32_MAX,0) |
| | 82 DEFINE_MINMAX_BENCHMARK(uint,UINT,32,4,UINT32_MAX,0) |
| | 83 DEFINE_MINMAX_BENCHMARK(uint,UINT,32,8,UINT32_MAX,0) |
| | 84 DEFINE_MINMAX_BENCHMARK(uint,UINT,32,16,UINT32_MAX,0) |
| | 85 |
| | 86 DEFINE_MINMAX_BENCHMARK(uint,UINT,64,2,UINT64_MAX,0) |
| | 87 DEFINE_MINMAX_BENCHMARK(uint,UINT,64,4,UINT64_MAX,0) |
| | 88 DEFINE_MINMAX_BENCHMARK(uint,UINT,64,8,UINT64_MAX,0) |
| | 89 |
| | 90 DEFINE_MINMAX_BENCHMARK(f,F,32,2,1.0f,-1.0f) |
| | 91 DEFINE_MINMAX_BENCHMARK(f,F,32,4,1.0f,-1.0f) |
| | 92 DEFINE_MINMAX_BENCHMARK(f,F,32,8,1.0f,-1.0f) |
| | 93 DEFINE_MINMAX_BENCHMARK(f,F,32,16,1.0f,-1.0f) |
| | 94 |
| | 95 DEFINE_MINMAX_BENCHMARK(f,F,64,2,1.0,-1.0) |
| | 96 DEFINE_MINMAX_BENCHMARK(f,F,64,4,1.0,-1.0) |
| | 97 DEFINE_MINMAX_BENCHMARK(f,F,64,8,1.0,-1.0) |
