comparison test/test_benchmark_vec.c @ 45:7955bed1d169 default tip

*: add preliminary floating point support; no x86 intrinsics just yet, but I did add altivec since it's (arguably) the simplest :)
author Paper <paper@tflc.us>
date Wed, 30 Apr 2025 18:36:38 -0400
parents c6e0df09b86f
children
comparison
equal deleted inserted replaced
44:b0a3f0248ecc 45:7955bed1d169
1 #include "vec/vec.h" 1 #include "vec/vec.h"
2 2
3 extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl, 3 #define DEFINE_MINMAX_BENCHMARK(TYPE,CTYPE,BITS,SIZE,MAX,MIN) \
4 uint32_t length, int32_t *pmin, int32_t *pmax) 4 extern void test_benchmark_sample_minmax_##TYPE##BITS##x##SIZE##_impl(vec_##TYPE##BITS *smpl, \
5 { 5 uint32_t length, vec_##TYPE##BITS *pmin, vec_##TYPE##BITS *pmax) \
6 int32_t smin = INT32_MAX, smax = INT32_MIN; 6 { \
7 uint32_t len32; 7 vec_##TYPE##BITS smin = MAX, smax = MIN; \
8 int i; 8 uint32_t len32; \
9 vint16x8 min = vint16x8_splat(*pmin); 9 int i; \
10 vint16x8 max = vint16x8_splat(*pmax); 10 v##TYPE##BITS##x##SIZE min = v##TYPE##BITS##x##SIZE##_splat(*pmin); \
11 VINT16x8_ALIGNED_ARRAY(mins); 11 v##TYPE##BITS##x##SIZE max = v##TYPE##BITS##x##SIZE##_splat(*pmax); \
12 VINT16x8_ALIGNED_ARRAY(maxs); 12 V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(mins); \
13 13 V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(maxs); \
14 len32 = length / 8; 14 \
15 while (len32--) { 15 len32 = length / SIZE; \
16 vint16x8 vec = vint16x8_load_aligned(smpl); 16 while (len32--) { \
17 17 v##TYPE##BITS##x##SIZE vec = v##TYPE##BITS##x##SIZE##_load_aligned(smpl); \
18 min = vint16x8_min(vec, min); 18 \
19 max = vint16x8_max(vec, max); 19 min = v##TYPE##BITS##x##SIZE##_min(vec, min); \
20 20 max = v##TYPE##BITS##x##SIZE##_max(vec, max); \
21 smpl += 8; 21 \
22 smpl += SIZE; \
23 } \
24 \
25 v##TYPE##BITS##x##SIZE##_store_aligned(min, mins); \
26 v##TYPE##BITS##x##SIZE##_store_aligned(max, maxs); \
27 \
28 /* get the lowest minimum of what we have left */ \
29 for (i = 0; i < SIZE; i++) { \
30 if (mins[i] < smin) smin = mins[i]; \
31 if (maxs[i] > smax) smax = maxs[i]; \
32 } \
33 \
34 len32 = length % SIZE; \
35 while (len32--) { \
36 if (*smpl < smin) smin = *smpl; \
37 if (*smpl > smax) smax = *smpl; \
38 \
39 smpl++; \
40 } \
41 \
42 *pmin = smin; \
43 *pmax = smax; \
22 } 44 }
23 45
24 vint16x8_store_aligned(min, mins); 46 DEFINE_MINMAX_BENCHMARK(int,INT,8,2,INT8_MAX,INT8_MIN)
25 vint16x8_store_aligned(max, maxs); 47 DEFINE_MINMAX_BENCHMARK(int,INT,8,4,INT8_MAX,INT8_MIN)
48 DEFINE_MINMAX_BENCHMARK(int,INT,8,8,INT8_MAX,INT8_MIN)
49 DEFINE_MINMAX_BENCHMARK(int,INT,8,16,INT8_MAX,INT8_MIN)
50 DEFINE_MINMAX_BENCHMARK(int,INT,8,32,INT8_MAX,INT8_MIN)
51 DEFINE_MINMAX_BENCHMARK(int,INT,8,64,INT8_MAX,INT8_MIN)
26 52
27 /* get the lowest minimum of what we have left */ 53 DEFINE_MINMAX_BENCHMARK(int,INT,16,2,INT16_MAX,INT16_MIN)
28 for (i = 0; i < 8; i++) { 54 DEFINE_MINMAX_BENCHMARK(int,INT,16,4,INT16_MAX,INT16_MIN)
29 if (mins[i] < smin) smin = mins[i]; 55 DEFINE_MINMAX_BENCHMARK(int,INT,16,8,INT16_MAX,INT16_MIN)
30 if (maxs[i] > smax) smax = maxs[i]; 56 DEFINE_MINMAX_BENCHMARK(int,INT,16,16,INT16_MAX,INT16_MIN)
31 } 57 DEFINE_MINMAX_BENCHMARK(int,INT,16,32,INT16_MAX,INT16_MIN)
32 58
33 len32 = length % 8; 59 DEFINE_MINMAX_BENCHMARK(int,INT,32,2,INT32_MAX,INT32_MIN)
34 while (len32--) { 60 DEFINE_MINMAX_BENCHMARK(int,INT,32,4,INT32_MAX,INT32_MIN)
35 if (*smpl < smin) smin = *smpl; 61 DEFINE_MINMAX_BENCHMARK(int,INT,32,8,INT32_MAX,INT32_MIN)
36 if (*smpl > smax) smax = *smpl; 62 DEFINE_MINMAX_BENCHMARK(int,INT,32,16,INT32_MAX,INT32_MIN)
37 63
38 smpl++; 64 DEFINE_MINMAX_BENCHMARK(int,INT,64,2,INT64_MAX,INT64_MIN)
39 } 65 DEFINE_MINMAX_BENCHMARK(int,INT,64,4,INT64_MAX,INT64_MIN)
66 DEFINE_MINMAX_BENCHMARK(int,INT,64,8,INT64_MAX,INT64_MIN)
40 67
41 *pmin = smin; 68 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,2,UINT8_MAX,0)
42 *pmax = smax; 69 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,4,UINT8_MAX,0)
43 } 70 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,8,UINT8_MAX,0)
71 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,16,UINT8_MAX,0)
72 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,32,UINT8_MAX,0)
73 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,64,UINT8_MAX,0)
74
75 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,2,UINT16_MAX,0)
76 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,4,UINT16_MAX,0)
77 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,8,UINT16_MAX,0)
78 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,16,UINT16_MAX,0)
79 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,32,UINT16_MAX,0)
80
81 DEFINE_MINMAX_BENCHMARK(uint,UINT,32,2,UINT32_MAX,0)
82 DEFINE_MINMAX_BENCHMARK(uint,UINT,32,4,UINT32_MAX,0)
83 DEFINE_MINMAX_BENCHMARK(uint,UINT,32,8,UINT32_MAX,0)
84 DEFINE_MINMAX_BENCHMARK(uint,UINT,32,16,UINT32_MAX,0)
85
86 DEFINE_MINMAX_BENCHMARK(uint,UINT,64,2,UINT64_MAX,0)
87 DEFINE_MINMAX_BENCHMARK(uint,UINT,64,4,UINT64_MAX,0)
88 DEFINE_MINMAX_BENCHMARK(uint,UINT,64,8,UINT64_MAX,0)
89
90 DEFINE_MINMAX_BENCHMARK(f,F,32,2,1.0f,-1.0f)
91 DEFINE_MINMAX_BENCHMARK(f,F,32,4,1.0f,-1.0f)
92 DEFINE_MINMAX_BENCHMARK(f,F,32,8,1.0f,-1.0f)
93 DEFINE_MINMAX_BENCHMARK(f,F,32,16,1.0f,-1.0f)
94
95 DEFINE_MINMAX_BENCHMARK(f,F,64,2,1.0,-1.0)
96 DEFINE_MINMAX_BENCHMARK(f,F,64,4,1.0,-1.0)
97 DEFINE_MINMAX_BENCHMARK(f,F,64,8,1.0,-1.0)