Mercurial > vec
comparison test/test_benchmark_vec.c @ 45:7955bed1d169 default tip
*: add preliminary floating point support
no x86 intrinsics just yet, but I did add altivec since it's
(arguably) the simplest :)
author | Paper <paper@tflc.us> |
---|---|
date | Wed, 30 Apr 2025 18:36:38 -0400 |
parents | c6e0df09b86f |
children |
comparison
equal
deleted
inserted
replaced
44:b0a3f0248ecc | 45:7955bed1d169 |
---|---|
1 #include "vec/vec.h" | 1 #include "vec/vec.h" |
2 | 2 |
3 extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl, | 3 #define DEFINE_MINMAX_BENCHMARK(TYPE,CTYPE,BITS,SIZE,MAX,MIN) \ |
4 uint32_t length, int32_t *pmin, int32_t *pmax) | 4 extern void test_benchmark_sample_minmax_##TYPE##BITS##x##SIZE##_impl(vec_##TYPE##BITS *smpl, \ |
5 { | 5 uint32_t length, vec_##TYPE##BITS *pmin, vec_##TYPE##BITS *pmax) \ |
6 int32_t smin = INT32_MAX, smax = INT32_MIN; | 6 { \ |
7 uint32_t len32; | 7 vec_##TYPE##BITS smin = MAX, smax = MIN; \ |
8 int i; | 8 uint32_t len32; \ |
9 vint16x8 min = vint16x8_splat(*pmin); | 9 int i; \ |
10 vint16x8 max = vint16x8_splat(*pmax); | 10 v##TYPE##BITS##x##SIZE min = v##TYPE##BITS##x##SIZE##_splat(*pmin); \ |
11 VINT16x8_ALIGNED_ARRAY(mins); | 11 v##TYPE##BITS##x##SIZE max = v##TYPE##BITS##x##SIZE##_splat(*pmax); \ |
12 VINT16x8_ALIGNED_ARRAY(maxs); | 12 V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(mins); \ |
13 | 13 V##CTYPE##BITS##x##SIZE##_ALIGNED_ARRAY(maxs); \ |
14 len32 = length / 8; | 14 \ |
15 while (len32--) { | 15 len32 = length / SIZE; \ |
16 vint16x8 vec = vint16x8_load_aligned(smpl); | 16 while (len32--) { \ |
17 | 17 v##TYPE##BITS##x##SIZE vec = v##TYPE##BITS##x##SIZE##_load_aligned(smpl); \ |
18 min = vint16x8_min(vec, min); | 18 \ |
19 max = vint16x8_max(vec, max); | 19 min = v##TYPE##BITS##x##SIZE##_min(vec, min); \ |
20 | 20 max = v##TYPE##BITS##x##SIZE##_max(vec, max); \ |
21 smpl += 8; | 21 \ |
| | 22 smpl += SIZE; \ |
| | 23 } \ |
| | 24 \ |
| | 25 v##TYPE##BITS##x##SIZE##_store_aligned(min, mins); \ |
| | 26 v##TYPE##BITS##x##SIZE##_store_aligned(max, maxs); \ |
| | 27 \ |
| | 28 /* get the lowest minimum of what we have left */ \ |
| | 29 for (i = 0; i < SIZE; i++) { \ |
| | 30 if (mins[i] < smin) smin = mins[i]; \ |
| | 31 if (maxs[i] > smax) smax = maxs[i]; \ |
| | 32 } \ |
| | 33 \ |
| | 34 len32 = length % SIZE; \ |
| | 35 while (len32--) { \ |
| | 36 if (*smpl < smin) smin = *smpl; \ |
| | 37 if (*smpl > smax) smax = *smpl; \ |
| | 38 \ |
| | 39 smpl++; \ |
| | 40 } \ |
| | 41 \ |
| | 42 *pmin = smin; \ |
| | 43 *pmax = smax; \ |
22 } | 44 } |
23 | 45 |
24 vint16x8_store_aligned(min, mins); | 46 DEFINE_MINMAX_BENCHMARK(int,INT,8,2,INT8_MAX,INT8_MIN) |
25 vint16x8_store_aligned(max, maxs); | 47 DEFINE_MINMAX_BENCHMARK(int,INT,8,4,INT8_MAX,INT8_MIN) |
| | 48 DEFINE_MINMAX_BENCHMARK(int,INT,8,8,INT8_MAX,INT8_MIN) |
| | 49 DEFINE_MINMAX_BENCHMARK(int,INT,8,16,INT8_MAX,INT8_MIN) |
| | 50 DEFINE_MINMAX_BENCHMARK(int,INT,8,32,INT8_MAX,INT8_MIN) |
| | 51 DEFINE_MINMAX_BENCHMARK(int,INT,8,64,INT8_MAX,INT8_MIN) |
26 | 52 |
27 /* get the lowest minimum of what we have left */ | 53 DEFINE_MINMAX_BENCHMARK(int,INT,16,2,INT16_MAX,INT16_MIN) |
28 for (i = 0; i < 8; i++) { | 54 DEFINE_MINMAX_BENCHMARK(int,INT,16,4,INT16_MAX,INT16_MIN) |
29 if (mins[i] < smin) smin = mins[i]; | 55 DEFINE_MINMAX_BENCHMARK(int,INT,16,8,INT16_MAX,INT16_MIN) |
30 if (maxs[i] > smax) smax = maxs[i]; | 56 DEFINE_MINMAX_BENCHMARK(int,INT,16,16,INT16_MAX,INT16_MIN) |
31 } | 57 DEFINE_MINMAX_BENCHMARK(int,INT,16,32,INT16_MAX,INT16_MIN) |
32 | 58 |
33 len32 = length % 8; | 59 DEFINE_MINMAX_BENCHMARK(int,INT,32,2,INT32_MAX,INT32_MIN) |
34 while (len32--) { | 60 DEFINE_MINMAX_BENCHMARK(int,INT,32,4,INT32_MAX,INT32_MIN) |
35 if (*smpl < smin) smin = *smpl; | 61 DEFINE_MINMAX_BENCHMARK(int,INT,32,8,INT32_MAX,INT32_MIN) |
36 if (*smpl > smax) smax = *smpl; | 62 DEFINE_MINMAX_BENCHMARK(int,INT,32,16,INT32_MAX,INT32_MIN) |
37 | 63 |
38 smpl++; | 64 DEFINE_MINMAX_BENCHMARK(int,INT,64,2,INT64_MAX,INT64_MIN) |
39 } | 65 DEFINE_MINMAX_BENCHMARK(int,INT,64,4,INT64_MAX,INT64_MIN) |
| | 66 DEFINE_MINMAX_BENCHMARK(int,INT,64,8,INT64_MAX,INT64_MIN) |
40 | 67 |
41 *pmin = smin; | 68 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,2,UINT8_MAX,0) |
42 *pmax = smax; | 69 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,4,UINT8_MAX,0) |
43 } | 70 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,8,UINT8_MAX,0) |
| | 71 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,16,UINT8_MAX,0) |
| | 72 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,32,UINT8_MAX,0) |
| | 73 DEFINE_MINMAX_BENCHMARK(uint,UINT,8,64,UINT8_MAX,0) |
| | 74 |
| | 75 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,2,UINT16_MAX,0) |
| | 76 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,4,UINT16_MAX,0) |
| | 77 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,8,UINT16_MAX,0) |
| | 78 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,16,UINT16_MAX,0) |
| | 79 DEFINE_MINMAX_BENCHMARK(uint,UINT,16,32,UINT16_MAX,0) |
| | 80 |
| | 81 DEFINE_MINMAX_BENCHMARK(uint,UINT,32,2,UINT32_MAX,0) |
| | 82 DEFINE_MINMAX_BENCHMARK(uint,UINT,32,4,UINT32_MAX,0) |
| | 83 DEFINE_MINMAX_BENCHMARK(uint,UINT,32,8,UINT32_MAX,0) |
| | 84 DEFINE_MINMAX_BENCHMARK(uint,UINT,32,16,UINT32_MAX,0) |
| | 85 |
| | 86 DEFINE_MINMAX_BENCHMARK(uint,UINT,64,2,UINT64_MAX,0) |
| | 87 DEFINE_MINMAX_BENCHMARK(uint,UINT,64,4,UINT64_MAX,0) |
| | 88 DEFINE_MINMAX_BENCHMARK(uint,UINT,64,8,UINT64_MAX,0) |
| | 89 |
| | 90 DEFINE_MINMAX_BENCHMARK(f,F,32,2,1.0f,-1.0f) |
| | 91 DEFINE_MINMAX_BENCHMARK(f,F,32,4,1.0f,-1.0f) |
| | 92 DEFINE_MINMAX_BENCHMARK(f,F,32,8,1.0f,-1.0f) |
| | 93 DEFINE_MINMAX_BENCHMARK(f,F,32,16,1.0f,-1.0f) |
| | 94 |
| | 95 DEFINE_MINMAX_BENCHMARK(f,F,64,2,1.0,-1.0) |
| | 96 DEFINE_MINMAX_BENCHMARK(f,F,64,4,1.0,-1.0) |
| | 97 DEFINE_MINMAX_BENCHMARK(f,F,64,8,1.0,-1.0) |