Mercurial > vec
comparison test/test_benchmark_vec.c @ 37:4b5a557aa64f
*: turns out extern is a practical joke. rewrite to be always inline again
the sample benchmark performs about 3x as well with optimizations
disabled :)
author | Paper <paper@tflc.us> |
---|---|
date | Sat, 26 Apr 2025 01:04:35 -0400 |
parents | |
children | c6e0df09b86f |
comparison
equal
deleted
inserted
replaced
36:677c03c382b8 | 37:4b5a557aa64f |
---|---|
1 #include "vec/vec.h" | |
2 | |
3 extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl, | |
4 uint32_t length, int32_t *pmin, int32_t *pmax) | |
5 { | |
6 int32_t smin = INT32_MAX, smax = INT32_MIN; | |
7 uint32_t len32; | |
8 int i; | |
9 vint16x32 min = vint16x32_splat(*pmin); | |
10 vint16x32 max = vint16x32_splat(*pmax); | |
11 VINT16x32_ALIGNED_ARRAY(mins); | |
12 VINT16x32_ALIGNED_ARRAY(maxs); | |
13 | |
14 len32 = length / 32; | |
15 while (len32--) { | |
16 vint16x32 vec = vint16x32_load_aligned(smpl); | |
17 | |
18 min = vint16x32_min(vec, min); | |
19 max = vint16x32_max(vec, max); | |
20 | |
21 smpl += 32; | |
22 } | |
23 | |
24 vint16x32_store_aligned(min, mins); | |
25 vint16x32_store_aligned(max, maxs); | |
26 | |
27 /* get the lowest minimum of what we have left */ | |
28 for (i = 0; i < 32; i++) { | |
29 if (mins[i] < smin) smin = mins[i]; | |
30 if (maxs[i] > smax) smax = maxs[i]; | |
31 } | |
32 | |
33 len32 = length % 32; | |
34 while (len32--) { | |
35 if (*smpl < smin) smin = *smpl; | |
36 if (*smpl > smax) smax = *smpl; | |
37 | |
38 smpl++; | |
39 } | |
40 | |
41 *pmin = smin; | |
42 *pmax = smax; | |
43 } |