Mercurial > vec
comparison test/test_benchmark_vec.c @ 37:4b5a557aa64f
*: turns out extern is a practical joke. rewrite to be always inline again
the sample benchmark performs about 3x as well with optimizations
disabled :)
| author | Paper <paper@tflc.us> |
|---|---|
| date | Sat, 26 Apr 2025 01:04:35 -0400 |
| parents | |
| children | c6e0df09b86f |
comparison
equal
deleted
inserted
replaced
| 36:677c03c382b8 | 37:4b5a557aa64f |
|---|---|
| 1 #include "vec/vec.h" | |
| 2 | |
| 3 extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl, | |
| 4 uint32_t length, int32_t *pmin, int32_t *pmax) | |
| 5 { | |
| 6 int32_t smin = INT32_MAX, smax = INT32_MIN; | |
| 7 uint32_t len32; | |
| 8 int i; | |
| 9 vint16x32 min = vint16x32_splat(*pmin); | |
| 10 vint16x32 max = vint16x32_splat(*pmax); | |
| 11 VINT16x32_ALIGNED_ARRAY(mins); | |
| 12 VINT16x32_ALIGNED_ARRAY(maxs); | |
| 13 | |
| 14 len32 = length / 32; | |
| 15 while (len32--) { | |
| 16 vint16x32 vec = vint16x32_load_aligned(smpl); | |
| 17 | |
| 18 min = vint16x32_min(vec, min); | |
| 19 max = vint16x32_max(vec, max); | |
| 20 | |
| 21 smpl += 32; | |
| 22 } | |
| 23 | |
| 24 vint16x32_store_aligned(min, mins); | |
| 25 vint16x32_store_aligned(max, maxs); | |
| 26 | |
| 27 /* get the lowest minimum of what we have left */ | |
| 28 for (i = 0; i < 32; i++) { | |
| 29 if (mins[i] < smin) smin = mins[i]; | |
| 30 if (maxs[i] > smax) smax = maxs[i]; | |
| 31 } | |
| 32 | |
| 33 len32 = length % 32; | |
| 34 while (len32--) { | |
| 35 if (*smpl < smin) smin = *smpl; | |
| 36 if (*smpl > smax) smax = *smpl; | |
| 37 | |
| 38 smpl++; | |
| 39 } | |
| 40 | |
| 41 *pmin = smin; | |
| 42 *pmax = smax; | |
| 43 } |
