diff test/test_benchmark_vec.c @ 37:4b5a557aa64f
*: turns out extern is a practical joke. rewrite to be always inline again
the sample benchmark performs about 3x as well with optimizations
disabled :)
author    Paper <paper@tflc.us>
date      Sat, 26 Apr 2025 01:04:35 -0400
parents
children  c6e0df09b86f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/test_benchmark_vec.c	Sat Apr 26 01:04:35 2025 -0400
@@ -0,0 +1,43 @@
+#include "vec/vec.h"
+
+extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl,
+	uint32_t length, int32_t *pmin, int32_t *pmax)
+{
+	int32_t smin = INT32_MAX, smax = INT32_MIN;
+	uint32_t len32;
+	int i;
+	vint16x32 min = vint16x32_splat(*pmin);
+	vint16x32 max = vint16x32_splat(*pmax);
+	VINT16x32_ALIGNED_ARRAY(mins);
+	VINT16x32_ALIGNED_ARRAY(maxs);
+
+	len32 = length / 32;
+	while (len32--) {
+		vint16x32 vec = vint16x32_load_aligned(smpl);
+
+		min = vint16x32_min(vec, min);
+		max = vint16x32_max(vec, max);
+
+		smpl += 32;
+	}
+
+	vint16x32_store_aligned(min, mins);
+	vint16x32_store_aligned(max, maxs);
+
+	/* get the lowest minimum of what we have left */
+	for (i = 0; i < 32; i++) {
+		if (mins[i] < smin) smin = mins[i];
+		if (maxs[i] > smax) smax = maxs[i];
+	}
+
+	len32 = length % 32;
+	while (len32--) {
+		if (*smpl < smin) smin = *smpl;
+		if (*smpl > smax) smax = *smpl;
+
+		smpl++;
+	}
+
+	*pmin = smin;
+	*pmax = smax;
+}
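
For context, a minimal sketch of how this routine might be exercised; it is not part of the changeset. The 64-byte buffer alignment (32 lanes * sizeof(int16_t)), the 4096-sample buffer, and the INT16_MAX/INT16_MIN seed values passed through pmin/pmax are assumptions inferred from the aligned loads and the splats above, and the scalar loop is only a correctness check against a plain scan, not the benchmark the commit message's "3x" figure refers to.

/* hypothetical caller sketch, assuming a C11 environment for aligned_alloc */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl,
	uint32_t length, int32_t *pmin, int32_t *pmax);

int main(void)
{
	uint32_t length = 4096, i;
	/* 64-byte alignment is an assumption about what the aligned loads expect */
	int16_t *smpl = aligned_alloc(64, length * sizeof(int16_t));
	int32_t vmin = INT16_MAX, vmax = INT16_MIN; /* in/out seeds, splatted inside */
	int32_t smin = INT16_MAX, smax = INT16_MIN;

	if (!smpl)
		return 1;

	/* fill with arbitrary 16-bit samples */
	for (i = 0; i < length; i++)
		smpl[i] = (int16_t)((rand() % 65536) - 32768);

	/* vectorized scan from the diff above */
	test_benchmark_sample_minmax_vec_impl(smpl, length, &vmin, &vmax);

	/* plain scalar scan, used here only to sanity-check the result */
	for (i = 0; i < length; i++) {
		if (smpl[i] < smin) smin = smpl[i];
		if (smpl[i] > smax) smax = smpl[i];
	}

	printf("vec: min=%d max=%d, scalar: min=%d max=%d\n",
		(int)vmin, (int)vmax, (int)smin, (int)smax);

	free(smpl);
	return 0;
}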