Mercurial > vec
view test/test_benchmark_vec.c @ 37:4b5a557aa64f
*: turns out extern is a practical joke. rewrite to be always inline again
the sample benchmark performs about 3x as well with optimizations
disabled :)
author | Paper <paper@tflc.us> |
---|---|
date | Sat, 26 Apr 2025 01:04:35 -0400 |
parents | |
children |
line wrap: on
line source
#include "vec/vec.h"

/*
 * Fold the minimum and maximum of a run of 16-bit samples into the running
 * bounds pointed to by pmin and pmax.
 *
 * Full groups of 32 samples are reduced with the vint16x32 min/max
 * operations; the remaining (length % 32) samples are compared one by one.
 *
 * smpl    samples to scan. Full groups are read with
 *         vint16x32_load_aligned(), so the pointer presumably has to meet
 *         that function's alignment requirement -- confirm against vec.h.
 * length  number of samples available at smpl.
 * pmin    in: running minimum; out: the smaller of the incoming value and
 *         the smallest sample seen.
 * pmax    in: running maximum; out: the larger of the incoming value and
 *         the largest sample seen.
 */
extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl,
	uint32_t length, int32_t *pmin, int32_t *pmax)
{
	/* Keep the caller's bounds at their full 32-bit width. Splatting them
	 * into 16-bit lanes would narrow seeds such as INT32_MAX / INT32_MIN
	 * into unrelated values and corrupt the result. */
	int32_t smin = *pmin, smax = *pmax;
	uint32_t len32 = length / 32;

	if (len32 > 0) {
		/* seed both accumulators from the first full vector of samples */
		vint16x32 min = vint16x32_load_aligned(smpl);
		vint16x32 max = min;
		VINT16x32_ALIGNED_ARRAY(mins);
		VINT16x32_ALIGNED_ARRAY(maxs);
		int i;

		smpl += 32;

		/* one vector is already consumed, hence the pre-decrement */
		while (--len32) {
			vint16x32 vec = vint16x32_load_aligned(smpl);

			min = vint16x32_min(vec, min);
			max = vint16x32_max(vec, max);

			smpl += 32;
		}

		vint16x32_store_aligned(min, mins);
		vint16x32_store_aligned(max, maxs);

		/* collapse the 32 per-lane results into the scalar bounds */
		for (i = 0; i < 32; i++) {
			if (mins[i] < smin)
				smin = mins[i];
			if (maxs[i] > smax)
				smax = maxs[i];
		}
	}

	/* samples that did not fill a whole vector */
	len32 = length % 32;
	while (len32--) {
		if (*smpl < smin)
			smin = *smpl;
		if (*smpl > smax)
			smax = *smpl;

		smpl++;
	}

	*pmin = smin;
	*pmax = smax;
}