comparison test/test_benchmark_vec.c @ 37:4b5a557aa64f

*: turns out extern is a practical joke. rewrite to be always inline again; the sample benchmark performs about 3x as well with optimizations disabled :)
author Paper <paper@tflc.us>
date Sat, 26 Apr 2025 01:04:35 -0400
parents
children c6e0df09b86f
comparison
equal deleted inserted replaced
36:677c03c382b8 37:4b5a557aa64f
1 #include "vec/vec.h"
2
3 extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl,
4 uint32_t length, int32_t *pmin, int32_t *pmax)
5 {
6 int32_t smin = INT32_MAX, smax = INT32_MIN;
7 uint32_t len32;
8 int i;
9 vint16x32 min = vint16x32_splat(*pmin);
10 vint16x32 max = vint16x32_splat(*pmax);
11 VINT16x32_ALIGNED_ARRAY(mins);
12 VINT16x32_ALIGNED_ARRAY(maxs);
13
14 len32 = length / 32;
15 while (len32--) {
16 vint16x32 vec = vint16x32_load_aligned(smpl);
17
18 min = vint16x32_min(vec, min);
19 max = vint16x32_max(vec, max);
20
21 smpl += 32;
22 }
23
24 vint16x32_store_aligned(min, mins);
25 vint16x32_store_aligned(max, maxs);
26
27 /* get the lowest minimum of what we have left */
28 for (i = 0; i < 32; i++) {
29 if (mins[i] < smin) smin = mins[i];
30 if (maxs[i] > smax) smax = maxs[i];
31 }
32
33 len32 = length % 32;
34 while (len32--) {
35 if (*smpl < smin) smin = *smpl;
36 if (*smpl > smax) smax = *smpl;
37
38 smpl++;
39 }
40
41 *pmin = smin;
42 *pmax = smax;
43 }