Mercurial > vec
comparison test/test_benchmark_vec.c @ 41:c6e0df09b86f default tip
*: performance improvements with old GCC, reimplement altivec
author | Paper <paper@tflc.us> |
---|---|
date | Mon, 28 Apr 2025 16:31:59 -0400 |
parents | 4b5a557aa64f |
children |
comparison
equal
deleted
inserted
replaced
40:55cadb1fac4b | 41:c6e0df09b86f |
---|---|
4 uint32_t length, int32_t *pmin, int32_t *pmax) | 4 uint32_t length, int32_t *pmin, int32_t *pmax) |
5 { | 5 { |
6 int32_t smin = INT32_MAX, smax = INT32_MIN; | 6 int32_t smin = INT32_MAX, smax = INT32_MIN; |
7 uint32_t len32; | 7 uint32_t len32; |
8 int i; | 8 int i; |
9 vint16x32 min = vint16x32_splat(*pmin); | 9 vint16x8 min = vint16x8_splat(*pmin); |
10 vint16x32 max = vint16x32_splat(*pmax); | 10 vint16x8 max = vint16x8_splat(*pmax); |
11 VINT16x32_ALIGNED_ARRAY(mins); | 11 VINT16x8_ALIGNED_ARRAY(mins); |
12 VINT16x32_ALIGNED_ARRAY(maxs); | 12 VINT16x8_ALIGNED_ARRAY(maxs); |
13 | 13 |
14 len32 = length / 32; | 14 len32 = length / 8; |
15 while (len32--) { | 15 while (len32--) { |
16 vint16x32 vec = vint16x32_load_aligned(smpl); | 16 vint16x8 vec = vint16x8_load_aligned(smpl); |
17 | 17 |
18 min = vint16x32_min(vec, min); | 18 min = vint16x8_min(vec, min); |
19 max = vint16x32_max(vec, max); | 19 max = vint16x8_max(vec, max); |
20 | 20 |
21 smpl += 32; | 21 smpl += 8; |
22 } | 22 } |
23 | 23 |
24 vint16x32_store_aligned(min, mins); | 24 vint16x8_store_aligned(min, mins); |
25 vint16x32_store_aligned(max, maxs); | 25 vint16x8_store_aligned(max, maxs); |
26 | 26 |
27 /* get the lowest minimum of what we have left */ | 27 /* get the lowest minimum of what we have left */ |
28 for (i = 0; i < 32; i++) { | 28 for (i = 0; i < 8; i++) { |
29 if (mins[i] < smin) smin = mins[i]; | 29 if (mins[i] < smin) smin = mins[i]; |
30 if (maxs[i] > smax) smax = maxs[i]; | 30 if (maxs[i] > smax) smax = maxs[i]; |
31 } | 31 } |
32 | 32 |
33 len32 = length % 32; | 33 len32 = length % 8; |
34 while (len32--) { | 34 while (len32--) { |
35 if (*smpl < smin) smin = *smpl; | 35 if (*smpl < smin) smin = *smpl; |
36 if (*smpl > smax) smax = *smpl; | 36 if (*smpl > smax) smax = *smpl; |
37 | 37 |
38 smpl++; | 38 smpl++; |