comparison: test/test_benchmark_vec.c @ 41:c6e0df09b86f (default tip)

*: performance improvements with old GCC, reimplement altivec
author Paper <paper@tflc.us>
date Mon, 28 Apr 2025 16:31:59 -0400
parents 4b5a557aa64f
--- test/test_benchmark_vec.c@40:55cadb1fac4b
+++ test/test_benchmark_vec.c@41:c6e0df09b86f
@@ -4,35 +4,35 @@
 		uint32_t length, int32_t *pmin, int32_t *pmax)
 {
 	int32_t smin = INT32_MAX, smax = INT32_MIN;
 	uint32_t len32;
 	int i;
-	vint16x32 min = vint16x32_splat(*pmin);
-	vint16x32 max = vint16x32_splat(*pmax);
-	VINT16x32_ALIGNED_ARRAY(mins);
-	VINT16x32_ALIGNED_ARRAY(maxs);
+	vint16x8 min = vint16x8_splat(*pmin);
+	vint16x8 max = vint16x8_splat(*pmax);
+	VINT16x8_ALIGNED_ARRAY(mins);
+	VINT16x8_ALIGNED_ARRAY(maxs);
 
-	len32 = length / 32;
+	len32 = length / 8;
 	while (len32--) {
-		vint16x32 vec = vint16x32_load_aligned(smpl);
+		vint16x8 vec = vint16x8_load_aligned(smpl);
 
-		min = vint16x32_min(vec, min);
-		max = vint16x32_max(vec, max);
+		min = vint16x8_min(vec, min);
+		max = vint16x8_max(vec, max);
 
-		smpl += 32;
+		smpl += 8;
 	}
 
-	vint16x32_store_aligned(min, mins);
-	vint16x32_store_aligned(max, maxs);
+	vint16x8_store_aligned(min, mins);
+	vint16x8_store_aligned(max, maxs);
 
 	/* get the lowest minimum of what we have left */
-	for (i = 0; i < 32; i++) {
+	for (i = 0; i < 8; i++) {
 		if (mins[i] < smin) smin = mins[i];
 		if (maxs[i] > smax) smax = maxs[i];
 	}
 
-	len32 = length % 32;
+	len32 = length % 8;
 	while (len32--) {
 		if (*smpl < smin) smin = *smpl;
 		if (*smpl > smax) smax = *smpl;
 
 		smpl++;
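
The comparison is cut off before the end of the function, but for readers unfamiliar with the vint16x8 API, a plain scalar sketch of what the routine computes is given below. The function name and the final write-back of *pmin/*pmax are assumptions; only the scanning logic and the initialization from *pmin/*pmax follow the code shown in the diff.

#include <stdint.h>

/* Hypothetical scalar counterpart of the vectorized min/max scan above.
 * The function name and the final write-back of *pmin/*pmax are
 * assumptions (the comparison ends before the function does); the
 * scanning logic mirrors the code shown in the diff. */
static void min_max_scalar(const int16_t *smpl, uint32_t length,
                           int32_t *pmin, int32_t *pmax)
{
	/* start from the caller's current bounds, like the splatted
	 * min/max vectors in the vectorized version */
	int32_t smin = *pmin, smax = *pmax;

	while (length--) {
		if (*smpl < smin) smin = *smpl;
		if (*smpl > smax) smax = *smpl;
		smpl++;
	}

	*pmin = smin;
	*pmax = smax;
}

Since the file is a benchmark, a loop of this shape is presumably the baseline the vint16x8 path is measured against; the vectorized version processes eight int16 samples per iteration and handles the remaining length % 8 samples with the same scalar comparisons.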