comparison gen/gengeneric.c @ 45:7955bed1d169 default tip

*: add preliminary floating point support no x86 intrinsics just yet, but I did add altivec since it's (arguably) the simplest :)
author Paper <paper@tflc.us>
date Wed, 30 Apr 2025 18:36:38 -0400
parents
children
comparison
equal deleted inserted replaced
44:b0a3f0248ecc 45:7955bed1d169
1 /**
2 * vec - a tiny SIMD vector library in C99
3 *
4 * Copyright (c) 2024-2025 Paper
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 **/
24
25 #include "genlib.h"
26
27 #define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
28
29 static void op_builtin_pbody(int op, int type, int bits, int size)
30 {
31 const char *ops[OP_FINAL_] = {
32 [OP_ADD] = "+",
33 [OP_SUB] = "-",
34 [OP_MUL] = "*",
35 [OP_AND] = "&",
36 [OP_OR] = "|",
37 [OP_XOR] = "^",
38 };
39 int i;
40
41 for (i = 0; i < size; i++)
42 printf("\tvec1.generic[%d] = (vec1.generic[%d] %s vec2.generic[%d]);\n", i, i, ops[op], i);
43
44 puts("\treturn vec1;");
45
46 (void)bits;
47 }
48
49 static void op_builtin_avg_pbody(int op, int type, int bits, int size)
50 {
51 int i;
52
53 switch (type) {
54 case TYPE_INT:
55 printf("\t");
56 gen_print_stype(type, bits);
57 printf(" x_d_rem, y_d_rem, rem_d_quot, rem_d_rem;\n");
58
59 for (i = 0; i < size; i++)
60 printf(
61 "\tx_d_rem = (vec1.generic[%d] % 2);\n"
62 "\ty_d_rem = (vec2.generic[%d] % 2);\n"
63 "\trem_d_quot = ((x_d_rem + y_d_rem) / 2);\n"
64 "\trem_d_rem = ((x_d_rem + y_d_rem) % 2);\n"
65 "\n"
66 "\tvec1.generic[%d] = ((vec1.generic[%d] / 2) + (vec2.generic[%d] / 2)) + (rem_d_quot) + (rem_d_rem == 1);\n"
67 , i, i, i, i, i);
68 break;
69 case TYPE_UINT:
70 for (i = 0; i < size; i++)
71 printf("vec1.generic[%d] = (vec1.generic[%d] >> 1) + (vec2.generic[%d] >> 1) + ((vec1.generic[%d] | vec2.generic[%d]) & 1);\n", i, i, i, i, i);
72 break;
73 case TYPE_FLOAT:
74 /* this is probably fine. */
75 for (i = 0; i < size; i++)
76 printf("\tvec1.generic[%d] = (vec1.generic[%d] + vec2.generic[%d]) / 2;\n", i, i, i);
77 break;
78 }
79
80 printf("\treturn vec1;\n");
81 }
82
/*
 * Prints the body of the element-wise bitwise NOT operation.
 *
 * Only `size` (the element count) is used; one complement statement is
 * printed per element.  The emitted code updates `vec` in place and
 * returns it.
 */
static void op_builtin_not_pbody(int op, int type, int bits, int size)
{
	int lane = 0;

	while (lane < size) {
		printf("\tvec.generic[%d] = ~vec.generic[%d];\n", lane, lane);
		lane++;
	}

	puts("\treturn vec;");
}
92
93 static void op_builtin_shift_pbody(int op, int type, int bits, int size)
94 {
95 int i;
96
97 switch (type) {
98 case TYPE_UINT: {
99 const char *ops[] = {
100 [OP_LSHIFT] = "<<",
101 [OP_RSHIFT] = ">>",
102 [OP_LRSHIFT] = ">>",
103 };
104
105 for (i = 0; i < size; i++)
106 printf("\tvec1.generic[%d] %s= vec2.generic[%d];\n", i, ops[op]);
107 break;
108 }
109 case TYPE_INT: {
110 switch (op) {
111 case OP_LSHIFT:
112 case OP_LRSHIFT: {
113 const char *ops[] = {
114 [OP_LSHIFT] = "<<",
115 [OP_LRSHIFT] = ">>",
116 };
117
118 printf("\tunion { ");
119 gen_print_stype(TYPE_UINT, bits);
120 printf(" u; ");
121 gen_print_stype(TYPE_INT, bits);
122 puts(" s; } x;\n");
123
124 for (i = 0; i < size; i++)
125 printf(
126 "\tx.s = vec1.generic[%d];\n"
127 "\tx.u %s= vec2.generic[%d];\n"
128 "\tvec1.generic[%d] = x.s;\n",
129 i, ops[op], i, i);
130 break;
131 }
132 case OP_RSHIFT:
133 for (i = 0; i < size; i++)
134 printf("vec1.generic[%d] = ((~vec1.generic[%d]) >> vec2.generic[%d]);\n", i, i, i);
135 break;
136 }
137 break;
138 }
139 }
140
141 puts("\treturn vec1;");
142 }
143
144 static void op_builtin_nonzero_pbody(int op, int type, int bits, int size)
145 {
146 const char *ops[OP_FINAL_] = {
147 [OP_DIV] = "/",
148 [OP_MOD] = "%",
149 };
150 int i;
151
152 if (op == OP_MOD && type == TYPE_FLOAT) {
153 for (i = 0; i < size; i++)
154 printf("\tvec1.generic[%d] = (vec2.generic[%d] ? fmod(vec1.generic[%d], vec2.generic[%d]) : 0);\n", i, i, i, i);
155 } else {
156 for (i = 0; i < size; i++)
157 printf("\tvec1.generic[%d] = (vec2.generic[%d] ? (vec1.generic[%d] %s vec2.generic[%d]) : 0);\n", i, i, i, ops[op], i);
158 }
159
160 puts("\treturn vec1;");
161
162 (void)bits;
163 }
164
165 static void op_cmp_pbody(int op, int type, int bits, int size)
166 {
167 const char *ops[OP_FINAL_] = {
168 [OP_CMPLT] = "<",
169 [OP_CMPLE] = "<=",
170 [OP_CMPEQ] = "==",
171 [OP_CMPGE] = ">=",
172 [OP_CMPGT] = ">",
173 };
174 int i;
175
176 /* this is portable for int uint and float*/
177 for (i = 0; i < size; i++)
178 printf("\tmemset(&vec1.generic[%d], (vec1.generic[%d] %s vec2.generic[%d]) ? 0xFF : 0, %d);\n", i, i, ops[op], i, bits / 8);
179
180 puts("\treturn vec1;");
181 }
182
183 static void op_minmax_pbody(int op, int type, int bits, int size)
184 {
185 const char *ops[OP_FINAL_] = {
186 [OP_MIN] = "<",
187 [OP_MAX] = ">",
188 };
189 int i;
190
191 for (i = 0; i < size; i++)
192 printf("\tvec1.generic[%d] = (vec1.generic[%d] %s vec2.generic[%d]) ? (vec1.generic[%d]) : (vec2.generic[%d]);\n", i, i, ops[op], i, i, i);
193
194 puts("\treturn vec1;");
195 }
196
/*
 * Prints the body of the splat operation: declares a local vector `vec` of
 * the requested type, assigns the scalar `x` to each of its `size`
 * elements, and returns it.
 *
 * type, bits and size are forwarded to gen_print_vtype() to print the
 * vector type name; op is not consulted here.
 */
static void op_splat_pbody(int op, int type, int bits, int size)
{
	int lane = 0;

	/* declaration line: "\t<vector type> vec;" */
	printf("\t");
	gen_print_vtype(type, bits, size);
	printf(" vec;\n");

	while (lane < size) {
		printf("\tvec.generic[%d] = x;\n", lane);
		lane++;
	}

	puts("\treturn vec;");
}
210
/*
 * Prints the body of the load operations: declares a local vector `vec`,
 * copies (bits / 8) * size bytes from `x` into it with memcpy, and returns
 * it.
 *
 * type, bits and size are forwarded to gen_print_vtype() to print the
 * vector type name; op is not consulted here.
 */
static void op_load_pbody(int op, int type, int bits, int size)
{
	const int total_bytes = (bits / 8) * size;

	/* declaration line: "\t<vector type> vec;" */
	printf("\t");
	gen_print_vtype(type, bits, size);
	printf(" vec;\n");

	printf("\tmemcpy(vec.generic, x, %d);\n", total_bytes);

	puts("\treturn vec;");
}
223
/*
 * Prints the body of the store operations: a single memcpy of
 * (bits / 8) * size bytes from the vector's generic array into `x`.
 * op and type are not consulted here.
 */
static void op_store_pbody(int op, int type, int bits, int size)
{
	const int total_bytes = (bits / 8) * size;

	printf("\tmemcpy(x, vec.generic, %d);\n", total_bytes);
}
228
229 /* ------------------------------------------------------------------------ */
230
231 static struct op_impl op_impl[OP_FINAL_] = {
232 [OP_SPLAT] = {NULL, NULL, op_splat_pbody},
233 [OP_LOAD_ALIGNED] = {NULL, NULL, op_load_pbody},
234 [OP_LOAD] = {NULL, NULL, op_load_pbody},
235 [OP_STORE_ALIGNED] = {NULL, NULL, op_store_pbody},
236 [OP_STORE] = {NULL, NULL, op_store_pbody},
237
238 /* arithmetic */
239 [OP_ADD] = {NULL, NULL, op_builtin_pbody},
240 [OP_SUB] = {NULL, NULL, op_builtin_pbody},
241 [OP_MUL] = {NULL, NULL, op_builtin_pbody},
242 [OP_DIV] = {NULL, NULL, op_builtin_nonzero_pbody},
243 [OP_MOD] = {NULL, NULL, op_builtin_nonzero_pbody},
244 [OP_AVG] = {NULL, NULL, op_builtin_avg_pbody},
245
246 /* bitwise */
247 [OP_AND] = {NULL, NULL, op_builtin_pbody},
248 [OP_OR] = {NULL, NULL, op_builtin_pbody},
249 [OP_XOR] = {NULL, NULL, op_builtin_pbody},
250 [OP_NOT] = {NULL, NULL, op_builtin_not_pbody},
251
252 /* min/max */
253 [OP_MIN] = {NULL, NULL, op_minmax_pbody},
254 [OP_MAX] = {NULL, NULL, op_minmax_pbody},
255
256 /* bitshift */
257 [OP_LSHIFT] = {NULL, NULL, op_builtin_shift_pbody},
258 [OP_LRSHIFT] = {NULL, NULL, op_builtin_shift_pbody},
259 [OP_RSHIFT] = {NULL, NULL, op_builtin_shift_pbody},
260
261 /* comparison */
262 [OP_CMPLT] = {NULL, NULL, op_cmp_pbody},
263 [OP_CMPLE] = {NULL, NULL, op_cmp_pbody},
264 [OP_CMPEQ] = {NULL, NULL, op_cmp_pbody},
265 [OP_CMPGE] = {NULL, NULL, op_cmp_pbody},
266 [OP_CMPGT] = {NULL, NULL, op_cmp_pbody},
267 };
268
269 int main(void)
270 {
271 gen(op_impl, "generic");
272
273 return 0;
274 }