comparison utils/gengcc.c @ 45:7955bed1d169 default tip

*: add preliminary floating point support. No x86 intrinsics just yet, but I did add altivec since it's (arguably) the simplest :)
author Paper <paper@tflc.us>
date Wed, 30 Apr 2025 18:36:38 -0400
parents b0a3f0248ecc
children
comparison
equal deleted inserted replaced
44:b0a3f0248ecc 45:7955bed1d169
1 /**
2 * vec - a tiny SIMD vector library in C99
3 *
4 * Copyright (c) 2024-2025 Paper
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 **/
24
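25 /* Use this file to generate the GCC implementation header (the one guarded by VEC_IMPL_GCC_H_; presumably include/vec/impl/gcc.h) !!
26 *
27 * `gcc -o gengcc gengcc.c`, then redirect the program's standard output into the header */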
28
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <ctype.h>
33
/* Element count of a real array `x`; do not use it on a pointer. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
35
36 /* ------------------------------------------------------------------------ */
37
/*
 * Operations the generator knows about, grouped by the shape of the
 * generated function's signature.
 *
 * print_ops() emits every value below OP_FINAL_; anything listed after
 * OP_FINAL_ is parked and never generated.
 */
enum op {
	/* scalar in, vector out */
	OP_SPLAT,

	/* array in, vector out */
	OP_LOAD_ALIGNED,
	OP_LOAD,

	/* vector and array in, nothing returned */
	OP_STORE_ALIGNED,
	OP_STORE,

	/* two vectors of the same type in, vector out */
	OP_ADD,
	OP_SUB,
	OP_MUL,
	OP_AND,
	OP_OR,
	OP_XOR,
	OP_CMPLT,
	OP_CMPEQ,
	OP_CMPGT,
	OP_CMPLE,
	OP_CMPGE,
	OP_MIN,
	OP_MAX,
	OP_AVG,

	/* vector plus an explicitly unsigned shift-count vector in, vector out */
	OP_LSHIFT,
	OP_RSHIFT,
	OP_LRSHIFT,

	/* single vector in, vector out */
	OP_NOT,

	/* end marker: number of operations that are actually generated */
	OP_FINAL_,

	/* Operations with some sort of "caveat" live below the marker until
	 * they are fixed or removed. */

	OP_DIV, /* causes a floating point exception on the author's machine;
	         * possibly the divide-by-zero behavior could be changed
	         * with some gcc pragma ? --paper */
	OP_MOD, /* same problem as OP_DIV */
};
84
85 /* convert op -> string */
86 static struct {
87 const char *u;
88 const char *l;
89 } op_names[] = {
90 [OP_SPLAT] = {"SPLAT", "splat"},
91 [OP_LOAD_ALIGNED] = {"LOAD_ALIGNED", "load_aligned"},
92 [OP_LOAD] = {"LOAD", "load"},
93 [OP_STORE_ALIGNED] = {"STORE_ALIGNED", "store_aligned"},
94 [OP_STORE] = {"STORE", "store"},
95 [OP_ADD] = {"ADD", "add"},
96 [OP_SUB] = {"SUB", "sub"},
97 [OP_MUL] = {"MUL", "mul"},
98 /*[OP_DIV] = {"DIV", "div"},*/
99 [OP_AVG] = {"AVG", "avg"},
100 [OP_AND] = {"AND", "and"},
101 [OP_OR] = {"OR", "or"},
102 [OP_XOR] = {"XOR", "xor"},
103 [OP_NOT] = {"NOT", "not"},
104 [OP_CMPLT] = {"CMPLT", "cmplt"},
105 [OP_CMPEQ] = {"CMPEQ", "cmpeq"},
106 [OP_CMPGT] = {"CMPGT", "cmpgt"},
107 [OP_CMPLE] = {"CMPLE", "cmple"},
108 [OP_CMPGE] = {"CMPGE", "cmpge"},
109 [OP_MIN] = {"MIN", "min"},
110 [OP_MAX] = {"MAX", "max"},
111 [OP_RSHIFT] = {"RSHIFT", "rshift"},
112 [OP_LRSHIFT] = {"LRSHIFT", "lrshift"},
113 [OP_LSHIFT] = {"LSHIFT", "lshift"},
114 };
115
/* Sign prefix for type and macro names: empty when `x` is nonzero (signed),
 * "U" / "u" when it is zero (unsigned). */
#define UPSIGN(x) (!(x) ? "U" : "")
#define LOSIGN(x) (!(x) ? "u" : "")
118
119 static void print_gcc_op(enum op op, int is_signed, int bits, int size)
120 {
121 int i;
122 int gccprereq = 0;
123
124 switch (op) {
125 case OP_CMPEQ:
126 case OP_CMPLE:
127 case OP_CMPLT:
128 case OP_CMPGT:
129 case OP_CMPGE:
130 case OP_MIN:
131 case OP_MAX:
132 case OP_LSHIFT:
133 case OP_RSHIFT:
134 case OP_LRSHIFT:
135 case OP_AVG:
136 puts("#if VEC_GNUC_ATLEAST(4, 3, 0)");
137 gccprereq = 1;
138 break;
139 default:
140 break;
141 }
142
143 printf("#ifndef V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u);
144
145 printf("VEC_FUNC_IMPL ");
146
147 /* first; the return value */
148 switch (op) {
149 case OP_SPLAT:
150 case OP_LOAD_ALIGNED:
151 case OP_LOAD:
152 case OP_ADD:
153 case OP_SUB:
154 case OP_MUL:
155 case OP_DIV:
156 case OP_AND:
157 case OP_OR:
158 case OP_XOR:
159 case OP_CMPLT:
160 case OP_CMPEQ:
161 case OP_CMPGT:
162 case OP_CMPLE:
163 case OP_CMPGE:
164 case OP_MIN:
165 case OP_MAX:
166 case OP_AVG:
167 case OP_RSHIFT:
168 case OP_LRSHIFT:
169 case OP_LSHIFT:
170 case OP_NOT:
171 printf("v%sint%dx%d", LOSIGN(is_signed), bits, size);
172 break;
173 case OP_STORE_ALIGNED:
174 case OP_STORE:
175 printf("void");
176 break;
177 }
178
179 /* whitespace and function name */
180 printf(" v%sint%dx%d_%s(", LOSIGN(is_signed), bits, size, op_names[op].l);
181
182 /* parameters */
183 switch (op) {
184 case OP_SPLAT:
185 printf("vec_%sint%d x", LOSIGN(is_signed), bits);
186 break;
187 case OP_LOAD_ALIGNED:
188 case OP_LOAD:
189 printf("const vec_%sint%d x[%d]", LOSIGN(is_signed), bits, size);
190 break;
191 case OP_STORE_ALIGNED:
192 case OP_STORE:
193 printf("v%sint%dx%d vec, vec_%sint%d arr[%d]", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size);
194 break;
195 case OP_ADD:
196 case OP_SUB:
197 case OP_MUL:
198 case OP_DIV:
199 case OP_AND:
200 case OP_OR:
201 case OP_XOR:
202 case OP_CMPLT:
203 case OP_CMPEQ:
204 case OP_CMPGT:
205 case OP_CMPLE:
206 case OP_CMPGE:
207 case OP_MIN:
208 case OP_MAX:
209 case OP_AVG:
210 printf("v%sint%dx%d vec1, v%sint%dx%d vec2", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size);
211 break;
212 case OP_RSHIFT:
213 case OP_LRSHIFT:
214 case OP_LSHIFT:
215 printf("v%sint%dx%d vec1, vuint%dx%d vec2", LOSIGN(is_signed), bits, size, bits, size);
216 break;
217 case OP_NOT:
218 printf("v%sint%dx%d vec", LOSIGN(is_signed), bits, size);
219 break;
220 }
221
222 puts(")\n{");
223
224 switch (op) {
225 case OP_SPLAT:
226 printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
227 printf("\tvec.gcc = (__typeof__(vec.gcc)){");
228 for (i = 0; i < size; i++)
229 printf("x,");
230 printf("};\n");
231 printf("\treturn vec;\n");
232 break;
233 case OP_LOAD_ALIGNED:
234 printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
235 puts("\tvec.gcc = *(__typeof__(vec.gcc) *)x;");
236 printf("\treturn vec;\n");
237 break;
238 case OP_LOAD:
239 printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size);
240 puts("\tmemcpy(&vec, x, sizeof(vec));");
241 printf("\treturn vec;\n");
242 break;
243 case OP_STORE_ALIGNED:
244 puts("\t*(__typeof__(vec.gcc) *)arr = vec.gcc;");
245 break;
246 case OP_STORE:
247 puts("\tmemcpy(arr, &vec, sizeof(vec));");
248 break;
249 case OP_ADD:
250 case OP_SUB:
251 case OP_MUL:
252 case OP_DIV:
253 case OP_AND:
254 case OP_OR:
255 case OP_XOR:
256 case OP_CMPLT:
257 case OP_CMPEQ:
258 case OP_CMPGT:
259 case OP_CMPLE:
260 case OP_CMPGE: {
261 const char *op_builtins[OP_CMPGE - OP_ADD + 1] = {"+", "-", "*", /*"/", */"&", "|", "^", "<", "==", ">", "<=", ">="};
262
263 printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_ADD]);
264 printf("\treturn vec1;\n");
265 break;
266 }
267
268 case OP_LSHIFT:
269 case OP_RSHIFT: {
270 const char *op_builtins[OP_RSHIFT - OP_LSHIFT + 1] = {"<<", ">>"};
271
272 printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_LSHIFT]);
273 printf("\treturn vec1;\n");
274 break;
275 }
276
277 case OP_LRSHIFT: {
278 /* sigh */
279 printf("\tvec1.gcc = (__typeof__(vec1.gcc))((vec_uint%d __attribute__((__vector_size__(%d))))vec1.gcc >> vec2.gcc);\n", bits, bits * size / 8);
280 printf("\treturn vec1;\n");
281 break;
282 }
283 case OP_MIN:
284 case OP_MAX: {
285 const char *op_builtins[OP_MAX - OP_MIN + 1] = {"<", ">"};
286
287 printf("\tv%sint%dx%d mask;\n", LOSIGN(is_signed), bits, size);
288 printf("\tmask.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_MIN]);
289 printf("\tvec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);\n");
290 printf("\treturn vec1;\n");
291 break;
292 }
293 case OP_AVG:
294 printf("\tvint%dx%d ones = vint%dx%d_splat(1);\n", bits, size, bits, size);
295
296 if (is_signed) {
297 puts("\t__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);");
298 puts("\t__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);");
299 puts("\t__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);");
300 puts("\t__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);");
301 puts("");
302 printf("\tvec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);\n");
303 } else {
304 printf("\tvec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);\n");
305 }
306
307 printf("\treturn vec1;\n");
308 break;
309 case OP_NOT:
310 printf("\tvec.gcc = ~vec.gcc;\n");
311 printf("\treturn vec;\n");
312 break;
313 default:
314 printf("#error implement this operation");
315 break;
316 }
317
318 /* end function definition */
319 puts("}");
320
321 printf("# define V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u);
322 puts("#endif");
323
324 if (gccprereq)
325 puts("#endif");
326 }
327
328 static inline void print_ops(int is_signed, int bits, int size)
329 {
330 int i;
331
332 printf("\n\n/* v%sint%dx%d */\n\n", (is_signed ? "u" : ""), bits, size);
333
334 for (i = 0; i < OP_FINAL_; i++)
335 print_gcc_op(i, is_signed, bits, size);
336 }
337
/* Text printed at the very top of the generated file: the license block,
 * a "generated, do not edit" notice, and the opening of the include guard. */
static const char *header =
	/* license */
	"/**\n"
	" * vec - a tiny SIMD vector library in C99\n"
	" * \n"
	" * Copyright (c) 2024-2025 Paper\n"
	" * \n"
	" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
	" * of this software and associated documentation files (the \"Software\"), to deal\n"
	" * in the Software without restriction, including without limitation the rights\n"
	" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
	" * copies of the Software, and to permit persons to whom the Software is\n"
	" * furnished to do so, subject to the following conditions:\n"
	" * \n"
	" * The above copyright notice and this permission notice shall be included in all\n"
	" * copies or substantial portions of the Software.\n"
	" * \n"
	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
	" * SOFTWARE.\n"
	"**/\n"
	"\n"
	/* generated-file notice */
	"/* This file is automatically generated! Do not edit it directly!\n"
	" * Edit the code that generates it in utils/gengcc.c --paper */\n"
	"\n"
	/* include guard, closed again by the footer text */
	"#ifndef VEC_IMPL_GCC_H_\n"
	"#define VEC_IMPL_GCC_H_\n"
	"\n";
369
/* Text printed at the very end of the generated file: closes the
 * VEC_IMPL_GCC_H_ include guard. */
static const char *footer = "#endif /* VEC_IMPL_GCC_H_ */\n";
372
373 int main(void)
374 {
375 static struct {
376 int bits, size;
377 } defs[] = {
378 /* -- 8-bit */
379 {8, 2},
380 {8, 4},
381 {8, 8},
382 {8, 16},
383 {8, 32},
384 {8, 64},
385
386 /* -- 16-bit */
387 {16, 2},
388 {16, 4},
389 {16, 8},
390 {16, 16},
391 {16, 32},
392
393 /* -- 32-bit */
394 {32, 2},
395 {32, 4},
396 {32, 8},
397 {32, 16},
398
399 /* -- 64-bit */
400 {64, 2},
401 {64, 4},
402 {64, 8},
403 };
404 int i;
405
406 puts(header);
407
408 for (i = 0; i < ARRAY_SIZE(defs); i++) {
409 print_ops(1, defs[i].bits, defs[i].size);
410 print_ops(0, defs[i].bits, defs[i].size);
411 }
412
413 puts(footer);
414 }