Mercurial > vec
comparison utils/gengcc.c @ 45:7955bed1d169 default tip
*: add preliminary floating point support
no x86 intrinsics just yet, but I did add altivec since it's
(arguably) the simplest :)
author | Paper <paper@tflc.us> |
---|---|
date | Wed, 30 Apr 2025 18:36:38 -0400 |
parents | b0a3f0248ecc |
children |
comparison
equal
deleted
inserted
replaced
44:b0a3f0248ecc | 45:7955bed1d169 |
---|---|
1 /** | |
2 * vec - a tiny SIMD vector library in C99 | |
3 * | |
4 * Copyright (c) 2024-2025 Paper | |
5 * | |
6 * Permission is hereby granted, free of charge, to any person obtaining a copy | |
7 * of this software and associated documentation files (the "Software"), to deal | |
8 * in the Software without restriction, including without limitation the rights | |
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
10 * copies of the Software, and to permit persons to whom the Software is | |
11 * furnished to do so, subject to the following conditions: | |
12 * | |
13 * The above copyright notice and this permission notice shall be included in all | |
14 * copies or substantial portions of the Software. | |
15 * | |
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
22 * SOFTWARE. | |
23 **/ | |
24 | |
25 /* Use this file to generate include/vec/impl/gcc.h !! | |
26 * | |
27 * `gcc -o gengcc gengcc.c` */ | |
28 | |
29 #include <stdio.h> | |
30 #include <stdlib.h> | |
31 #include <string.h> | |
32 #include <ctype.h> | |
33 | |
/* Element count of a true array (not a pointer): total bytes divided by the
 * bytes of one element. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
35 | |
36 /* ------------------------------------------------------------------------ */ | |
37 | |
/* Every operation this generator knows about.
 *
 * Values 0 .. OP_FINAL_-1 are the operations that actually get emitted;
 * print_ops() walks them in numeric order. Keep the grouping and relative
 * order stable, since other code in this file may rely on it. */
enum op {
	/* vector <- one scalar integer */
	OP_SPLAT = 0,

	/* vector <- array */
	OP_LOAD_ALIGNED,
	OP_LOAD,

	/* void <- (vector, array) */
	OP_STORE_ALIGNED,
	OP_STORE,

	/* vector <- (vector, vector) */
	OP_ADD,
	OP_SUB,
	OP_MUL,
	OP_AND,
	OP_OR,
	OP_XOR,
	OP_CMPLT,
	OP_CMPEQ,
	OP_CMPGT,
	OP_CMPLE,
	OP_CMPGE,
	OP_MIN,
	OP_MAX,
	OP_AVG,

	/* vector <- (vector, explicitly unsigned vector) */
	OP_LSHIFT,
	OP_RSHIFT,
	OP_LRSHIFT,

	/* vector <- vector */
	OP_NOT,

	/* one past the last emitted operation */
	OP_FINAL_,

	/* Operations with some kind of caveat are parked below OP_FINAL_
	 * until they are fixed or removed. */

	/* Causes a floating point exception on the author's machine; possibly
	 * the divide-by-zero behavior could be changed with some gcc
	 * pragma? --paper */
	OP_DIV,

	/* Same problem as OP_DIV. */
	OP_MOD,
};
84 | |
85 /* convert op -> string */ | |
86 static struct { | |
87 const char *u; | |
88 const char *l; | |
89 } op_names[] = { | |
90 [OP_SPLAT] = {"SPLAT", "splat"}, | |
91 [OP_LOAD_ALIGNED] = {"LOAD_ALIGNED", "load_aligned"}, | |
92 [OP_LOAD] = {"LOAD", "load"}, | |
93 [OP_STORE_ALIGNED] = {"STORE_ALIGNED", "store_aligned"}, | |
94 [OP_STORE] = {"STORE", "store"}, | |
95 [OP_ADD] = {"ADD", "add"}, | |
96 [OP_SUB] = {"SUB", "sub"}, | |
97 [OP_MUL] = {"MUL", "mul"}, | |
98 /*[OP_DIV] = {"DIV", "div"},*/ | |
99 [OP_AVG] = {"AVG", "avg"}, | |
100 [OP_AND] = {"AND", "and"}, | |
101 [OP_OR] = {"OR", "or"}, | |
102 [OP_XOR] = {"XOR", "xor"}, | |
103 [OP_NOT] = {"NOT", "not"}, | |
104 [OP_CMPLT] = {"CMPLT", "cmplt"}, | |
105 [OP_CMPEQ] = {"CMPEQ", "cmpeq"}, | |
106 [OP_CMPGT] = {"CMPGT", "cmpgt"}, | |
107 [OP_CMPLE] = {"CMPLE", "cmple"}, | |
108 [OP_CMPGE] = {"CMPGE", "cmpge"}, | |
109 [OP_MIN] = {"MIN", "min"}, | |
110 [OP_MAX] = {"MAX", "max"}, | |
111 [OP_RSHIFT] = {"RSHIFT", "rshift"}, | |
112 [OP_LRSHIFT] = {"LRSHIFT", "lrshift"}, | |
113 [OP_LSHIFT] = {"LSHIFT", "lshift"}, | |
114 }; | |
115 | |
/* Signedness prefix for generated identifiers: empty for signed types,
 * "U" (macro names) or "u" (type and function names) for unsigned ones. */
#define UPSIGN(x) (!(x) ? "U" : "")
#define LOSIGN(x) (!(x) ? "u" : "")
118 | |
119 static void print_gcc_op(enum op op, int is_signed, int bits, int size) | |
120 { | |
121 int i; | |
122 int gccprereq = 0; | |
123 | |
124 switch (op) { | |
125 case OP_CMPEQ: | |
126 case OP_CMPLE: | |
127 case OP_CMPLT: | |
128 case OP_CMPGT: | |
129 case OP_CMPGE: | |
130 case OP_MIN: | |
131 case OP_MAX: | |
132 case OP_LSHIFT: | |
133 case OP_RSHIFT: | |
134 case OP_LRSHIFT: | |
135 case OP_AVG: | |
136 puts("#if VEC_GNUC_ATLEAST(4, 3, 0)"); | |
137 gccprereq = 1; | |
138 break; | |
139 default: | |
140 break; | |
141 } | |
142 | |
143 printf("#ifndef V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u); | |
144 | |
145 printf("VEC_FUNC_IMPL "); | |
146 | |
147 /* first; the return value */ | |
148 switch (op) { | |
149 case OP_SPLAT: | |
150 case OP_LOAD_ALIGNED: | |
151 case OP_LOAD: | |
152 case OP_ADD: | |
153 case OP_SUB: | |
154 case OP_MUL: | |
155 case OP_DIV: | |
156 case OP_AND: | |
157 case OP_OR: | |
158 case OP_XOR: | |
159 case OP_CMPLT: | |
160 case OP_CMPEQ: | |
161 case OP_CMPGT: | |
162 case OP_CMPLE: | |
163 case OP_CMPGE: | |
164 case OP_MIN: | |
165 case OP_MAX: | |
166 case OP_AVG: | |
167 case OP_RSHIFT: | |
168 case OP_LRSHIFT: | |
169 case OP_LSHIFT: | |
170 case OP_NOT: | |
171 printf("v%sint%dx%d", LOSIGN(is_signed), bits, size); | |
172 break; | |
173 case OP_STORE_ALIGNED: | |
174 case OP_STORE: | |
175 printf("void"); | |
176 break; | |
177 } | |
178 | |
179 /* whitespace and function name */ | |
180 printf(" v%sint%dx%d_%s(", LOSIGN(is_signed), bits, size, op_names[op].l); | |
181 | |
182 /* parameters */ | |
183 switch (op) { | |
184 case OP_SPLAT: | |
185 printf("vec_%sint%d x", LOSIGN(is_signed), bits); | |
186 break; | |
187 case OP_LOAD_ALIGNED: | |
188 case OP_LOAD: | |
189 printf("const vec_%sint%d x[%d]", LOSIGN(is_signed), bits, size); | |
190 break; | |
191 case OP_STORE_ALIGNED: | |
192 case OP_STORE: | |
193 printf("v%sint%dx%d vec, vec_%sint%d arr[%d]", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size); | |
194 break; | |
195 case OP_ADD: | |
196 case OP_SUB: | |
197 case OP_MUL: | |
198 case OP_DIV: | |
199 case OP_AND: | |
200 case OP_OR: | |
201 case OP_XOR: | |
202 case OP_CMPLT: | |
203 case OP_CMPEQ: | |
204 case OP_CMPGT: | |
205 case OP_CMPLE: | |
206 case OP_CMPGE: | |
207 case OP_MIN: | |
208 case OP_MAX: | |
209 case OP_AVG: | |
210 printf("v%sint%dx%d vec1, v%sint%dx%d vec2", LOSIGN(is_signed), bits, size, LOSIGN(is_signed), bits, size); | |
211 break; | |
212 case OP_RSHIFT: | |
213 case OP_LRSHIFT: | |
214 case OP_LSHIFT: | |
215 printf("v%sint%dx%d vec1, vuint%dx%d vec2", LOSIGN(is_signed), bits, size, bits, size); | |
216 break; | |
217 case OP_NOT: | |
218 printf("v%sint%dx%d vec", LOSIGN(is_signed), bits, size); | |
219 break; | |
220 } | |
221 | |
222 puts(")\n{"); | |
223 | |
224 switch (op) { | |
225 case OP_SPLAT: | |
226 printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); | |
227 printf("\tvec.gcc = (__typeof__(vec.gcc)){"); | |
228 for (i = 0; i < size; i++) | |
229 printf("x,"); | |
230 printf("};\n"); | |
231 printf("\treturn vec;\n"); | |
232 break; | |
233 case OP_LOAD_ALIGNED: | |
234 printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); | |
235 puts("\tvec.gcc = *(__typeof__(vec.gcc) *)x;"); | |
236 printf("\treturn vec;\n"); | |
237 break; | |
238 case OP_LOAD: | |
239 printf("\tv%sint%dx%d vec;\n", LOSIGN(is_signed), bits, size); | |
240 puts("\tmemcpy(&vec, x, sizeof(vec));"); | |
241 printf("\treturn vec;\n"); | |
242 break; | |
243 case OP_STORE_ALIGNED: | |
244 puts("\t*(__typeof__(vec.gcc) *)arr = vec.gcc;"); | |
245 break; | |
246 case OP_STORE: | |
247 puts("\tmemcpy(arr, &vec, sizeof(vec));"); | |
248 break; | |
249 case OP_ADD: | |
250 case OP_SUB: | |
251 case OP_MUL: | |
252 case OP_DIV: | |
253 case OP_AND: | |
254 case OP_OR: | |
255 case OP_XOR: | |
256 case OP_CMPLT: | |
257 case OP_CMPEQ: | |
258 case OP_CMPGT: | |
259 case OP_CMPLE: | |
260 case OP_CMPGE: { | |
261 const char *op_builtins[OP_CMPGE - OP_ADD + 1] = {"+", "-", "*", /*"/", */"&", "|", "^", "<", "==", ">", "<=", ">="}; | |
262 | |
263 printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_ADD]); | |
264 printf("\treturn vec1;\n"); | |
265 break; | |
266 } | |
267 | |
268 case OP_LSHIFT: | |
269 case OP_RSHIFT: { | |
270 const char *op_builtins[OP_RSHIFT - OP_LSHIFT + 1] = {"<<", ">>"}; | |
271 | |
272 printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_LSHIFT]); | |
273 printf("\treturn vec1;\n"); | |
274 break; | |
275 } | |
276 | |
277 case OP_LRSHIFT: { | |
278 /* sigh */ | |
279 printf("\tvec1.gcc = (__typeof__(vec1.gcc))((vec_uint%d __attribute__((__vector_size__(%d))))vec1.gcc >> vec2.gcc);\n", bits, bits * size / 8); | |
280 printf("\treturn vec1;\n"); | |
281 break; | |
282 } | |
283 case OP_MIN: | |
284 case OP_MAX: { | |
285 const char *op_builtins[OP_MAX - OP_MIN + 1] = {"<", ">"}; | |
286 | |
287 printf("\tv%sint%dx%d mask;\n", LOSIGN(is_signed), bits, size); | |
288 printf("\tmask.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op - OP_MIN]); | |
289 printf("\tvec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);\n"); | |
290 printf("\treturn vec1;\n"); | |
291 break; | |
292 } | |
293 case OP_AVG: | |
294 printf("\tvint%dx%d ones = vint%dx%d_splat(1);\n", bits, size, bits, size); | |
295 | |
296 if (is_signed) { | |
297 puts("\t__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);"); | |
298 puts("\t__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);"); | |
299 puts("\t__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);"); | |
300 puts("\t__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);"); | |
301 puts(""); | |
302 printf("\tvec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);\n"); | |
303 } else { | |
304 printf("\tvec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & ones.gcc);\n"); | |
305 } | |
306 | |
307 printf("\treturn vec1;\n"); | |
308 break; | |
309 case OP_NOT: | |
310 printf("\tvec.gcc = ~vec.gcc;\n"); | |
311 printf("\treturn vec;\n"); | |
312 break; | |
313 default: | |
314 printf("#error implement this operation"); | |
315 break; | |
316 } | |
317 | |
318 /* end function definition */ | |
319 puts("}"); | |
320 | |
321 printf("# define V%sINT%dx%d_%s_DEFINED\n", UPSIGN(is_signed), bits, size, op_names[op].u); | |
322 puts("#endif"); | |
323 | |
324 if (gccprereq) | |
325 puts("#endif"); | |
326 } | |
327 | |
328 static inline void print_ops(int is_signed, int bits, int size) | |
329 { | |
330 int i; | |
331 | |
332 printf("\n\n/* v%sint%dx%d */\n\n", (is_signed ? "u" : ""), bits, size); | |
333 | |
334 for (i = 0; i < OP_FINAL_; i++) | |
335 print_gcc_op(i, is_signed, bits, size); | |
336 } | |
337 | |
/* Text written at the very top of the generated file: the licence block,
 * the "automatically generated" notice and the opening of the include
 * guard that `footer` closes. */
static const char *const header =
	"/**\n"
	" * vec - a tiny SIMD vector library in C99\n"
	" * \n"
	" * Copyright (c) 2024-2025 Paper\n"
	" * \n"
	" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
	" * of this software and associated documentation files (the \"Software\"), to deal\n"
	" * in the Software without restriction, including without limitation the rights\n"
	" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
	" * copies of the Software, and to permit persons to whom the Software is\n"
	" * furnished to do so, subject to the following conditions:\n"
	" * \n"
	" * The above copyright notice and this permission notice shall be included in all\n"
	" * copies or substantial portions of the Software.\n"
	" * \n"
	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
	" * SOFTWARE.\n"
	"**/\n"
	"\n"
	"/* This file is automatically generated! Do not edit it directly!\n"
	" * Edit the code that generates it in utils/gengcc.c  --paper */\n"
	"\n"
	"#ifndef VEC_IMPL_GCC_H_\n"
	"#define VEC_IMPL_GCC_H_\n"
	"\n";
369 | |
/* Text written at the very end of the generated file: closes the include
 * guard opened by the header text. */
static const char *const footer = "#endif /* VEC_IMPL_GCC_H_ */\n";
372 | |
373 int main(void) | |
374 { | |
375 static struct { | |
376 int bits, size; | |
377 } defs[] = { | |
378 /* -- 8-bit */ | |
379 {8, 2}, | |
380 {8, 4}, | |
381 {8, 8}, | |
382 {8, 16}, | |
383 {8, 32}, | |
384 {8, 64}, | |
385 | |
386 /* -- 16-bit */ | |
387 {16, 2}, | |
388 {16, 4}, | |
389 {16, 8}, | |
390 {16, 16}, | |
391 {16, 32}, | |
392 | |
393 /* -- 32-bit */ | |
394 {32, 2}, | |
395 {32, 4}, | |
396 {32, 8}, | |
397 {32, 16}, | |
398 | |
399 /* -- 64-bit */ | |
400 {64, 2}, | |
401 {64, 4}, | |
402 {64, 8}, | |
403 }; | |
404 int i; | |
405 | |
406 puts(header); | |
407 | |
408 for (i = 0; i < ARRAY_SIZE(defs); i++) { | |
409 print_ops(1, defs[i].bits, defs[i].size); | |
410 print_ops(0, defs[i].bits, defs[i].size); | |
411 } | |
412 | |
413 puts(footer); | |
414 } |