comparison gen/gendouble.c @ 45:7955bed1d169 default tip

*: add preliminary floating point support. No x86 intrinsics just yet, but I did add altivec since it's (arguably) the simplest :)
author Paper <paper@tflc.us>
date Wed, 30 Apr 2025 18:36:38 -0400
parents
children
comparison
equal deleted inserted replaced
44:b0a3f0248ecc 45:7955bed1d169
1 /**
2 * vec - a tiny SIMD vector library in C99
3 *
4 * Copyright (c) 2024-2025 Paper
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 **/
24
25 #include "genlib.h"
26
/* Element count of a true array (not a pointer): total bytes over bytes per element. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
28
/* XXX: would it be faster to unroll literally everything instead of defining everything,
 * and then unpacking it all? */

/*
 * Text placed at the top of the generated header: the license banner, the
 * "do not edit" notice, and the VEC_DOUBLE_* helper macros that build a
 * vector operation out of two calls on the half-size vector type.
 *
 * The "do not edit" notice names this generator (gen/gendouble.c) so that
 * readers of the generated file are pointed at the right source.
 *
 * NOTE(review): nothing in this file references `header`; main() only passes
 * op_impl and "double" to gen(). Confirm whether it is still needed.
 */
static const char *header =
	"/**\n"
	" * vec - a tiny SIMD vector library in C99\n"
	" * \n"
	" * Copyright (c) 2024-2025 Paper\n"
	" * \n"
	" * Permission is hereby granted, free of charge, to any person obtaining a copy\n"
	" * of this software and associated documentation files (the \"Software\"), to deal\n"
	" * in the Software without restriction, including without limitation the rights\n"
	" * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
	" * copies of the Software, and to permit persons to whom the Software is\n"
	" * furnished to do so, subject to the following conditions:\n"
	" * \n"
	" * The above copyright notice and this permission notice shall be included in all\n"
	" * copies or substantial portions of the Software.\n"
	" * \n"
	" * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n"
	" * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n"
	" * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n"
	" * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n"
	" * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n"
	" * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n"
	" * SOFTWARE.\n"
	"**/\n"
	"\n"
	"/* This file is automatically generated! Do not edit it directly!\n"
	" * Edit the code that generates it in gen/gendouble.c --paper */\n"
	"\n"
	"#ifndef VEC_IMPL_DOUBLE_H_\n"
	"#define VEC_IMPL_DOUBLE_H_\n"
	"\n"
	"#define VEC_DOUBLE_SPLAT(sign, bits, size, halfsize) \\\n"
	" VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \\\n"
	" { \\\n"
	" v##sign##int##bits##x##size vec; \\\n"
	" \\\n"
	" vec.dbl[0] = v##sign##int##bits##x##halfsize##_splat(x); \\\n"
	" vec.dbl[1] = v##sign##int##bits##x##halfsize##_splat(x); \\\n"
	" \\\n"
	" return vec; \\\n"
	" }\n"
	"\n"
	"#define VEC_DOUBLE_LOAD_EX(name, sign, bits, size, halfsize) \\\n"
	" VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits x[size]) \\\n"
	" { \\\n"
	" v##sign##int##bits##x##size vec; \\\n"
	" \\\n"
	" vec.dbl[0] = v##sign##int##bits##x##halfsize##_##name(x); \\\n"
	" vec.dbl[1] = v##sign##int##bits##x##halfsize##_##name(x + halfsize); \\\n"
	" \\\n"
	" return vec; \\\n"
	" }\n"
	"\n"
	"#define VEC_DOUBLE_LOAD(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load, sign, bits, size, halfsize)\n"
	"#define VEC_DOUBLE_LOAD_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_LOAD_EX(load_aligned, sign, bits, size, halfsize)\n"
	"\n"
	"#define VEC_DOUBLE_STORE_EX(name, sign, bits, size, halfsize) \\\n"
	" VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits x[size]) \\\n"
	" { \\\n"
	" v##sign##int##bits##x##halfsize##_##name(vec.dbl[0], x); \\\n"
	" v##sign##int##bits##x##halfsize##_##name(vec.dbl[1], x + halfsize); \\\n"
	" }\n"
	"\n"
	"#define VEC_DOUBLE_STORE(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store, sign, bits, size, halfsize)\n"
	"#define VEC_DOUBLE_STORE_ALIGNED(sign, bits, size, halfsize) VEC_DOUBLE_STORE_EX(store_aligned, sign, bits, size, halfsize)\n"
	"\n"
	"#define VEC_DOUBLE_OP(name, sign, bits, size, halfsize, secondsign) \\\n"
	" VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec1, v##secondsign##int##bits##x##size vec2) \\\n"
	" { \\\n"
	" vec1.dbl[0] = v##sign##int##bits##x##halfsize##_##name(vec1.dbl[0], vec2.dbl[0]); \\\n"
	" vec1.dbl[1] = v##sign##int##bits##x##halfsize##_##name(vec1.dbl[1], vec2.dbl[1]); \\\n"
	" \\\n"
	" return vec1; \\\n"
	" }\n"
	"\n"
	"#define VEC_DOUBLE_ADD(sign, bits, size, halfsize) VEC_DOUBLE_OP(add, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_SUB(sign, bits, size, halfsize) VEC_DOUBLE_OP(sub, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_MUL(sign, bits, size, halfsize) VEC_DOUBLE_OP(mul, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_DIV(sign, bits, size, halfsize) VEC_DOUBLE_OP(div, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_MOD(sign, bits, size, halfsize) VEC_DOUBLE_OP(mod, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_AVG(sign, bits, size, halfsize) VEC_DOUBLE_OP(avg, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_LSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lshift, sign, bits, size, halfsize, u)\n"
	"#define VEC_DOUBLE_RSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(rshift, sign, bits, size, halfsize, u)\n"
	"#define VEC_DOUBLE_LRSHIFT(sign, bits, size, halfsize) VEC_DOUBLE_OP(lrshift, sign, bits, size, halfsize, u)\n"
	"#define VEC_DOUBLE_AND(sign, bits, size, halfsize) VEC_DOUBLE_OP(and, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_OR(sign, bits, size, halfsize) VEC_DOUBLE_OP(or, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_XOR(sign, bits, size, halfsize) VEC_DOUBLE_OP(xor, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_MIN(sign, bits, size, halfsize) VEC_DOUBLE_OP(min, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_MAX(sign, bits, size, halfsize) VEC_DOUBLE_OP(max, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_CMPLT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmplt, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_CMPLE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmple, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_CMPEQ(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpeq, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_CMPGE(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpge, sign, bits, size, halfsize, sign)\n"
	"#define VEC_DOUBLE_CMPGT(sign, bits, size, halfsize) VEC_DOUBLE_OP(cmpgt, sign, bits, size, halfsize, sign)\n"
	"\n"
	"#define VEC_DOUBLE_NOT(sign, bits, size, halfsize) \\\n"
	" VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \\\n"
	" { \\\n"
	" vec.dbl[0] = v##sign##int##bits##x##halfsize##_not(vec.dbl[0]); \\\n"
	" vec.dbl[1] = v##sign##int##bits##x##halfsize##_not(vec.dbl[1]); \\\n"
	" \\\n"
	" return vec; \\\n"
	" }\n"
	"\n"
	"#endif /* VEC_IMPL_DOUBLE_H_ */ \n"
	"\n"
	"/* ------------------------------------------------------------------------ */\n"
	"/* PREPROCESSOR HELL INCOMING */\n"
	"";
140
/* Trailing text for the generated file; deliberately the empty string. */
static const char *footer = "";
143
144 /* ------------------------------------------------------------------------ */
145
146 static void op_print_pp_halfsize(int op, int type, int bits, int size)
147 {
148 struct op_info *op_info = gen_op_info(op);
149
150 printf("defined(V%s%dx%d_%s_DEFINED)", type_str[type].u, bits, size / 2, op_info->u);
151 }
152
153 static void op_print_twoop(int op, int type, int bits, int size)
154 {
155 struct op_info *op_info = gen_op_info(op);
156
157 printf("\tvec1.dbl[0] = ");
158 gen_print_vtype(type, bits, size / 2);
159 printf("_%s(vec1.dbl[0], vec2.dbl[0]);\n", op_info->l);
160
161 printf("\tvec1.dbl[1] = ");
162 gen_print_vtype(type, bits, size / 2);
163 printf("_%s(vec1.dbl[1], vec2.dbl[1]);\n", op_info->l);
164
165 printf("\treturn vec1;\n");
166 }
167
168 static void op_print_unoop(int op, int type, int bits, int size)
169 {
170 struct op_info *op_info = gen_op_info(op);
171
172 printf("\tvec.dbl[0] = ");
173 gen_print_vtype(type, bits, size / 2);
174 printf("_%s(vec.dbl[0]);\n", op_info->l);
175
176 printf("\tvec1.dbl[1] = ");
177 gen_print_vtype(type, bits, size / 2);
178 printf("_%s(vec.dbl[1]);\n", op_info->l);
179
180 printf("\treturn vec;\n");
181 }
182
183 static inline void op_print_load(int op, int type, int bits, int size)
184 {
185 struct op_info *op_info = gen_op_info(op);
186
187 printf("\t");
188 gen_print_vtype(type, bits, size);
189 printf(" vec;\n");
190
191 printf("\tvec.dbl[0] = ");
192 gen_print_vtype(type, bits, size / 2);
193 printf("_%s(x);\n", op_info->l);
194
195 printf("\tvec.dbl[1] = ");
196 gen_print_vtype(type, bits, size / 2);
197 printf("_%s(x + %d);\n", op_info->l, size / 2);
198
199 printf("\treturn vec;\n");
200 }
201
202 static inline void op_print_splat(int op, int type, int bits, int size)
203 {
204 struct op_info *op_info = gen_op_info(op);
205
206 printf("\t");
207 gen_print_vtype(type, bits, size);
208 printf(" vec;\n");
209
210 printf("\tvec.dbl[0] = ");
211 gen_print_vtype(type, bits, size / 2);
212 printf("_%s(x);\n", op_info->l);
213
214 printf("\tvec.dbl[1] = ");
215 gen_print_vtype(type, bits, size / 2);
216 printf("_%s(x);\n", op_info->l);
217
218 printf("\treturn vec;\n");
219 }
220
221 static inline void op_print_store(int op, int type, int bits, int size)
222 {
223 struct op_info *op_info = gen_op_info(op);
224
225 printf("\t");
226 gen_print_vtype(type, bits, size / 2);
227 printf("_%s(vec.dbl[0], x);\n", op_info->l);
228
229 printf("\t");
230 gen_print_vtype(type, bits, size / 2);
231 printf("_%s(vec.dbl[1], x + %d);\n", op_info->l, size / 2);
232 }
233
234 static struct op_impl op_impl[OP_FINAL_] = {
235 [OP_SPLAT] = {NULL, op_print_pp_halfsize, op_print_splat},
236 [OP_LOAD_ALIGNED] = {NULL, op_print_pp_halfsize, op_print_load},
237 [OP_LOAD] = {NULL, op_print_pp_halfsize, op_print_load},
238 [OP_STORE_ALIGNED] = {NULL, op_print_pp_halfsize, op_print_store},
239 [OP_STORE] = {NULL, op_print_pp_halfsize, op_print_store},
240
241 /* arithmetic */
242 [OP_ADD] = {NULL, op_print_pp_halfsize, op_print_twoop},
243 [OP_SUB] = {NULL, op_print_pp_halfsize, op_print_twoop},
244 [OP_MUL] = {NULL, op_print_pp_halfsize, op_print_twoop},
245 [OP_DIV] = {NULL, op_print_pp_halfsize, op_print_twoop},
246 [OP_MOD] = {NULL, op_print_pp_halfsize, op_print_twoop},
247 [OP_AVG] = {NULL, op_print_pp_halfsize, op_print_twoop},
248
249 /* bitwise */
250 [OP_AND] = {NULL, op_print_pp_halfsize, op_print_twoop},
251 [OP_OR] = {NULL, op_print_pp_halfsize, op_print_twoop},
252 [OP_XOR] = {NULL, op_print_pp_halfsize, op_print_twoop},
253 [OP_NOT] = {NULL, op_print_pp_halfsize, op_print_unoop},
254
255 /* min/max */
256 [OP_MIN] = {NULL, op_print_pp_halfsize, op_print_twoop},
257 [OP_MAX] = {NULL, op_print_pp_halfsize, op_print_twoop},
258
259 /* bitshift */
260 [OP_LSHIFT] = {NULL, op_print_pp_halfsize, op_print_twoop},
261 [OP_LRSHIFT] = {NULL, op_print_pp_halfsize, op_print_twoop},
262 [OP_RSHIFT] = {NULL, op_print_pp_halfsize, op_print_twoop},
263
264 /* comparison */
265 [OP_CMPLT] = {NULL, op_print_pp_halfsize, op_print_twoop},
266 [OP_CMPLE] = {NULL, op_print_pp_halfsize, op_print_twoop},
267 [OP_CMPEQ] = {NULL, op_print_pp_halfsize, op_print_twoop},
268 [OP_CMPGE] = {NULL, op_print_pp_halfsize, op_print_twoop},
269 [OP_CMPGT] = {NULL, op_print_pp_halfsize, op_print_twoop},
270 };
271
272 int main(void)
273 {
274 gen(op_impl, "double");
275 }