Mercurial > vec
comparison gen/gengcc.c @ 45:7955bed1d169 default tip
*: add preliminary floating point support
no x86 intrinsics just yet, but I did add altivec since it's
(arguably) the simplest :)
author | Paper <paper@tflc.us> |
---|---|
date | Wed, 30 Apr 2025 18:36:38 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
44:b0a3f0248ecc | 45:7955bed1d169 |
---|---|
1 /** | |
2 * vec - a tiny SIMD vector library in C99 | |
3 * | |
4 * Copyright (c) 2024-2025 Paper | |
5 * | |
6 * Permission is hereby granted, free of charge, to any person obtaining a copy | |
7 * of this software and associated documentation files (the "Software"), to deal | |
8 * in the Software without restriction, including without limitation the rights | |
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
10 * copies of the Software, and to permit persons to whom the Software is | |
11 * furnished to do so, subject to the following conditions: | |
12 * | |
13 * The above copyright notice and this permission notice shall be included in all | |
14 * copies or substantial portions of the Software. | |
15 * | |
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
22 * SOFTWARE. | |
23 **/ | |
24 | |
25 #include "genlib.h" | |
26 | |
/* Element count of a true array (not a pointer): total bytes over bytes per element. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
28 | |
29 static int gcc_minmax_only_integer(int op, int type, int bits, int size) | |
30 { | |
31 return (type == TYPE_INT || type == TYPE_UINT); | |
32 } | |
33 | |
34 /* ------------------------------------------------------------------------ */ | |
35 | |
/*
 * Writes the preprocessor condition "VEC_GNUC_ATLEAST(4, 3, 0)" to stdout,
 * with no trailing newline. All four parameters are ignored.
 */
static void pp_gcc_prereq_4_3_0(int op, int type, int bits, int size)
{
	fputs("VEC_GNUC_ATLEAST(4, 3, 0)", stdout);
}
40 | |
41 static void gcc_print_easy_op(int op, int type, int bits, int size) | |
42 { | |
43 static const char *op_builtins[] = { | |
44 [OP_ADD] = "+", | |
45 [OP_SUB] = "-", | |
46 [OP_MUL] = "*", | |
47 [OP_DIV] = "/", | |
48 [OP_MOD] = "%", | |
49 [OP_AND] = "&", | |
50 [OP_OR] = "|", | |
51 [OP_XOR] = "^", | |
52 [OP_CMPLT] = "<", | |
53 [OP_CMPLE] = "<=", | |
54 [OP_CMPEQ] = "==", | |
55 [OP_CMPGE] = ">=", | |
56 [OP_CMPGT] = ">", | |
57 }; | |
58 | |
59 printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]); | |
60 printf("\treturn vec1;\n"); | |
61 } | |
62 | |
/*
 * Emits the body of a splat function: declares a local `vec` of the vector
 * type printed by gen_print_vtype(type, bits, size), assigns `x` to each of
 * the `size` lanes of `vec.gcc` one statement at a time, and returns `vec`.
 * `op` is unused.
 */
static void gcc_print_splat(int op, int type, int bits, int size)
{
	int lane = 0;

	/* declaration: "<vtype> vec;" */
	fputs("\t", stdout);
	gen_print_vtype(type, bits, size);
	fputs(" vec;\n", stdout);

	/* one assignment per lane */
	while (lane < size) {
		printf("\tvec.gcc[%d] = x;\n", lane);
		lane++;
	}

	fputs("\treturn vec;\n", stdout);
}
74 | |
/*
 * Emits the body of an aligned load: declares a local `vec` of the vector
 * type printed by gen_print_vtype(type, bits, size), fills `vec.gcc` by
 * dereferencing `x` through a pointer to the native vector type, and
 * returns `vec`. `op` is unused.
 */
static void gcc_print_load_aligned(int op, int type, int bits, int size)
{
	static const char load_stmt[] = "\tvec.gcc = *(__typeof__(vec.gcc) *)x;";

	fputs("\t", stdout);
	gen_print_vtype(type, bits, size);
	fputs(" vec;\n", stdout);

	puts(load_stmt);
	fputs("\treturn vec;\n", stdout);
}
83 | |
/*
 * Emits the body of an unaligned load: declares a local `vec` of the vector
 * type printed by gen_print_vtype(type, bits, size), copies sizeof(vec)
 * bytes from `x` into it with memcpy, and returns `vec`. `op` is unused.
 */
static void gcc_print_load(int op, int type, int bits, int size)
{
	static const char copy_stmt[] = "\tmemcpy(&vec, x, sizeof(vec));";

	fputs("\t", stdout);
	gen_print_vtype(type, bits, size);
	fputs(" vec;\n", stdout);

	puts(copy_stmt);
	fputs("\treturn vec;\n", stdout);
}
92 | |
/*
 * Emits the single statement of an aligned store: `vec.gcc` is written
 * through `x` reinterpreted as a pointer to the native vector type.
 * All four parameters are ignored.
 */
static void gcc_print_store_aligned(int op, int type, int bits, int size)
{
	static const char store_stmt[] = "\t*(__typeof__(vec.gcc) *)x = vec.gcc;";

	fputs(store_stmt, stdout);
	fputc('\n', stdout);
}
97 | |
/*
 * Emits the single statement of an unaligned store: sizeof(vec) bytes are
 * copied from `vec` to `x` with memcpy. All four parameters are ignored.
 */
static void gcc_print_store(int op, int type, int bits, int size)
{
	static const char copy_stmt[] = "\tmemcpy(x, &vec, sizeof(vec));";

	fputs(copy_stmt, stdout);
	fputc('\n', stdout);
}
102 | |
103 static void gcc_print_rorlshift(int op, int type, int bits, int size) | |
104 { | |
105 static const char *op_builtins[] = { | |
106 [OP_LSHIFT] = "<<", | |
107 [OP_RSHIFT] = ">>", | |
108 }; | |
109 | |
110 printf("\tvec1.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]); | |
111 printf("\treturn vec1;\n"); | |
112 } | |
113 | |
/*
 * Emits the body of a logical right shift. `vec1.gcc` is first cast to a
 * vector of `vec_uint<bits>` elements occupying the same number of bytes,
 * shifted right by `vec2.gcc`, and the result is cast back to the type of
 * `vec1.gcc` before being returned in `vec1`.
 *
 * `bits` is the element width in bits and `size` the lane count; together
 * they give the vector width in bytes. `op` and `type` are unused.
 */
static void gcc_print_lrshift(int op, int type, int bits, int size)
{
	/* total vector width in bytes, as required by __vector_size__ */
	int vector_bytes = bits * size / 8;

	printf("\tvec1.gcc = (__typeof__(vec1.gcc))((vec_uint%d __attribute__((__vector_size__(%d))))vec1.gcc >> vec2.gcc);\n", bits, vector_bytes);
	fputs("\treturn vec1;\n", stdout);
}
119 | |
120 static void gcc_print_minmax(int op, int type, int bits, int size) | |
121 { | |
122 static const char *op_builtins[] = { | |
123 [OP_MIN] = "<", | |
124 [OP_MAX] = ">" | |
125 }; | |
126 | |
127 switch (type) { | |
128 case TYPE_INT: | |
129 case TYPE_UINT: | |
130 /* yay */ | |
131 printf("\t"); | |
132 gen_print_vtype(type, bits, size); | |
133 printf(" mask;\n"); | |
134 printf("\tmask.gcc = (vec1.gcc %s vec2.gcc);\n", op_builtins[op]); | |
135 printf("\tvec1.gcc = (vec1.gcc & mask.gcc) | (vec2.gcc & ~mask.gcc);\n"); | |
136 printf("\treturn vec1;\n"); | |
137 break; | |
138 default: | |
139 /* hm? */ | |
140 break; | |
141 } | |
142 } | |
143 | |
144 static void gcc_print_avg(int op, int type, int bits, int size) | |
145 { | |
146 switch (type) { | |
147 case TYPE_INT: | |
148 printf("\tvint%dx%d ones = vint%dx%d_splat(1);\n", bits, size, bits, size); | |
149 puts("\t__typeof__(vec1.gcc) x_d_rem = (vec1.gcc % 2);"); | |
150 puts("\t__typeof__(vec1.gcc) y_d_rem = (vec2.gcc % 2);"); | |
151 puts("\t__typeof__(vec1.gcc) rem_d_quot = ((x_d_rem + y_d_rem) / 2);"); | |
152 puts("\t__typeof__(vec1.gcc) rem_d_rem = ((x_d_rem + y_d_rem) % 2);"); | |
153 puts(""); | |
154 printf("\tvec1.gcc = ((vec1.gcc / 2) + (vec2.gcc / 2)) + (rem_d_quot) + ((rem_d_rem == 1) & ones.gcc);\n"); | |
155 break; | |
156 case TYPE_UINT: | |
157 printf("\tvec1.gcc = (vec1.gcc >> 1) + (vec2.gcc >> 1) + ((vec1.gcc | vec2.gcc) & 1);\n"); | |
158 break; | |
159 case TYPE_FLOAT: | |
160 printf("\tvec1.gcc = (vec1.gcc + vec2.gcc) / 2;\n"); | |
161 break; | |
162 } | |
163 | |
164 printf("\treturn vec1;\n"); | |
165 } | |
166 | |
/*
 * Emits the body of a bitwise NOT: complements `vec.gcc` in place and
 * returns `vec`. All four parameters are ignored.
 */
static void gcc_print_not(int op, int type, int bits, int size)
{
	fputs("\tvec.gcc = ~vec.gcc;\n", stdout);
	fputs("\treturn vec;\n", stdout);
}
172 | |
173 /* ------------------------------------------------------------------------ */ | |
174 | |
175 static struct op_impl op_impl[OP_FINAL_] = { | |
176 [OP_SPLAT] = {NULL, NULL, gcc_print_splat}, | |
177 [OP_LOAD_ALIGNED] = {NULL, NULL, gcc_print_load_aligned}, | |
178 [OP_LOAD] = {NULL, NULL, gcc_print_load}, | |
179 [OP_STORE_ALIGNED] = {NULL, NULL, gcc_print_store_aligned}, | |
180 [OP_STORE] = {NULL, NULL, gcc_print_store}, | |
181 | |
182 /* arithmetic */ | |
183 [OP_ADD] = {NULL, NULL, gcc_print_easy_op}, | |
184 [OP_SUB] = {NULL, NULL, gcc_print_easy_op}, | |
185 [OP_MUL] = {NULL, NULL, gcc_print_easy_op}, | |
186 #if 0 | |
187 /* no defined divide by zero behavior */ | |
188 [OP_DIV] = {NULL, NULL, gcc_print_easy_op}, | |
189 [OP_MOD] = {NULL, NULL, gcc_print_easy_op}, | |
190 #endif | |
191 [OP_AVG] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_avg}, | |
192 | |
193 /* bitwise */ | |
194 [OP_AND] = {NULL, NULL, gcc_print_easy_op}, | |
195 [OP_OR] = {NULL, NULL, gcc_print_easy_op}, | |
196 [OP_XOR] = {NULL, NULL, gcc_print_easy_op}, | |
197 [OP_NOT] = {NULL, NULL, gcc_print_not}, | |
198 | |
199 /* min/max */ | |
200 [OP_MIN] = {gcc_minmax_only_integer, pp_gcc_prereq_4_3_0, gcc_print_minmax}, | |
201 [OP_MAX] = {gcc_minmax_only_integer, pp_gcc_prereq_4_3_0, gcc_print_minmax}, | |
202 | |
203 /* bitshift */ | |
204 [OP_LSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_rorlshift}, | |
205 [OP_LRSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_lrshift}, | |
206 [OP_RSHIFT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_rorlshift}, | |
207 | |
208 /* comparison */ | |
209 [OP_CMPLT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, | |
210 [OP_CMPLE] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, | |
211 [OP_CMPEQ] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, | |
212 [OP_CMPGE] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, | |
213 [OP_CMPGT] = {NULL, pp_gcc_prereq_4_3_0, gcc_print_easy_op}, | |
214 }; | |
215 | |
216 int main(void) | |
217 { | |
218 gen(op_impl, "gcc"); | |
219 } |