Mercurial > vec
comparison utils/gengeneric.c @ 41:c6e0df09b86f default tip
*: performance improvements with old GCC, reimplement altivec
author | Paper <paper@tflc.us> |
---|---|
date | Mon, 28 Apr 2025 16:31:59 -0400 |
parents | 55cadb1fac4b |
children |
comparison
equal
deleted
inserted
replaced
40:55cadb1fac4b | 41:c6e0df09b86f |
---|---|
145 "\n" | 145 "\n" |
146 "#define VEC_GENERIC_AVG(sign, bits, size) \\\n" | 146 "#define VEC_GENERIC_AVG(sign, bits, size) \\\n" |
147 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" | 147 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" |
148 " { \\\n" | 148 " { \\\n" |
149 " for (int i = 0; i < size; i++) \\\n" | 149 " for (int i = 0; i < size; i++) \\\n" |
150 " vec1.generic[i] = vec_##sign##avg(vec1.generic[i], vec2.generic[i]); \\\n" | 150 " vec1.generic[i] = vec_im##sign##avg(vec1.generic[i], vec2.generic[i]); \\\n" |
151 " \\\n" | 151 " \\\n" |
152 " return vec1; \\\n" | 152 " return vec1; \\\n" |
153 " }\n" | 153 " }\n" |
154 "\n" | 154 "\n" |
155 "#define VEC_GENERIC_AND(sign, bits, size) \\\n" | 155 "#define VEC_GENERIC_AND(sign, bits, size) \\\n" |
242 " \\\n" | 242 " \\\n" |
243 " v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \\\n" | 243 " v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \\\n" |
244 " v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \\\n" | 244 " v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \\\n" |
245 " \\\n" | 245 " \\\n" |
246 " return v##sign##int##bits##x##size##_or(a, b); \\\n" | 246 " return v##sign##int##bits##x##size##_or(a, b); \\\n" |
247 " }\n" | |
248 "\n" | |
249 "#define VEC_GENERIC_DBL_SPLAT(sign, bits, size, halfsize) \\\n" | |
250 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \\\n" | |
251 " { \\\n" | |
252 " v##sign##int##bits##x##size vec; \\\n" | |
253 " \\\n" | |
254 " vec.generic[0] = v##sign##int##bits##x##halfsize##_splat(x); \\\n" | |
255 " vec.generic[1] = v##sign##int##bits##x##halfsize##_splat(x); \\\n" | |
256 " \\\n" | |
257 " return vec; \\\n" | |
258 " }\n" | |
259 "\n" | |
260 "#define VEC_GENERIC_DBL_LOAD_EX(name, sign, bits, size, halfsize) \\\n" | |
261 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits x[size]) \\\n" | |
262 " { \\\n" | |
263 " v##sign##int##bits##x##size vec; \\\n" | |
264 " \\\n" | |
265 " vec.generic[0] = v##sign##int##bits##x##halfsize##_##name(x); \\\n" | |
266 " vec.generic[1] = v##sign##int##bits##x##halfsize##_##name(x + halfsize); \\\n" | |
267 " \\\n" | |
268 " return vec; \\\n" | |
269 " }\n" | |
270 "\n" | |
271 "#define VEC_GENERIC_DBL_LOAD(sign, bits, size, halfsize) VEC_GENERIC_DBL_LOAD_EX(load, sign, bits, size, halfsize)\n" | |
272 "#define VEC_GENERIC_DBL_LOAD_ALIGNED(sign, bits, size, halfsize) VEC_GENERIC_DBL_LOAD_EX(load_aligned, sign, bits, size, halfsize)\n" | |
273 "\n" | |
274 "#define VEC_GENERIC_DBL_STORE_EX(name, sign, bits, size, halfsize) \\\n" | |
275 " VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits x[size]) \\\n" | |
276 " { \\\n" | |
277 " v##sign##int##bits##x##halfsize##_##name(vec.generic[0], x); \\\n" | |
278 " v##sign##int##bits##x##halfsize##_##name(vec.generic[1], x + halfsize); \\\n" | |
279 " }\n" | |
280 "\n" | |
281 "#define VEC_GENERIC_DBL_STORE(sign, bits, size, halfsize) VEC_GENERIC_DBL_STORE_EX(store, sign, bits, size, halfsize)\n" | |
282 "#define VEC_GENERIC_DBL_STORE_ALIGNED(sign, bits, size, halfsize) VEC_GENERIC_DBL_STORE_EX(store_aligned, sign, bits, size, halfsize)\n" | |
283 "\n" | |
284 "#define VEC_GENERIC_DBL_OP(name, sign, bits, size, halfsize, secondsign) \\\n" | |
285 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec1, v##secondsign##int##bits##x##size vec2) \\\n" | |
286 " { \\\n" | |
287 " vec1.generic[0] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[0], vec2.generic[0]); \\\n" | |
288 " vec1.generic[1] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[1], vec2.generic[1]); \\\n" | |
289 " \\\n" | |
290 " return vec1; \\\n" | |
291 " }\n" | |
292 "\n" | |
293 "#define VEC_GENERIC_DBL_ADD(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(add, sign, bits, size, halfsize, sign)\n" | |
294 "#define VEC_GENERIC_DBL_SUB(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(sub, sign, bits, size, halfsize, sign)\n" | |
295 "#define VEC_GENERIC_DBL_MUL(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mul, sign, bits, size, halfsize, sign)\n" | |
296 "#define VEC_GENERIC_DBL_DIV(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(div, sign, bits, size, halfsize, sign)\n" | |
297 "#define VEC_GENERIC_DBL_MOD(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mod, sign, bits, size, halfsize, sign)\n" | |
298 "#define VEC_GENERIC_DBL_AVG(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(avg, sign, bits, size, halfsize, sign)\n" | |
299 "#define VEC_GENERIC_DBL_LSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(lshift, sign, bits, size, halfsize, u)\n" | |
300 "#define VEC_GENERIC_DBL_RSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(rshift, sign, bits, size, halfsize, u)\n" | |
301 "#define VEC_GENERIC_DBL_LRSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(lrshift, sign, bits, size, halfsize, u)\n" | |
302 "#define VEC_GENERIC_DBL_AND(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(and, sign, bits, size, halfsize, sign)\n" | |
303 "#define VEC_GENERIC_DBL_OR(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(or, sign, bits, size, halfsize, sign)\n" | |
304 "#define VEC_GENERIC_DBL_XOR(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(xor, sign, bits, size, halfsize, sign)\n" | |
305 "#define VEC_GENERIC_DBL_MIN(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(min, sign, bits, size, halfsize, sign)\n" | |
306 "#define VEC_GENERIC_DBL_MAX(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(max, sign, bits, size, halfsize, sign)\n" | |
307 "#define VEC_GENERIC_DBL_CMPLT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmplt, sign, bits, size, halfsize, sign)\n" | |
308 "#define VEC_GENERIC_DBL_CMPLE(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmple, sign, bits, size, halfsize, sign)\n" | |
309 "#define VEC_GENERIC_DBL_CMPEQ(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmpeq, sign, bits, size, halfsize, sign)\n" | |
310 "#define VEC_GENERIC_DBL_CMPGE(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmpge, sign, bits, size, halfsize, sign)\n" | |
311 "#define VEC_GENERIC_DBL_CMPGT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmpgt, sign, bits, size, halfsize, sign)\n" | |
312 "\n" | |
313 "#define VEC_GENERIC_DBL_NOT(sign, bits, size, halfsize) \\\n" | |
314 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \\\n" | |
315 " { \\\n" | |
316 " vec.generic[0] = v##sign##int##bits##x##halfsize##_not(vec.generic[0]); \\\n" | |
317 " vec.generic[1] = v##sign##int##bits##x##halfsize##_not(vec.generic[1]); \\\n" | |
318 " \\\n" | |
319 " return vec; \\\n" | |
320 " }\n" | 247 " }\n" |
321 "\n" | 248 "\n" |
322 "/* ------------------------------------------------------------------------ */\n" | 249 "/* ------------------------------------------------------------------------ */\n" |
323 "/* PREPROCESSOR HELL INCOMING */\n"; | 250 "/* PREPROCESSOR HELL INCOMING */\n"; |
324 | 251 |
333 "#ifndef V%sINT%dx%d_%s_DEFINED\n" | 260 "#ifndef V%sINT%dx%d_%s_DEFINED\n" |
334 "VEC_GENERIC_%s(%s, %d, %d)\n" | 261 "VEC_GENERIC_%s(%s, %d, %d)\n" |
335 "# define V%sINT%dx%d_%s_DEFINED\n" | 262 "# define V%sINT%dx%d_%s_DEFINED\n" |
336 "#endif\n", | 263 "#endif\n", |
337 (is_signed ? "" : "U"), bits, size, op, op, (is_signed ? "/* nothing */" : "u"), bits, size, (is_signed ? "" : "U"), bits, size, op); | 264 (is_signed ? "" : "U"), bits, size, op, op, (is_signed ? "/* nothing */" : "u"), bits, size, (is_signed ? "" : "U"), bits, size, op); |
338 } | |
339 | |
340 static void print_generic_dbl_op(const char *op, int is_signed, int bits, int size) | |
341 { | |
342 printf( | |
343 "#ifndef V%sINT%dx%d_%s_DEFINED\n" | |
344 "VEC_GENERIC_DBL_%s(%s, %d, %d, %d)\n" | |
345 "# define V%sINT%dx%d_%s_DEFINED\n" | |
346 "#endif\n\n", | |
347 (is_signed ? "" : "U"), bits, size, op, op, (is_signed ? "/* nothing */" : "u"), bits, size, size / 2, (is_signed ? "" : "U"), bits, size, op); | |
348 } | 265 } |
349 | 266 |
350 typedef void (*print_op_spec)(const char *op, int is_signed, int bits, int size); | 267 typedef void (*print_op_spec)(const char *op, int is_signed, int bits, int size); |
351 | 268 |
352 static inline void print_ops(int is_signed, int bits, int size, print_op_spec print_op) | 269 static inline void print_ops(int is_signed, int bits, int size, print_op_spec print_op) |
394 int bits, size; | 311 int bits, size; |
395 print_op_spec print_op; | 312 print_op_spec print_op; |
396 } defs[] = { | 313 } defs[] = { |
397 /* -- 8-bit */ | 314 /* -- 8-bit */ |
398 {8, 2, print_generic_op}, | 315 {8, 2, print_generic_op}, |
399 {8, 4, print_generic_dbl_op}, | |
400 {8, 8, print_generic_dbl_op}, | |
401 {8, 16, print_generic_dbl_op}, | |
402 {8, 32, print_generic_dbl_op}, | |
403 {8, 64, print_generic_dbl_op}, | |
404 | 316 |
405 /* -- 16-bit */ | 317 /* -- 16-bit */ |
406 {16, 2, print_generic_op}, | 318 {16, 2, print_generic_op}, |
407 {16, 4, print_generic_dbl_op}, | |
408 {16, 8, print_generic_dbl_op}, | |
409 {16, 16, print_generic_dbl_op}, | |
410 {16, 32, print_generic_dbl_op}, | |
411 | 319 |
412 /* -- 32-bit */ | 320 /* -- 32-bit */ |
413 {32, 2, print_generic_op}, | 321 {32, 2, print_generic_op}, |
414 {32, 4, print_generic_dbl_op}, | |
415 {32, 8, print_generic_dbl_op}, | |
416 {32, 16, print_generic_dbl_op}, | |
417 | 322 |
418 /* -- 64-bit */ | 323 /* -- 64-bit */ |
419 {64, 2, print_generic_op}, | 324 {64, 2, print_generic_op}, |
420 {64, 4, print_generic_dbl_op}, | |
421 {64, 8, print_generic_dbl_op}, | |
422 }; | 325 }; |
423 int i; | 326 int i; |
424 | 327 |
425 puts(header); | 328 puts(header); |
426 | 329 |