comparison utils/gengeneric.c @ 41:c6e0df09b86f

*: performance improvements with old GCC, reimplement altivec
author Paper <paper@tflc.us>
date Mon, 28 Apr 2025 16:31:59 -0400
parents 55cadb1fac4b
children
comparison
equal deleted inserted replaced
40:55cadb1fac4b 41:c6e0df09b86f
145 "\n" 145 "\n"
146 "#define VEC_GENERIC_AVG(sign, bits, size) \\\n" 146 "#define VEC_GENERIC_AVG(sign, bits, size) \\\n"
147 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" 147 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n"
148 " { \\\n" 148 " { \\\n"
149 " for (int i = 0; i < size; i++) \\\n" 149 " for (int i = 0; i < size; i++) \\\n"
150 " vec1.generic[i] = vec_##sign##avg(vec1.generic[i], vec2.generic[i]); \\\n" 150 " vec1.generic[i] = vec_im##sign##avg(vec1.generic[i], vec2.generic[i]); \\\n"
151 " \\\n" 151 " \\\n"
152 " return vec1; \\\n" 152 " return vec1; \\\n"
153 " }\n" 153 " }\n"
154 "\n" 154 "\n"
155 "#define VEC_GENERIC_AND(sign, bits, size) \\\n" 155 "#define VEC_GENERIC_AND(sign, bits, size) \\\n"
242 " \\\n" 242 " \\\n"
243 " v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \\\n" 243 " v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \\\n"
244 " v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \\\n" 244 " v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \\\n"
245 " \\\n" 245 " \\\n"
246 " return v##sign##int##bits##x##size##_or(a, b); \\\n" 246 " return v##sign##int##bits##x##size##_or(a, b); \\\n"
247 " }\n"
248 "\n"
249 "#define VEC_GENERIC_DBL_SPLAT(sign, bits, size, halfsize) \\\n"
250 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \\\n"
251 " { \\\n"
252 " v##sign##int##bits##x##size vec; \\\n"
253 " \\\n"
254 " vec.generic[0] = v##sign##int##bits##x##halfsize##_splat(x); \\\n"
255 " vec.generic[1] = v##sign##int##bits##x##halfsize##_splat(x); \\\n"
256 " \\\n"
257 " return vec; \\\n"
258 " }\n"
259 "\n"
260 "#define VEC_GENERIC_DBL_LOAD_EX(name, sign, bits, size, halfsize) \\\n"
261 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits x[size]) \\\n"
262 " { \\\n"
263 " v##sign##int##bits##x##size vec; \\\n"
264 " \\\n"
265 " vec.generic[0] = v##sign##int##bits##x##halfsize##_##name(x); \\\n"
266 " vec.generic[1] = v##sign##int##bits##x##halfsize##_##name(x + halfsize); \\\n"
267 " \\\n"
268 " return vec; \\\n"
269 " }\n"
270 "\n"
271 "#define VEC_GENERIC_DBL_LOAD(sign, bits, size, halfsize) VEC_GENERIC_DBL_LOAD_EX(load, sign, bits, size, halfsize)\n"
272 "#define VEC_GENERIC_DBL_LOAD_ALIGNED(sign, bits, size, halfsize) VEC_GENERIC_DBL_LOAD_EX(load_aligned, sign, bits, size, halfsize)\n"
273 "\n"
274 "#define VEC_GENERIC_DBL_STORE_EX(name, sign, bits, size, halfsize) \\\n"
275 " VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits x[size]) \\\n"
276 " { \\\n"
277 " v##sign##int##bits##x##halfsize##_##name(vec.generic[0], x); \\\n"
278 " v##sign##int##bits##x##halfsize##_##name(vec.generic[1], x + halfsize); \\\n"
279 " }\n"
280 "\n"
281 "#define VEC_GENERIC_DBL_STORE(sign, bits, size, halfsize) VEC_GENERIC_DBL_STORE_EX(store, sign, bits, size, halfsize)\n"
282 "#define VEC_GENERIC_DBL_STORE_ALIGNED(sign, bits, size, halfsize) VEC_GENERIC_DBL_STORE_EX(store_aligned, sign, bits, size, halfsize)\n"
283 "\n"
284 "#define VEC_GENERIC_DBL_OP(name, sign, bits, size, halfsize, secondsign) \\\n"
285 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec1, v##secondsign##int##bits##x##size vec2) \\\n"
286 " { \\\n"
287 " vec1.generic[0] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[0], vec2.generic[0]); \\\n"
288 " vec1.generic[1] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[1], vec2.generic[1]); \\\n"
289 " \\\n"
290 " return vec1; \\\n"
291 " }\n"
292 "\n"
293 "#define VEC_GENERIC_DBL_ADD(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(add, sign, bits, size, halfsize, sign)\n"
294 "#define VEC_GENERIC_DBL_SUB(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(sub, sign, bits, size, halfsize, sign)\n"
295 "#define VEC_GENERIC_DBL_MUL(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mul, sign, bits, size, halfsize, sign)\n"
296 "#define VEC_GENERIC_DBL_DIV(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(div, sign, bits, size, halfsize, sign)\n"
297 "#define VEC_GENERIC_DBL_MOD(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mod, sign, bits, size, halfsize, sign)\n"
298 "#define VEC_GENERIC_DBL_AVG(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(avg, sign, bits, size, halfsize, sign)\n"
299 "#define VEC_GENERIC_DBL_LSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(lshift, sign, bits, size, halfsize, u)\n"
300 "#define VEC_GENERIC_DBL_RSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(rshift, sign, bits, size, halfsize, u)\n"
301 "#define VEC_GENERIC_DBL_LRSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(lrshift, sign, bits, size, halfsize, u)\n"
302 "#define VEC_GENERIC_DBL_AND(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(and, sign, bits, size, halfsize, sign)\n"
303 "#define VEC_GENERIC_DBL_OR(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(or, sign, bits, size, halfsize, sign)\n"
304 "#define VEC_GENERIC_DBL_XOR(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(xor, sign, bits, size, halfsize, sign)\n"
305 "#define VEC_GENERIC_DBL_MIN(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(min, sign, bits, size, halfsize, sign)\n"
306 "#define VEC_GENERIC_DBL_MAX(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(max, sign, bits, size, halfsize, sign)\n"
307 "#define VEC_GENERIC_DBL_CMPLT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmplt, sign, bits, size, halfsize, sign)\n"
308 "#define VEC_GENERIC_DBL_CMPLE(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmple, sign, bits, size, halfsize, sign)\n"
309 "#define VEC_GENERIC_DBL_CMPEQ(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmpeq, sign, bits, size, halfsize, sign)\n"
310 "#define VEC_GENERIC_DBL_CMPGE(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmpge, sign, bits, size, halfsize, sign)\n"
311 "#define VEC_GENERIC_DBL_CMPGT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmpgt, sign, bits, size, halfsize, sign)\n"
312 "\n"
313 "#define VEC_GENERIC_DBL_NOT(sign, bits, size, halfsize) \\\n"
314 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \\\n"
315 " { \\\n"
316 " vec.generic[0] = v##sign##int##bits##x##halfsize##_not(vec.generic[0]); \\\n"
317 " vec.generic[1] = v##sign##int##bits##x##halfsize##_not(vec.generic[1]); \\\n"
318 " \\\n"
319 " return vec; \\\n"
320 " }\n" 247 " }\n"
321 "\n" 248 "\n"
322 "/* ------------------------------------------------------------------------ */\n" 249 "/* ------------------------------------------------------------------------ */\n"
323 "/* PREPROCESSOR HELL INCOMING */\n"; 250 "/* PREPROCESSOR HELL INCOMING */\n";
324 251
333 "#ifndef V%sINT%dx%d_%s_DEFINED\n" 260 "#ifndef V%sINT%dx%d_%s_DEFINED\n"
334 "VEC_GENERIC_%s(%s, %d, %d)\n" 261 "VEC_GENERIC_%s(%s, %d, %d)\n"
335 "# define V%sINT%dx%d_%s_DEFINED\n" 262 "# define V%sINT%dx%d_%s_DEFINED\n"
336 "#endif\n", 263 "#endif\n",
337 (is_signed ? "" : "U"), bits, size, op, op, (is_signed ? "/* nothing */" : "u"), bits, size, (is_signed ? "" : "U"), bits, size, op); 264 (is_signed ? "" : "U"), bits, size, op, op, (is_signed ? "/* nothing */" : "u"), bits, size, (is_signed ? "" : "U"), bits, size, op);
338 }
339
340 static void print_generic_dbl_op(const char *op, int is_signed, int bits, int size)
341 {
342 printf(
343 "#ifndef V%sINT%dx%d_%s_DEFINED\n"
344 "VEC_GENERIC_DBL_%s(%s, %d, %d, %d)\n"
345 "# define V%sINT%dx%d_%s_DEFINED\n"
346 "#endif\n\n",
347 (is_signed ? "" : "U"), bits, size, op, op, (is_signed ? "/* nothing */" : "u"), bits, size, size / 2, (is_signed ? "" : "U"), bits, size, op);
348 } 265 }
349 266
350 typedef void (*print_op_spec)(const char *op, int is_signed, int bits, int size); 267 typedef void (*print_op_spec)(const char *op, int is_signed, int bits, int size);
351 268
352 static inline void print_ops(int is_signed, int bits, int size, print_op_spec print_op) 269 static inline void print_ops(int is_signed, int bits, int size, print_op_spec print_op)
394 int bits, size; 311 int bits, size;
395 print_op_spec print_op; 312 print_op_spec print_op;
396 } defs[] = { 313 } defs[] = {
397 /* -- 8-bit */ 314 /* -- 8-bit */
398 {8, 2, print_generic_op}, 315 {8, 2, print_generic_op},
399 {8, 4, print_generic_dbl_op},
400 {8, 8, print_generic_dbl_op},
401 {8, 16, print_generic_dbl_op},
402 {8, 32, print_generic_dbl_op},
403 {8, 64, print_generic_dbl_op},
404 316
405 /* -- 16-bit */ 317 /* -- 16-bit */
406 {16, 2, print_generic_op}, 318 {16, 2, print_generic_op},
407 {16, 4, print_generic_dbl_op},
408 {16, 8, print_generic_dbl_op},
409 {16, 16, print_generic_dbl_op},
410 {16, 32, print_generic_dbl_op},
411 319
412 /* -- 32-bit */ 320 /* -- 32-bit */
413 {32, 2, print_generic_op}, 321 {32, 2, print_generic_op},
414 {32, 4, print_generic_dbl_op},
415 {32, 8, print_generic_dbl_op},
416 {32, 16, print_generic_dbl_op},
417 322
418 /* -- 64-bit */ 323 /* -- 64-bit */
419 {64, 2, print_generic_op}, 324 {64, 2, print_generic_op},
420 {64, 4, print_generic_dbl_op},
421 {64, 8, print_generic_dbl_op},
422 }; 325 };
423 int i; 326 int i;
424 327
425 puts(header); 328 puts(header);
426 329