Mercurial > vec
comparison utils/gengeneric.c @ 41:c6e0df09b86f
*: performance improvements with old GCC, reimplement altivec
| author | Paper <paper@tflc.us> |
|---|---|
| date | Mon, 28 Apr 2025 16:31:59 -0400 |
| parents | 55cadb1fac4b |
| children |
comparison
equal
deleted
inserted
replaced
| 40:55cadb1fac4b | 41:c6e0df09b86f |
|---|---|
| 145 "\n" | 145 "\n" |
| 146 "#define VEC_GENERIC_AVG(sign, bits, size) \\\n" | 146 "#define VEC_GENERIC_AVG(sign, bits, size) \\\n" |
| 147 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" | 147 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \\\n" |
| 148 " { \\\n" | 148 " { \\\n" |
| 149 " for (int i = 0; i < size; i++) \\\n" | 149 " for (int i = 0; i < size; i++) \\\n" |
| 150 " vec1.generic[i] = vec_##sign##avg(vec1.generic[i], vec2.generic[i]); \\\n" | 150 " vec1.generic[i] = vec_im##sign##avg(vec1.generic[i], vec2.generic[i]); \\\n" |
| 151 " \\\n" | 151 " \\\n" |
| 152 " return vec1; \\\n" | 152 " return vec1; \\\n" |
| 153 " }\n" | 153 " }\n" |
| 154 "\n" | 154 "\n" |
| 155 "#define VEC_GENERIC_AND(sign, bits, size) \\\n" | 155 "#define VEC_GENERIC_AND(sign, bits, size) \\\n" |
| 242 " \\\n" | 242 " \\\n" |
| 243 " v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \\\n" | 243 " v##sign##int##bits##x##size a = v##sign##int##bits##x##size##_and(vec1, cmplt); \\\n" |
| 244 " v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \\\n" | 244 " v##sign##int##bits##x##size b = v##sign##int##bits##x##size##_and(vec2, v##sign##int##bits##x##size##_not(cmplt)); \\\n" |
| 245 " \\\n" | 245 " \\\n" |
| 246 " return v##sign##int##bits##x##size##_or(a, b); \\\n" | 246 " return v##sign##int##bits##x##size##_or(a, b); \\\n" |
| 247 " }\n" | |
| 248 "\n" | |
| 249 "#define VEC_GENERIC_DBL_SPLAT(sign, bits, size, halfsize) \\\n" | |
| 250 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x) \\\n" | |
| 251 " { \\\n" | |
| 252 " v##sign##int##bits##x##size vec; \\\n" | |
| 253 " \\\n" | |
| 254 " vec.generic[0] = v##sign##int##bits##x##halfsize##_splat(x); \\\n" | |
| 255 " vec.generic[1] = v##sign##int##bits##x##halfsize##_splat(x); \\\n" | |
| 256 " \\\n" | |
| 257 " return vec; \\\n" | |
| 258 " }\n" | |
| 259 "\n" | |
| 260 "#define VEC_GENERIC_DBL_LOAD_EX(name, sign, bits, size, halfsize) \\\n" | |
| 261 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(const vec_##sign##int##bits x[size]) \\\n" | |
| 262 " { \\\n" | |
| 263 " v##sign##int##bits##x##size vec; \\\n" | |
| 264 " \\\n" | |
| 265 " vec.generic[0] = v##sign##int##bits##x##halfsize##_##name(x); \\\n" | |
| 266 " vec.generic[1] = v##sign##int##bits##x##halfsize##_##name(x + halfsize); \\\n" | |
| 267 " \\\n" | |
| 268 " return vec; \\\n" | |
| 269 " }\n" | |
| 270 "\n" | |
| 271 "#define VEC_GENERIC_DBL_LOAD(sign, bits, size, halfsize) VEC_GENERIC_DBL_LOAD_EX(load, sign, bits, size, halfsize)\n" | |
| 272 "#define VEC_GENERIC_DBL_LOAD_ALIGNED(sign, bits, size, halfsize) VEC_GENERIC_DBL_LOAD_EX(load_aligned, sign, bits, size, halfsize)\n" | |
| 273 "\n" | |
| 274 "#define VEC_GENERIC_DBL_STORE_EX(name, sign, bits, size, halfsize) \\\n" | |
| 275 " VEC_FUNC_IMPL void v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec, vec_##sign##int##bits x[size]) \\\n" | |
| 276 " { \\\n" | |
| 277 " v##sign##int##bits##x##halfsize##_##name(vec.generic[0], x); \\\n" | |
| 278 " v##sign##int##bits##x##halfsize##_##name(vec.generic[1], x + halfsize); \\\n" | |
| 279 " }\n" | |
| 280 "\n" | |
| 281 "#define VEC_GENERIC_DBL_STORE(sign, bits, size, halfsize) VEC_GENERIC_DBL_STORE_EX(store, sign, bits, size, halfsize)\n" | |
| 282 "#define VEC_GENERIC_DBL_STORE_ALIGNED(sign, bits, size, halfsize) VEC_GENERIC_DBL_STORE_EX(store_aligned, sign, bits, size, halfsize)\n" | |
| 283 "\n" | |
| 284 "#define VEC_GENERIC_DBL_OP(name, sign, bits, size, halfsize, secondsign) \\\n" | |
| 285 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_##name(v##sign##int##bits##x##size vec1, v##secondsign##int##bits##x##size vec2) \\\n" | |
| 286 " { \\\n" | |
| 287 " vec1.generic[0] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[0], vec2.generic[0]); \\\n" | |
| 288 " vec1.generic[1] = v##sign##int##bits##x##halfsize##_##name(vec1.generic[1], vec2.generic[1]); \\\n" | |
| 289 " \\\n" | |
| 290 " return vec1; \\\n" | |
| 291 " }\n" | |
| 292 "\n" | |
| 293 "#define VEC_GENERIC_DBL_ADD(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(add, sign, bits, size, halfsize, sign)\n" | |
| 294 "#define VEC_GENERIC_DBL_SUB(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(sub, sign, bits, size, halfsize, sign)\n" | |
| 295 "#define VEC_GENERIC_DBL_MUL(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mul, sign, bits, size, halfsize, sign)\n" | |
| 296 "#define VEC_GENERIC_DBL_DIV(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(div, sign, bits, size, halfsize, sign)\n" | |
| 297 "#define VEC_GENERIC_DBL_MOD(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(mod, sign, bits, size, halfsize, sign)\n" | |
| 298 "#define VEC_GENERIC_DBL_AVG(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(avg, sign, bits, size, halfsize, sign)\n" | |
| 299 "#define VEC_GENERIC_DBL_LSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(lshift, sign, bits, size, halfsize, u)\n" | |
| 300 "#define VEC_GENERIC_DBL_RSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(rshift, sign, bits, size, halfsize, u)\n" | |
| 301 "#define VEC_GENERIC_DBL_LRSHIFT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(lrshift, sign, bits, size, halfsize, u)\n" | |
| 302 "#define VEC_GENERIC_DBL_AND(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(and, sign, bits, size, halfsize, sign)\n" | |
| 303 "#define VEC_GENERIC_DBL_OR(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(or, sign, bits, size, halfsize, sign)\n" | |
| 304 "#define VEC_GENERIC_DBL_XOR(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(xor, sign, bits, size, halfsize, sign)\n" | |
| 305 "#define VEC_GENERIC_DBL_MIN(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(min, sign, bits, size, halfsize, sign)\n" | |
| 306 "#define VEC_GENERIC_DBL_MAX(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(max, sign, bits, size, halfsize, sign)\n" | |
| 307 "#define VEC_GENERIC_DBL_CMPLT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmplt, sign, bits, size, halfsize, sign)\n" | |
| 308 "#define VEC_GENERIC_DBL_CMPLE(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmple, sign, bits, size, halfsize, sign)\n" | |
| 309 "#define VEC_GENERIC_DBL_CMPEQ(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmpeq, sign, bits, size, halfsize, sign)\n" | |
| 310 "#define VEC_GENERIC_DBL_CMPGE(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmpge, sign, bits, size, halfsize, sign)\n" | |
| 311 "#define VEC_GENERIC_DBL_CMPGT(sign, bits, size, halfsize) VEC_GENERIC_DBL_OP(cmpgt, sign, bits, size, halfsize, sign)\n" | |
| 312 "\n" | |
| 313 "#define VEC_GENERIC_DBL_NOT(sign, bits, size, halfsize) \\\n" | |
| 314 " VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_not(v##sign##int##bits##x##size vec) \\\n" | |
| 315 " { \\\n" | |
| 316 " vec.generic[0] = v##sign##int##bits##x##halfsize##_not(vec.generic[0]); \\\n" | |
| 317 " vec.generic[1] = v##sign##int##bits##x##halfsize##_not(vec.generic[1]); \\\n" | |
| 318 " \\\n" | |
| 319 " return vec; \\\n" | |
| 320 " }\n" | 247 " }\n" |
| 321 "\n" | 248 "\n" |
| 322 "/* ------------------------------------------------------------------------ */\n" | 249 "/* ------------------------------------------------------------------------ */\n" |
| 323 "/* PREPROCESSOR HELL INCOMING */\n"; | 250 "/* PREPROCESSOR HELL INCOMING */\n"; |
| 324 | 251 |
| 333 "#ifndef V%sINT%dx%d_%s_DEFINED\n" | 260 "#ifndef V%sINT%dx%d_%s_DEFINED\n" |
| 334 "VEC_GENERIC_%s(%s, %d, %d)\n" | 261 "VEC_GENERIC_%s(%s, %d, %d)\n" |
| 335 "# define V%sINT%dx%d_%s_DEFINED\n" | 262 "# define V%sINT%dx%d_%s_DEFINED\n" |
| 336 "#endif\n", | 263 "#endif\n", |
| 337 (is_signed ? "" : "U"), bits, size, op, op, (is_signed ? "/* nothing */" : "u"), bits, size, (is_signed ? "" : "U"), bits, size, op); | 264 (is_signed ? "" : "U"), bits, size, op, op, (is_signed ? "/* nothing */" : "u"), bits, size, (is_signed ? "" : "U"), bits, size, op); |
| 338 } | |
| 339 | |
| 340 static void print_generic_dbl_op(const char *op, int is_signed, int bits, int size) | |
| 341 { | |
| 342 printf( | |
| 343 "#ifndef V%sINT%dx%d_%s_DEFINED\n" | |
| 344 "VEC_GENERIC_DBL_%s(%s, %d, %d, %d)\n" | |
| 345 "# define V%sINT%dx%d_%s_DEFINED\n" | |
| 346 "#endif\n\n", | |
| 347 (is_signed ? "" : "U"), bits, size, op, op, (is_signed ? "/* nothing */" : "u"), bits, size, size / 2, (is_signed ? "" : "U"), bits, size, op); | |
| 348 } | 265 } |
| 349 | 266 |
| 350 typedef void (*print_op_spec)(const char *op, int is_signed, int bits, int size); | 267 typedef void (*print_op_spec)(const char *op, int is_signed, int bits, int size); |
| 351 | 268 |
| 352 static inline void print_ops(int is_signed, int bits, int size, print_op_spec print_op) | 269 static inline void print_ops(int is_signed, int bits, int size, print_op_spec print_op) |
| 394 int bits, size; | 311 int bits, size; |
| 395 print_op_spec print_op; | 312 print_op_spec print_op; |
| 396 } defs[] = { | 313 } defs[] = { |
| 397 /* -- 8-bit */ | 314 /* -- 8-bit */ |
| 398 {8, 2, print_generic_op}, | 315 {8, 2, print_generic_op}, |
| 399 {8, 4, print_generic_dbl_op}, | |
| 400 {8, 8, print_generic_dbl_op}, | |
| 401 {8, 16, print_generic_dbl_op}, | |
| 402 {8, 32, print_generic_dbl_op}, | |
| 403 {8, 64, print_generic_dbl_op}, | |
| 404 | 316 |
| 405 /* -- 16-bit */ | 317 /* -- 16-bit */ |
| 406 {16, 2, print_generic_op}, | 318 {16, 2, print_generic_op}, |
| 407 {16, 4, print_generic_dbl_op}, | |
| 408 {16, 8, print_generic_dbl_op}, | |
| 409 {16, 16, print_generic_dbl_op}, | |
| 410 {16, 32, print_generic_dbl_op}, | |
| 411 | 319 |
| 412 /* -- 32-bit */ | 320 /* -- 32-bit */ |
| 413 {32, 2, print_generic_op}, | 321 {32, 2, print_generic_op}, |
| 414 {32, 4, print_generic_dbl_op}, | |
| 415 {32, 8, print_generic_dbl_op}, | |
| 416 {32, 16, print_generic_dbl_op}, | |
| 417 | 322 |
| 418 /* -- 64-bit */ | 323 /* -- 64-bit */ |
| 419 {64, 2, print_generic_op}, | 324 {64, 2, print_generic_op}, |
| 420 {64, 4, print_generic_dbl_op}, | |
| 421 {64, 8, print_generic_dbl_op}, | |
| 422 }; | 325 }; |
| 423 int i; | 326 int i; |
| 424 | 327 |
| 425 puts(header); | 328 puts(header); |
| 426 | 329 |
