Mercurial > vec
comparison src/cpu.c @ 28:c6c99ab1088a
*: add min/max functions and a big big refactor (again)
agh, this time I added a few more implementations (and generally
made the code just a little faster...)
| author | Paper <paper@tflc.us> |
|---|---|
| date | Thu, 24 Apr 2025 00:54:02 -0400 |
| parents | 92156fe32755 |
| children | bf6ad516f1e6 |
comparison
equal
deleted
inserted
replaced
| 27:d00b95f95dd1 | 28:c6c99ab1088a |
|---|---|
| 360 static inline int vec_CPU_have_AVX512F(void) | 360 static inline int vec_CPU_have_AVX512F(void) |
| 361 { | 361 { |
| 362 if (vec_CPU_OSSavesYMM && (vec_CPU_CPUIDMaxFunction >= 7)) { | 362 if (vec_CPU_OSSavesYMM && (vec_CPU_CPUIDMaxFunction >= 7)) { |
| 363 int a, b, c, d; | 363 int a, b, c, d; |
| 364 VEC_CPU_CPUID(7, a, b, c, d); | 364 VEC_CPU_CPUID(7, a, b, c, d); |
| 365 return b & 0x00000020; | 365 return b & 0x00010000; |
| 366 (void)a, (void)c, (void)d; | |
| 367 } | |
| 368 return 0; | |
| 369 } | |
| 370 | |
| 371 static inline int vec_CPU_have_AVX512DQ(void) | |
| 372 { | |
| 373 if (vec_CPU_OSSavesYMM && (vec_CPU_CPUIDMaxFunction >= 7)) { | |
| 374 int a, b, c, d; | |
| 375 VEC_CPU_CPUID(7, a, b, c, d); | |
| 376 return b & 0x00020000; | |
| 377 (void)a, (void)c, (void)d; | |
| 378 } | |
| 379 return 0; | |
| 380 } | |
| 381 | |
| 382 static inline int vec_CPU_have_AVX512BW(void) | |
| 383 { | |
| 384 if (vec_CPU_OSSavesYMM && (vec_CPU_CPUIDMaxFunction >= 7)) { | |
| 385 int a, b, c, d; | |
| 386 VEC_CPU_CPUID(7, a, b, c, d); | |
| 387 return b & 0x40000000; | |
| 366 (void)a, (void)c, (void)d; | 388 (void)a, (void)c, (void)d; |
| 367 } | 389 } |
| 368 return 0; | 390 return 0; |
| 369 } | 391 } |
| 370 | 392 |
| 489 vec_CPU_features |= VEC_CPU_HAS_AVX; | 511 vec_CPU_features |= VEC_CPU_HAS_AVX; |
| 490 if (vec_CPU_have_AVX2()) | 512 if (vec_CPU_have_AVX2()) |
| 491 vec_CPU_features |= VEC_CPU_HAS_AVX2; | 513 vec_CPU_features |= VEC_CPU_HAS_AVX2; |
| 492 if (vec_CPU_have_AVX512F()) | 514 if (vec_CPU_have_AVX512F()) |
| 493 vec_CPU_features |= VEC_CPU_HAS_AVX512F; | 515 vec_CPU_features |= VEC_CPU_HAS_AVX512F; |
| 516 if (vec_CPU_have_AVX512BW()) | |
| 517 vec_CPU_features |= VEC_CPU_HAS_AVX512BW; | |
| 518 if (vec_CPU_have_AVX512DQ()) | |
| 519 vec_CPU_features |= VEC_CPU_HAS_AVX512DQ; | |
| 494 if (vec_CPU_have_NEON()) | 520 if (vec_CPU_have_NEON()) |
| 495 vec_CPU_features |= VEC_CPU_HAS_NEON; | 521 vec_CPU_features |= VEC_CPU_HAS_NEON; |
| 496 } | 522 } |
| 497 return vec_CPU_features; | 523 return vec_CPU_features; |
| 498 } | 524 } |
