comparison src/cpu.c @ 28:c6c99ab1088a

*: add min/max functions and a big big refactor (again) agh, this time I added a few more implementations (and generally made the code just a little faster...)
author Paper <paper@tflc.us>
date Thu, 24 Apr 2025 00:54:02 -0400
parents 92156fe32755
children bf6ad516f1e6
comparison
equal deleted inserted replaced
27:d00b95f95dd1 28:c6c99ab1088a
360 static inline int vec_CPU_have_AVX512F(void) 360 static inline int vec_CPU_have_AVX512F(void)
361 { 361 {
362 if (vec_CPU_OSSavesYMM && (vec_CPU_CPUIDMaxFunction >= 7)) { 362 if (vec_CPU_OSSavesYMM && (vec_CPU_CPUIDMaxFunction >= 7)) {
363 int a, b, c, d; 363 int a, b, c, d;
364 VEC_CPU_CPUID(7, a, b, c, d); 364 VEC_CPU_CPUID(7, a, b, c, d);
365 return b & 0x00000020; 365 return b & 0x00010000;
366 (void)a, (void)c, (void)d;
367 }
368 return 0;
369 }
370
371 static inline int vec_CPU_have_AVX512DQ(void)
372 {
373 if (vec_CPU_OSSavesYMM && (vec_CPU_CPUIDMaxFunction >= 7)) {
374 int a, b, c, d;
375 VEC_CPU_CPUID(7, a, b, c, d);
376 return b & 0x00020000;
377 (void)a, (void)c, (void)d;
378 }
379 return 0;
380 }
381
382 static inline int vec_CPU_have_AVX512BW(void)
383 {
384 if (vec_CPU_OSSavesYMM && (vec_CPU_CPUIDMaxFunction >= 7)) {
385 int a, b, c, d;
386 VEC_CPU_CPUID(7, a, b, c, d);
387 return b & 0x40000000;
366 (void)a, (void)c, (void)d; 388 (void)a, (void)c, (void)d;
367 } 389 }
368 return 0; 390 return 0;
369 } 391 }
370 392
489 vec_CPU_features |= VEC_CPU_HAS_AVX; 511 vec_CPU_features |= VEC_CPU_HAS_AVX;
490 if (vec_CPU_have_AVX2()) 512 if (vec_CPU_have_AVX2())
491 vec_CPU_features |= VEC_CPU_HAS_AVX2; 513 vec_CPU_features |= VEC_CPU_HAS_AVX2;
492 if (vec_CPU_have_AVX512F()) 514 if (vec_CPU_have_AVX512F())
493 vec_CPU_features |= VEC_CPU_HAS_AVX512F; 515 vec_CPU_features |= VEC_CPU_HAS_AVX512F;
516 if (vec_CPU_have_AVX512BW())
517 vec_CPU_features |= VEC_CPU_HAS_AVX512BW;
518 if (vec_CPU_have_AVX512DQ())
519 vec_CPU_features |= VEC_CPU_HAS_AVX512DQ;
494 if (vec_CPU_have_NEON()) 520 if (vec_CPU_have_NEON())
495 vec_CPU_features |= VEC_CPU_HAS_NEON; 521 vec_CPU_features |= VEC_CPU_HAS_NEON;
496 } 522 }
497 return vec_CPU_features; 523 return vec_CPU_features;
498 } 524 }