diff crc32.c @ 3:6483683ac857 default tip

*: add profiling code too; expand x86 to use all eight XMM registers, basically ported verbatim from the assembly
author Paper <paper@tflc.us>
date Mon, 09 Feb 2026 21:30:30 -0500
parents 422835bc1aca
children
line wrap: on
line diff
--- a/crc32.c	Mon Feb 09 01:21:00 2026 -0500
+++ b/crc32.c	Mon Feb 09 21:30:30 2026 -0500
@@ -17,14 +17,12 @@
 		return;
 
 	/* Calculate size needed to align */
-	sz8 = align - ((uintptr_t)message % align);
+	sz8 = align - ((uintptr_t)*message % align);
 	szs = MIN(*sz, sz8);
 
 	*crc = crcfunc(*crc, *message, sz8);
 	*message += sz8;
 	*sz -= sz8;
-
-	if (szs == sz8) assert(ALIGNED(*message, align));
 }
 
 CRC32_API
@@ -39,12 +37,17 @@
 	crc = 0xFFFFFFFF;
 	crc32_align(&crc, crc32c_r, ALIGNOF(uint32_t), &message, &sz);
 	if (!sz) return ~crc;
-#ifdef __x86_64__
-	crc32_align(&crc, crc32qw_r, 16, &message, &sz);
-	if (!sz) return ~crc;
 
-	return ~crc32x86_vpclmulqdq_r(crc, message, sz);
-#else
+#if defined(__x86_64__) && defined(__GNUC__)
+	/* Check at runtime if we can use vpclmulqdq */
+	if (__builtin_cpu_supports("vpclmulqdq")) {
+		/* Align and do the rest with vpclmulqdq */
+		crc32_align(&crc, crc32qw_r, 16, &message, &sz);
+		if (!sz) return ~crc;
+
+		return ~crc32x86_vpclmulqdq_r(crc, message, sz);
+	} /* Otherwise just use 32-bit impl */
+#endif
+
 	return ~crc32qw_r(crc, message, sz);
-#endif
 }