changeset 0:422835bc1aca

*: checkin
author Paper <paper@tflc.us>
date Mon, 09 Feb 2026 01:15:00 -0500
parents
children 90cb48b87dcc
files LICENSE Makefile README crc32-table.c crc32-test.c crc32.c crc32.h crc32c.c crc32i.h crc32qw.c crc32x86-tab.h crc32x86.c
diffstat 12 files changed, 1611 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/LICENSE	Mon Feb 09 01:15:00 2026 -0500
@@ -0,0 +1,58 @@
+Most of this project is licensed under the common zlib license:
+
+------------------------------------------------------------------------------
+
+Copyright (c) Paper 2026
+
+This software is provided 'as-is', without any express or implied
+warranty.  In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not
+   claim that you wrote the original software. If you use this software
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+
+------------------------------------------------------------------------------
+
+The code taking advantage of the x86-specific vector instructions was derived
+almost entirely from code written by Intel employees, released under the
+BSD 3-clause license:
+
+------------------------------------------------------------------------------
+
+Copyright(c) 2011-2015 Intel Corporation All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
+  * Neither the name of Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived
+    from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+------------------------------------------------------------------------------
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Makefile	Mon Feb 09 01:15:00 2026 -0500
@@ -0,0 +1,5 @@
+crc32: crc32.o crc32-table.o crc32-test.o crc32c.o crc32qw.o crc32x86.o
+	$(CC) -o $@ $^
+
+clean:
+	$(RM) crc32 *.o
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Mon Feb 09 01:15:00 2026 -0500
@@ -0,0 +1,11 @@
+This code was mostly written as an experiment, to see just how much
+could be done using only the C preprocessor (give or take). Turns out
+you can do quite a lot.
+
+In fact, *all* of the tables in this library are generated at compile-time.
+The polynomial is #define'd in crc32i.h, and all of the tables are generated
+through a combination of enums and preprocessor trickery. Note that this
+may cause this code to compile slowly on some machines or compilers.
+
+At the moment it is hardcoded for x86-64 and gcc, but it could be adapted to
+other compilers if they also have features like e.g. alignas() or whatever.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/crc32-table.c	Mon Feb 09 01:15:00 2026 -0500
@@ -0,0 +1,58 @@
+#include "crc32i.h"
+
+/* This builds our CRC table at compile-time rather than runtime.
+ * Note: We should have a way to change the polynomial at runtime too. */
+
+#define CRC32_MASK(crc) (-((crc) & 1))
+
+/* Does one iteration of the 8-time loop to generate one byte of the CRC table. */
+#define CRC32_PRECALC_EX(crc) (((crc) >> 1) ^ ((CRC32_POLYNOMIAL) & CRC32_MASK(crc)))
+
+/* Does all eight iterations of the loop to generate one byte. */
+#define CRC32_PRECALC_E(byte) \
+	(CRC32_PRECALC_EX(CRC32_PRECALC_EX(CRC32_PRECALC_EX(CRC32_PRECALC_EX(CRC32_PRECALC_EX(CRC32_PRECALC_EX(CRC32_PRECALC_EX(CRC32_PRECALC_EX(byte)))))))))
+
+/* Simple wrapper of CRC32_PRECALC_E that converts everything to uint32_t */
+#define CRC32_PRECALC(byte) \
+	CRC32_PRECALC_E((uint32_t)(byte))
+
+#define CRC32_PRECALC_0(byte) \
+	CRC32_PRECALC(byte), CRC32_PRECALC((byte) | 0x01)
+
+#define CRC32_PRECALC_1(byte) \
+	CRC32_PRECALC_0(byte), CRC32_PRECALC_0((byte) | UINT32_C(0x02))
+
+#define CRC32_PRECALC_2(byte) \
+	CRC32_PRECALC_1(byte), CRC32_PRECALC_1((byte) | UINT32_C(0x04))
+
+#define CRC32_PRECALC_3(byte) \
+	CRC32_PRECALC_2(byte), CRC32_PRECALC_2((byte) | UINT32_C(0x08))
+
+#define CRC32_PRECALC_4(byte) \
+	CRC32_PRECALC_3(byte), CRC32_PRECALC_3((byte) | UINT32_C(0x10))
+
+#define CRC32_PRECALC_5(byte) \
+	CRC32_PRECALC_4(byte), CRC32_PRECALC_4((byte) | UINT32_C(0x20))
+
+#define CRC32_PRECALC_6(byte) \
+	CRC32_PRECALC_5(byte), CRC32_PRECALC_5((byte) | UINT32_C(0x40))
+
+#define CRC32_PRECALC_7(byte) \
+	CRC32_PRECALC_6(byte), CRC32_PRECALC_6((byte) | UINT32_C(0x80))
+
+const uint32_t crc32_tab[256] = {
+	CRC32_PRECALC_7(0)
+};
+
+#undef CRC32_MASK
+#undef CRC32_PRECALC_EX
+#undef CRC32_PRECALC_E
+#undef CRC32_PRECALC
+#undef CRC32_PRECALC_0
+#undef CRC32_PRECALC_1
+#undef CRC32_PRECALC_2
+#undef CRC32_PRECALC_3
+#undef CRC32_PRECALC_4
+#undef CRC32_PRECALC_5
+#undef CRC32_PRECALC_6
+#undef CRC32_PRECALC_7
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/crc32-test.c	Mon Feb 09 01:15:00 2026 -0500
@@ -0,0 +1,37 @@
+#include "crc32i.h"
+#include <stdio.h>
+
+/* Test implementations and make sure they agree with each other */
+int crc32_test(void)
+{
+	/* Force alignment :) */
+	static const __attribute__((__aligned__(CRC32_MAX_ALIGNMENT))) unsigned char testdata[1024] =
+#define DOUBLE(x) x x
+DOUBLE(DOUBLE(DOUBLE(DOUBLE(DOUBLE(DOUBLE(DOUBLE("\x01\x02\x04\x08\x10\x20\x40\x80")))))))
+#undef DOUBLE
+	;
+	static const crc32_r_spec crc[] = {
+		crc32c_r,
+		crc32qw_r,
+		crc32x86_vpclmulqdq_r
+	};
+	size_t i;
+
+	uint32_t crcc = crc32(testdata, sizeof(testdata));
+
+	for (i = 0; i < ARRAY_SIZE(crc); i++) {
+		uint32_t thiscrc = ~crc[i](0xFFFFFFFF, testdata, sizeof(testdata));
+
+		if (thiscrc != crcc) {
+			fprintf(stderr, "%zu, mismatch: %08x, %08x\n", i, crcc, thiscrc);
+			return -1;
+		}
+	}
+
+	return 0;
+}
+
+int main(void)
+{
+	return -crc32_test();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/crc32.c	Mon Feb 09 01:15:00 2026 -0500
@@ -0,0 +1,50 @@
+#include "crc32.h"
+#include "crc32i.h"
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+
+/* Align to a byte offset using the given crc function. */
+static void crc32_align(uint32_t *crc, crc32_r_spec crcfunc, size_t align, const unsigned char **message, size_t *sz)
+{
+	size_t sz8, szs;
+
+	/* Alignment check */
+	if (ALIGNED(*message, align))
+		return;
+
+	/* Calculate size needed to align */
+	sz8 = align - ((uintptr_t)message % align);
+	szs = MIN(*sz, sz8);
+
+	*crc = crcfunc(*crc, *message, sz8);
+	*message += sz8;
+	*sz -= sz8;
+
+	if (szs == sz8) assert(ALIGNED(*message, align));
+}
+
+CRC32_API
+uint32_t crc32(const unsigned char *message, size_t sz)
+{
+	uint32_t crc;
+	size_t i;
+
+	if (!sz)
+		return 0;
+
+	crc = 0xFFFFFFFF;
+	crc32_align(&crc, crc32c_r, ALIGNOF(uint32_t), &message, &sz);
+	if (!sz) return ~crc;
+#ifdef __x86_64__
+	crc32_align(&crc, crc32qw_r, 16, &message, &sz);
+	if (!sz) return ~crc;
+
+	return ~crc32x86_vpclmulqdq_r(crc, message, sz);
+#else
+	return ~crc32qw_r(crc, message, sz);
+#endif
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/crc32.h	Mon Feb 09 01:15:00 2026 -0500
@@ -0,0 +1,9 @@
+#ifndef CRC32_H_
+#define CRC32_H_
+
+#include <stdint.h>
+#include <stddef.h>
+
+uint32_t crc32(const unsigned char *msg, size_t sz);
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/crc32c.c	Mon Feb 09 01:15:00 2026 -0500
@@ -0,0 +1,11 @@
+#include "crc32i.h"
+
+uint32_t crc32c_r(uint32_t crc, const unsigned char *message, size_t sz)
+{
+	size_t i;
+
+	for (i = 0; i < sz; i++)
+		crc = (crc >> 8) ^ crc32_tab[(crc ^ message[i]) & 0xFF];
+
+	return crc;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/crc32i.h	Mon Feb 09 01:15:00 2026 -0500
@@ -0,0 +1,45 @@
+/* internal crc32 definitions */
+
+#include "crc32.h"
+
+#define CRC32_API __attribute__((__visibility__("default")))
+
+/* all LUTs etc. are generated at compile time.
+ * eventually, I'd like to have all internal info
+ * stored in one opaque pointer. this pointer will
+ * contain the LUTs (or maybe a pointer to one).
+ * We could also create these structures at compile
+ * time and have a flag that says whether it needs
+ * to be destroyed or if it can be cached. */
+#define CRC32_POLYNOMIAL 0xedb88320
+
+/* crc32b.c */
+#ifdef __GNUC__
+# define CRC32_PURE __attribute__((__pure__))
+#else
+# define CRC32_PURE
+#endif
+
+#define ALIGNOF(type) offsetof(struct { type a; char b; }, b)
+#define ALIGNED(ptr, alignment) (((uintptr_t)(ptr) % (alignment)) == 0)
+#define ALIGNED_TYPE(ptr, type) ALIGNED(ptr, ALIGNOF(type))
+
+typedef uint32_t (*crc32_r_spec)(uint32_t, const unsigned char *, size_t);
+
+/* shared by crc32c and crc32qw */
+extern const uint32_t crc32_tab[256];
+
+/* Calculates crc32 by bytes. Has no alignment requirement */
+uint32_t crc32c_r(uint32_t crc, const unsigned char *message, size_t sz);
+/* Calculates crc32 in dwords. Requires 4-byte alignment */
+uint32_t crc32qw_r(uint32_t crc, const unsigned char *message, size_t sz);
+/* Calculates crc32 using intel SIMD. Requires 16-byte alignment */
+uint32_t crc32x86_vpclmulqdq_r(uint32_t crc, const unsigned char *msg, size_t sz);
+
+/* Maximum alignment value for each impl to work */
+#define MAX(x, y) ((x)>(y)?(x):(y))
+#define MIN(x, y) ((x)<(y)?(x):(y))
+#define CRC32_MAX_ALIGNMENT MAX(16, ALIGNOF(uint32_t))
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/crc32qw.c	Mon Feb 09 01:15:00 2026 -0500
@@ -0,0 +1,36 @@
+#include "crc32i.h"
+
+uint32_t crc32qw_r(uint32_t crc, const unsigned char *message, size_t sz)
+{
+	while (sz >= 4) {
+		union {
+			unsigned char b[4];
+			uint32_t w;
+		} w;
+
+#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2)
+		/* Tell GCC that we will be aliasing */
+		w.w = *(__attribute__((__may_alias__)) uint32_t *)message;
+#else
+		/* ... */
+		w.b[0] = message[0];
+		w.b[1] = message[1];
+		w.b[2] = message[2];
+		w.b[3] = message[3];
+#endif
+
+		crc ^= w.w;
+
+		crc = (crc >> 8) ^ crc32_tab[crc & 0xFF];
+		crc = (crc >> 8) ^ crc32_tab[crc & 0xFF];
+		crc = (crc >> 8) ^ crc32_tab[crc & 0xFF];
+		crc = (crc >> 8) ^ crc32_tab[crc & 0xFF];
+
+		message += 4;
+		sz -= 4;
+	}
+
+	if (!sz) return crc;
+
+	return crc32c_r(crc, message, sz);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/crc32x86-tab.h	Mon Feb 09 01:15:00 2026 -0500
@@ -0,0 +1,1088 @@
+XNDIVP_MOD_ITER(1, 0)
+XNDIVP_DIV_ITER(1, 0)
+XNDIVP_MOD_ITER(2, 1)
+XNDIVP_DIV_ITER(2, 1)
+XNDIVP_MOD_ITER(3, 2)
+XNDIVP_DIV_ITER(3, 2)
+XNDIVP_MOD_ITER(4, 3)
+XNDIVP_DIV_ITER(4, 3)
+XNDIVP_MOD_ITER(5, 4)
+XNDIVP_DIV_ITER(5, 4)
+XNDIVP_MOD_ITER(6, 5)
+XNDIVP_DIV_ITER(6, 5)
+XNDIVP_MOD_ITER(7, 6)
+XNDIVP_DIV_ITER(7, 6)
+XNDIVP_MOD_ITER(8, 7)
+XNDIVP_DIV_ITER(8, 7)
+XNDIVP_MOD_ITER(9, 8)
+XNDIVP_DIV_ITER(9, 8)
+XNDIVP_MOD_ITER(10, 9)
+XNDIVP_DIV_ITER(10, 9)
+XNDIVP_MOD_ITER(11, 10)
+XNDIVP_DIV_ITER(11, 10)
+XNDIVP_MOD_ITER(12, 11)
+XNDIVP_DIV_ITER(12, 11)
+XNDIVP_MOD_ITER(13, 12)
+XNDIVP_DIV_ITER(13, 12)
+XNDIVP_MOD_ITER(14, 13)
+XNDIVP_DIV_ITER(14, 13)
+XNDIVP_MOD_ITER(15, 14)
+XNDIVP_DIV_ITER(15, 14)
+XNDIVP_MOD_ITER(16, 15)
+XNDIVP_DIV_ITER(16, 15)
+XNDIVP_MOD_ITER(17, 16)
+XNDIVP_DIV_ITER(17, 16)
+XNDIVP_MOD_ITER(18, 17)
+XNDIVP_DIV_ITER(18, 17)
+XNDIVP_MOD_ITER(19, 18)
+XNDIVP_DIV_ITER(19, 18)
+XNDIVP_MOD_ITER(20, 19)
+XNDIVP_DIV_ITER(20, 19)
+XNDIVP_MOD_ITER(21, 20)
+XNDIVP_DIV_ITER(21, 20)
+XNDIVP_MOD_ITER(22, 21)
+XNDIVP_DIV_ITER(22, 21)
+XNDIVP_MOD_ITER(23, 22)
+XNDIVP_DIV_ITER(23, 22)
+XNDIVP_MOD_ITER(24, 23)
+XNDIVP_DIV_ITER(24, 23)
+XNDIVP_MOD_ITER(25, 24)
+XNDIVP_DIV_ITER(25, 24)
+XNDIVP_MOD_ITER(26, 25)
+XNDIVP_DIV_ITER(26, 25)
+XNDIVP_MOD_ITER(27, 26)
+XNDIVP_DIV_ITER(27, 26)
+XNDIVP_MOD_ITER(28, 27)
+XNDIVP_DIV_ITER(28, 27)
+XNDIVP_MOD_ITER(29, 28)
+XNDIVP_DIV_ITER(29, 28)
+XNDIVP_MOD_ITER(30, 29)
+XNDIVP_DIV_ITER(30, 29)
+XNDIVP_MOD_ITER(31, 30)
+XNDIVP_DIV_ITER(31, 30)
+XNDIVP_MOD_ITER(32, 31)
+XNDIVP_DIV_ITER(32, 31)
+XNDIVP_MOD_ITER(33, 32)
+XNDIVP_DIV_ITER(33, 32)
+XNDIVP_MOD_ITER(34, 33)
+XNDIVP_DIV_ITER(34, 33)
+XNDIVP_MOD_ITER(35, 34)
+XNDIVP_DIV_ITER(35, 34)
+XNDIVP_MOD_ITER(36, 35)
+XNDIVP_DIV_ITER(36, 35)
+XNDIVP_MOD_ITER(37, 36)
+XNDIVP_DIV_ITER(37, 36)
+XNDIVP_MOD_ITER(38, 37)
+XNDIVP_DIV_ITER(38, 37)
+XNDIVP_MOD_ITER(39, 38)
+XNDIVP_DIV_ITER(39, 38)
+XNDIVP_MOD_ITER(40, 39)
+XNDIVP_DIV_ITER(40, 39)
+XNDIVP_MOD_ITER(41, 40)
+XNDIVP_DIV_ITER(41, 40)
+XNDIVP_MOD_ITER(42, 41)
+XNDIVP_DIV_ITER(42, 41)
+XNDIVP_MOD_ITER(43, 42)
+XNDIVP_DIV_ITER(43, 42)
+XNDIVP_MOD_ITER(44, 43)
+XNDIVP_DIV_ITER(44, 43)
+XNDIVP_MOD_ITER(45, 44)
+XNDIVP_DIV_ITER(45, 44)
+XNDIVP_MOD_ITER(46, 45)
+XNDIVP_DIV_ITER(46, 45)
+XNDIVP_MOD_ITER(47, 46)
+XNDIVP_DIV_ITER(47, 46)
+XNDIVP_MOD_ITER(48, 47)
+XNDIVP_DIV_ITER(48, 47)
+XNDIVP_MOD_ITER(49, 48)
+XNDIVP_DIV_ITER(49, 48)
+XNDIVP_MOD_ITER(50, 49)
+XNDIVP_DIV_ITER(50, 49)
+XNDIVP_MOD_ITER(51, 50)
+XNDIVP_DIV_ITER(51, 50)
+XNDIVP_MOD_ITER(52, 51)
+XNDIVP_DIV_ITER(52, 51)
+XNDIVP_MOD_ITER(53, 52)
+XNDIVP_DIV_ITER(53, 52)
+XNDIVP_MOD_ITER(54, 53)
+XNDIVP_DIV_ITER(54, 53)
+XNDIVP_MOD_ITER(55, 54)
+XNDIVP_DIV_ITER(55, 54)
+XNDIVP_MOD_ITER(56, 55)
+XNDIVP_DIV_ITER(56, 55)
+XNDIVP_MOD_ITER(57, 56)
+XNDIVP_DIV_ITER(57, 56)
+XNDIVP_MOD_ITER(58, 57)
+XNDIVP_DIV_ITER(58, 57)
+XNDIVP_MOD_ITER(59, 58)
+XNDIVP_DIV_ITER(59, 58)
+XNDIVP_MOD_ITER(60, 59)
+XNDIVP_DIV_ITER(60, 59)
+XNDIVP_MOD_ITER(61, 60)
+XNDIVP_DIV_ITER(61, 60)
+XNDIVP_MOD_ITER(62, 61)
+XNDIVP_DIV_ITER(62, 61)
+XNDIVP_MOD_ITER(63, 62)
+XNDIVP_DIV_ITER(63, 62)
+XNDIVP_MOD_ITER(64, 63)
+XNDIVP_DIV_ITER(64, 63)
+XNDIVP_MOD_ITER(65, 64)
+XNDIVP_DIV_ITER(65, 64)
+XNDIVP_MOD_ITER(66, 65)
+XNDIVP_DIV_ITER(66, 65)
+XNDIVP_MOD_ITER(67, 66)
+XNDIVP_DIV_ITER(67, 66)
+XNDIVP_MOD_ITER(68, 67)
+XNDIVP_DIV_ITER(68, 67)
+XNDIVP_MOD_ITER(69, 68)
+XNDIVP_DIV_ITER(69, 68)
+XNDIVP_MOD_ITER(70, 69)
+XNDIVP_DIV_ITER(70, 69)
+XNDIVP_MOD_ITER(71, 70)
+XNDIVP_DIV_ITER(71, 70)
+XNDIVP_MOD_ITER(72, 71)
+XNDIVP_DIV_ITER(72, 71)
+XNDIVP_MOD_ITER(73, 72)
+XNDIVP_DIV_ITER(73, 72)
+XNDIVP_MOD_ITER(74, 73)
+XNDIVP_DIV_ITER(74, 73)
+XNDIVP_MOD_ITER(75, 74)
+XNDIVP_DIV_ITER(75, 74)
+XNDIVP_MOD_ITER(76, 75)
+XNDIVP_DIV_ITER(76, 75)
+XNDIVP_MOD_ITER(77, 76)
+XNDIVP_DIV_ITER(77, 76)
+XNDIVP_MOD_ITER(78, 77)
+XNDIVP_DIV_ITER(78, 77)
+XNDIVP_MOD_ITER(79, 78)
+XNDIVP_DIV_ITER(79, 78)
+XNDIVP_MOD_ITER(80, 79)
+XNDIVP_DIV_ITER(80, 79)
+XNDIVP_MOD_ITER(81, 80)
+XNDIVP_DIV_ITER(81, 80)
+XNDIVP_MOD_ITER(82, 81)
+XNDIVP_DIV_ITER(82, 81)
+XNDIVP_MOD_ITER(83, 82)
+XNDIVP_DIV_ITER(83, 82)
+XNDIVP_MOD_ITER(84, 83)
+XNDIVP_DIV_ITER(84, 83)
+XNDIVP_MOD_ITER(85, 84)
+XNDIVP_DIV_ITER(85, 84)
+XNDIVP_MOD_ITER(86, 85)
+XNDIVP_DIV_ITER(86, 85)
+XNDIVP_MOD_ITER(87, 86)
+XNDIVP_DIV_ITER(87, 86)
+XNDIVP_MOD_ITER(88, 87)
+XNDIVP_DIV_ITER(88, 87)
+XNDIVP_MOD_ITER(89, 88)
+XNDIVP_DIV_ITER(89, 88)
+XNDIVP_MOD_ITER(90, 89)
+XNDIVP_DIV_ITER(90, 89)
+XNDIVP_MOD_ITER(91, 90)
+XNDIVP_DIV_ITER(91, 90)
+XNDIVP_MOD_ITER(92, 91)
+XNDIVP_DIV_ITER(92, 91)
+XNDIVP_MOD_ITER(93, 92)
+XNDIVP_DIV_ITER(93, 92)
+XNDIVP_MOD_ITER(94, 93)
+XNDIVP_DIV_ITER(94, 93)
+XNDIVP_MOD_ITER(95, 94)
+XNDIVP_DIV_ITER(95, 94)
+XNDIVP_MOD_ITER(96, 95)
+XNDIVP_DIV_ITER(96, 95)
+XNDIVP_MOD_ITER(97, 96)
+XNDIVP_DIV_ITER(97, 96)
+XNDIVP_MOD_ITER(98, 97)
+XNDIVP_DIV_ITER(98, 97)
+XNDIVP_MOD_ITER(99, 98)
+XNDIVP_DIV_ITER(99, 98)
+XNDIVP_MOD_ITER(100, 99)
+XNDIVP_DIV_ITER(100, 99)
+XNDIVP_MOD_ITER(101, 100)
+XNDIVP_DIV_ITER(101, 100)
+XNDIVP_MOD_ITER(102, 101)
+XNDIVP_DIV_ITER(102, 101)
+XNDIVP_MOD_ITER(103, 102)
+XNDIVP_DIV_ITER(103, 102)
+XNDIVP_MOD_ITER(104, 103)
+XNDIVP_DIV_ITER(104, 103)
+XNDIVP_MOD_ITER(105, 104)
+XNDIVP_DIV_ITER(105, 104)
+XNDIVP_MOD_ITER(106, 105)
+XNDIVP_DIV_ITER(106, 105)
+XNDIVP_MOD_ITER(107, 106)
+XNDIVP_DIV_ITER(107, 106)
+XNDIVP_MOD_ITER(108, 107)
+XNDIVP_DIV_ITER(108, 107)
+XNDIVP_MOD_ITER(109, 108)
+XNDIVP_DIV_ITER(109, 108)
+XNDIVP_MOD_ITER(110, 109)
+XNDIVP_DIV_ITER(110, 109)
+XNDIVP_MOD_ITER(111, 110)
+XNDIVP_DIV_ITER(111, 110)
+XNDIVP_MOD_ITER(112, 111)
+XNDIVP_DIV_ITER(112, 111)
+XNDIVP_MOD_ITER(113, 112)
+XNDIVP_DIV_ITER(113, 112)
+XNDIVP_MOD_ITER(114, 113)
+XNDIVP_DIV_ITER(114, 113)
+XNDIVP_MOD_ITER(115, 114)
+XNDIVP_DIV_ITER(115, 114)
+XNDIVP_MOD_ITER(116, 115)
+XNDIVP_DIV_ITER(116, 115)
+XNDIVP_MOD_ITER(117, 116)
+XNDIVP_DIV_ITER(117, 116)
+XNDIVP_MOD_ITER(118, 117)
+XNDIVP_DIV_ITER(118, 117)
+XNDIVP_MOD_ITER(119, 118)
+XNDIVP_DIV_ITER(119, 118)
+XNDIVP_MOD_ITER(120, 119)
+XNDIVP_DIV_ITER(120, 119)
+XNDIVP_MOD_ITER(121, 120)
+XNDIVP_DIV_ITER(121, 120)
+XNDIVP_MOD_ITER(122, 121)
+XNDIVP_DIV_ITER(122, 121)
+XNDIVP_MOD_ITER(123, 122)
+XNDIVP_DIV_ITER(123, 122)
+XNDIVP_MOD_ITER(124, 123)
+XNDIVP_DIV_ITER(124, 123)
+XNDIVP_MOD_ITER(125, 124)
+XNDIVP_DIV_ITER(125, 124)
+XNDIVP_MOD_ITER(126, 125)
+XNDIVP_DIV_ITER(126, 125)
+XNDIVP_MOD_ITER(127, 126)
+XNDIVP_DIV_ITER(127, 126)
+XNDIVP_MOD_ITER(128, 127)
+XNDIVP_DIV_ITER(128, 127)
+XNDIVP_MOD_ITER(129, 128)
+XNDIVP_DIV_ITER(129, 128)
+XNDIVP_MOD_ITER(130, 129)
+XNDIVP_DIV_ITER(130, 129)
+XNDIVP_MOD_ITER(131, 130)
+XNDIVP_DIV_ITER(131, 130)
+XNDIVP_MOD_ITER(132, 131)
+XNDIVP_DIV_ITER(132, 131)
+XNDIVP_MOD_ITER(133, 132)
+XNDIVP_DIV_ITER(133, 132)
+XNDIVP_MOD_ITER(134, 133)
+XNDIVP_DIV_ITER(134, 133)
+XNDIVP_MOD_ITER(135, 134)
+XNDIVP_DIV_ITER(135, 134)
+XNDIVP_MOD_ITER(136, 135)
+XNDIVP_DIV_ITER(136, 135)
+XNDIVP_MOD_ITER(137, 136)
+XNDIVP_DIV_ITER(137, 136)
+XNDIVP_MOD_ITER(138, 137)
+XNDIVP_DIV_ITER(138, 137)
+XNDIVP_MOD_ITER(139, 138)
+XNDIVP_DIV_ITER(139, 138)
+XNDIVP_MOD_ITER(140, 139)
+XNDIVP_DIV_ITER(140, 139)
+XNDIVP_MOD_ITER(141, 140)
+XNDIVP_DIV_ITER(141, 140)
+XNDIVP_MOD_ITER(142, 141)
+XNDIVP_DIV_ITER(142, 141)
+XNDIVP_MOD_ITER(143, 142)
+XNDIVP_DIV_ITER(143, 142)
+XNDIVP_MOD_ITER(144, 143)
+XNDIVP_DIV_ITER(144, 143)
+XNDIVP_MOD_ITER(145, 144)
+XNDIVP_DIV_ITER(145, 144)
+XNDIVP_MOD_ITER(146, 145)
+XNDIVP_DIV_ITER(146, 145)
+XNDIVP_MOD_ITER(147, 146)
+XNDIVP_DIV_ITER(147, 146)
+XNDIVP_MOD_ITER(148, 147)
+XNDIVP_DIV_ITER(148, 147)
+XNDIVP_MOD_ITER(149, 148)
+XNDIVP_DIV_ITER(149, 148)
+XNDIVP_MOD_ITER(150, 149)
+XNDIVP_DIV_ITER(150, 149)
+XNDIVP_MOD_ITER(151, 150)
+XNDIVP_DIV_ITER(151, 150)
+XNDIVP_MOD_ITER(152, 151)
+XNDIVP_DIV_ITER(152, 151)
+XNDIVP_MOD_ITER(153, 152)
+XNDIVP_DIV_ITER(153, 152)
+XNDIVP_MOD_ITER(154, 153)
+XNDIVP_DIV_ITER(154, 153)
+XNDIVP_MOD_ITER(155, 154)
+XNDIVP_DIV_ITER(155, 154)
+XNDIVP_MOD_ITER(156, 155)
+XNDIVP_DIV_ITER(156, 155)
+XNDIVP_MOD_ITER(157, 156)
+XNDIVP_DIV_ITER(157, 156)
+XNDIVP_MOD_ITER(158, 157)
+XNDIVP_DIV_ITER(158, 157)
+XNDIVP_MOD_ITER(159, 158)
+XNDIVP_DIV_ITER(159, 158)
+XNDIVP_MOD_ITER(160, 159)
+XNDIVP_DIV_ITER(160, 159)
+XNDIVP_MOD_ITER(161, 160)
+XNDIVP_DIV_ITER(161, 160)
+XNDIVP_MOD_ITER(162, 161)
+XNDIVP_DIV_ITER(162, 161)
+XNDIVP_MOD_ITER(163, 162)
+XNDIVP_DIV_ITER(163, 162)
+XNDIVP_MOD_ITER(164, 163)
+XNDIVP_DIV_ITER(164, 163)
+XNDIVP_MOD_ITER(165, 164)
+XNDIVP_DIV_ITER(165, 164)
+XNDIVP_MOD_ITER(166, 165)
+XNDIVP_DIV_ITER(166, 165)
+XNDIVP_MOD_ITER(167, 166)
+XNDIVP_DIV_ITER(167, 166)
+XNDIVP_MOD_ITER(168, 167)
+XNDIVP_DIV_ITER(168, 167)
+XNDIVP_MOD_ITER(169, 168)
+XNDIVP_DIV_ITER(169, 168)
+XNDIVP_MOD_ITER(170, 169)
+XNDIVP_DIV_ITER(170, 169)
+XNDIVP_MOD_ITER(171, 170)
+XNDIVP_DIV_ITER(171, 170)
+XNDIVP_MOD_ITER(172, 171)
+XNDIVP_DIV_ITER(172, 171)
+XNDIVP_MOD_ITER(173, 172)
+XNDIVP_DIV_ITER(173, 172)
+XNDIVP_MOD_ITER(174, 173)
+XNDIVP_DIV_ITER(174, 173)
+XNDIVP_MOD_ITER(175, 174)
+XNDIVP_DIV_ITER(175, 174)
+XNDIVP_MOD_ITER(176, 175)
+XNDIVP_DIV_ITER(176, 175)
+XNDIVP_MOD_ITER(177, 176)
+XNDIVP_DIV_ITER(177, 176)
+XNDIVP_MOD_ITER(178, 177)
+XNDIVP_DIV_ITER(178, 177)
+XNDIVP_MOD_ITER(179, 178)
+XNDIVP_DIV_ITER(179, 178)
+XNDIVP_MOD_ITER(180, 179)
+XNDIVP_DIV_ITER(180, 179)
+XNDIVP_MOD_ITER(181, 180)
+XNDIVP_DIV_ITER(181, 180)
+XNDIVP_MOD_ITER(182, 181)
+XNDIVP_DIV_ITER(182, 181)
+XNDIVP_MOD_ITER(183, 182)
+XNDIVP_DIV_ITER(183, 182)
+XNDIVP_MOD_ITER(184, 183)
+XNDIVP_DIV_ITER(184, 183)
+XNDIVP_MOD_ITER(185, 184)
+XNDIVP_DIV_ITER(185, 184)
+XNDIVP_MOD_ITER(186, 185)
+XNDIVP_DIV_ITER(186, 185)
+XNDIVP_MOD_ITER(187, 186)
+XNDIVP_DIV_ITER(187, 186)
+XNDIVP_MOD_ITER(188, 187)
+XNDIVP_DIV_ITER(188, 187)
+XNDIVP_MOD_ITER(189, 188)
+XNDIVP_DIV_ITER(189, 188)
+XNDIVP_MOD_ITER(190, 189)
+XNDIVP_DIV_ITER(190, 189)
+XNDIVP_MOD_ITER(191, 190)
+XNDIVP_DIV_ITER(191, 190)
+XNDIVP_MOD_ITER(192, 191)
+XNDIVP_DIV_ITER(192, 191)
+XNDIVP_MOD_ITER(193, 192)
+XNDIVP_DIV_ITER(193, 192)
+XNDIVP_MOD_ITER(194, 193)
+XNDIVP_DIV_ITER(194, 193)
+XNDIVP_MOD_ITER(195, 194)
+XNDIVP_DIV_ITER(195, 194)
+XNDIVP_MOD_ITER(196, 195)
+XNDIVP_DIV_ITER(196, 195)
+XNDIVP_MOD_ITER(197, 196)
+XNDIVP_DIV_ITER(197, 196)
+XNDIVP_MOD_ITER(198, 197)
+XNDIVP_DIV_ITER(198, 197)
+XNDIVP_MOD_ITER(199, 198)
+XNDIVP_DIV_ITER(199, 198)
+XNDIVP_MOD_ITER(200, 199)
+XNDIVP_DIV_ITER(200, 199)
+XNDIVP_MOD_ITER(201, 200)
+XNDIVP_DIV_ITER(201, 200)
+XNDIVP_MOD_ITER(202, 201)
+XNDIVP_DIV_ITER(202, 201)
+XNDIVP_MOD_ITER(203, 202)
+XNDIVP_DIV_ITER(203, 202)
+XNDIVP_MOD_ITER(204, 203)
+XNDIVP_DIV_ITER(204, 203)
+XNDIVP_MOD_ITER(205, 204)
+XNDIVP_DIV_ITER(205, 204)
+XNDIVP_MOD_ITER(206, 205)
+XNDIVP_DIV_ITER(206, 205)
+XNDIVP_MOD_ITER(207, 206)
+XNDIVP_DIV_ITER(207, 206)
+XNDIVP_MOD_ITER(208, 207)
+XNDIVP_DIV_ITER(208, 207)
+XNDIVP_MOD_ITER(209, 208)
+XNDIVP_DIV_ITER(209, 208)
+XNDIVP_MOD_ITER(210, 209)
+XNDIVP_DIV_ITER(210, 209)
+XNDIVP_MOD_ITER(211, 210)
+XNDIVP_DIV_ITER(211, 210)
+XNDIVP_MOD_ITER(212, 211)
+XNDIVP_DIV_ITER(212, 211)
+XNDIVP_MOD_ITER(213, 212)
+XNDIVP_DIV_ITER(213, 212)
+XNDIVP_MOD_ITER(214, 213)
+XNDIVP_DIV_ITER(214, 213)
+XNDIVP_MOD_ITER(215, 214)
+XNDIVP_DIV_ITER(215, 214)
+XNDIVP_MOD_ITER(216, 215)
+XNDIVP_DIV_ITER(216, 215)
+XNDIVP_MOD_ITER(217, 216)
+XNDIVP_DIV_ITER(217, 216)
+XNDIVP_MOD_ITER(218, 217)
+XNDIVP_DIV_ITER(218, 217)
+XNDIVP_MOD_ITER(219, 218)
+XNDIVP_DIV_ITER(219, 218)
+XNDIVP_MOD_ITER(220, 219)
+XNDIVP_DIV_ITER(220, 219)
+XNDIVP_MOD_ITER(221, 220)
+XNDIVP_DIV_ITER(221, 220)
+XNDIVP_MOD_ITER(222, 221)
+XNDIVP_DIV_ITER(222, 221)
+XNDIVP_MOD_ITER(223, 222)
+XNDIVP_DIV_ITER(223, 222)
+XNDIVP_MOD_ITER(224, 223)
+XNDIVP_DIV_ITER(224, 223)
+XNDIVP_MOD_ITER(225, 224)
+XNDIVP_DIV_ITER(225, 224)
+XNDIVP_MOD_ITER(226, 225)
+XNDIVP_DIV_ITER(226, 225)
+XNDIVP_MOD_ITER(227, 226)
+XNDIVP_DIV_ITER(227, 226)
+XNDIVP_MOD_ITER(228, 227)
+XNDIVP_DIV_ITER(228, 227)
+XNDIVP_MOD_ITER(229, 228)
+XNDIVP_DIV_ITER(229, 228)
+XNDIVP_MOD_ITER(230, 229)
+XNDIVP_DIV_ITER(230, 229)
+XNDIVP_MOD_ITER(231, 230)
+XNDIVP_DIV_ITER(231, 230)
+XNDIVP_MOD_ITER(232, 231)
+XNDIVP_DIV_ITER(232, 231)
+XNDIVP_MOD_ITER(233, 232)
+XNDIVP_DIV_ITER(233, 232)
+XNDIVP_MOD_ITER(234, 233)
+XNDIVP_DIV_ITER(234, 233)
+XNDIVP_MOD_ITER(235, 234)
+XNDIVP_DIV_ITER(235, 234)
+XNDIVP_MOD_ITER(236, 235)
+XNDIVP_DIV_ITER(236, 235)
+XNDIVP_MOD_ITER(237, 236)
+XNDIVP_DIV_ITER(237, 236)
+XNDIVP_MOD_ITER(238, 237)
+XNDIVP_DIV_ITER(238, 237)
+XNDIVP_MOD_ITER(239, 238)
+XNDIVP_DIV_ITER(239, 238)
+XNDIVP_MOD_ITER(240, 239)
+XNDIVP_DIV_ITER(240, 239)
+XNDIVP_MOD_ITER(241, 240)
+XNDIVP_DIV_ITER(241, 240)
+XNDIVP_MOD_ITER(242, 241)
+XNDIVP_DIV_ITER(242, 241)
+XNDIVP_MOD_ITER(243, 242)
+XNDIVP_DIV_ITER(243, 242)
+XNDIVP_MOD_ITER(244, 243)
+XNDIVP_DIV_ITER(244, 243)
+XNDIVP_MOD_ITER(245, 244)
+XNDIVP_DIV_ITER(245, 244)
+XNDIVP_MOD_ITER(246, 245)
+XNDIVP_DIV_ITER(246, 245)
+XNDIVP_MOD_ITER(247, 246)
+XNDIVP_DIV_ITER(247, 246)
+XNDIVP_MOD_ITER(248, 247)
+XNDIVP_DIV_ITER(248, 247)
+XNDIVP_MOD_ITER(249, 248)
+XNDIVP_DIV_ITER(249, 248)
+XNDIVP_MOD_ITER(250, 249)
+XNDIVP_DIV_ITER(250, 249)
+XNDIVP_MOD_ITER(251, 250)
+XNDIVP_DIV_ITER(251, 250)
+XNDIVP_MOD_ITER(252, 251)
+XNDIVP_DIV_ITER(252, 251)
+XNDIVP_MOD_ITER(253, 252)
+XNDIVP_DIV_ITER(253, 252)
+XNDIVP_MOD_ITER(254, 253)
+XNDIVP_DIV_ITER(254, 253)
+XNDIVP_MOD_ITER(255, 254)
+XNDIVP_DIV_ITER(255, 254)
+XNDIVP_MOD_ITER(256, 255)
+XNDIVP_DIV_ITER(256, 255)
+XNDIVP_MOD_ITER(257, 256)
+XNDIVP_DIV_ITER(257, 256)
+XNDIVP_MOD_ITER(258, 257)
+XNDIVP_DIV_ITER(258, 257)
+XNDIVP_MOD_ITER(259, 258)
+XNDIVP_DIV_ITER(259, 258)
+XNDIVP_MOD_ITER(260, 259)
+XNDIVP_DIV_ITER(260, 259)
+XNDIVP_MOD_ITER(261, 260)
+XNDIVP_DIV_ITER(261, 260)
+XNDIVP_MOD_ITER(262, 261)
+XNDIVP_DIV_ITER(262, 261)
+XNDIVP_MOD_ITER(263, 262)
+XNDIVP_DIV_ITER(263, 262)
+XNDIVP_MOD_ITER(264, 263)
+XNDIVP_DIV_ITER(264, 263)
+XNDIVP_MOD_ITER(265, 264)
+XNDIVP_DIV_ITER(265, 264)
+XNDIVP_MOD_ITER(266, 265)
+XNDIVP_DIV_ITER(266, 265)
+XNDIVP_MOD_ITER(267, 266)
+XNDIVP_DIV_ITER(267, 266)
+XNDIVP_MOD_ITER(268, 267)
+XNDIVP_DIV_ITER(268, 267)
+XNDIVP_MOD_ITER(269, 268)
+XNDIVP_DIV_ITER(269, 268)
+XNDIVP_MOD_ITER(270, 269)
+XNDIVP_DIV_ITER(270, 269)
+XNDIVP_MOD_ITER(271, 270)
+XNDIVP_DIV_ITER(271, 270)
+XNDIVP_MOD_ITER(272, 271)
+XNDIVP_DIV_ITER(272, 271)
+XNDIVP_MOD_ITER(273, 272)
+XNDIVP_DIV_ITER(273, 272)
+XNDIVP_MOD_ITER(274, 273)
+XNDIVP_DIV_ITER(274, 273)
+XNDIVP_MOD_ITER(275, 274)
+XNDIVP_DIV_ITER(275, 274)
+XNDIVP_MOD_ITER(276, 275)
+XNDIVP_DIV_ITER(276, 275)
+XNDIVP_MOD_ITER(277, 276)
+XNDIVP_DIV_ITER(277, 276)
+XNDIVP_MOD_ITER(278, 277)
+XNDIVP_DIV_ITER(278, 277)
+XNDIVP_MOD_ITER(279, 278)
+XNDIVP_DIV_ITER(279, 278)
+XNDIVP_MOD_ITER(280, 279)
+XNDIVP_DIV_ITER(280, 279)
+XNDIVP_MOD_ITER(281, 280)
+XNDIVP_DIV_ITER(281, 280)
+XNDIVP_MOD_ITER(282, 281)
+XNDIVP_DIV_ITER(282, 281)
+XNDIVP_MOD_ITER(283, 282)
+XNDIVP_DIV_ITER(283, 282)
+XNDIVP_MOD_ITER(284, 283)
+XNDIVP_DIV_ITER(284, 283)
+XNDIVP_MOD_ITER(285, 284)
+XNDIVP_DIV_ITER(285, 284)
+XNDIVP_MOD_ITER(286, 285)
+XNDIVP_DIV_ITER(286, 285)
+XNDIVP_MOD_ITER(287, 286)
+XNDIVP_DIV_ITER(287, 286)
+XNDIVP_MOD_ITER(288, 287)
+XNDIVP_DIV_ITER(288, 287)
+XNDIVP_MOD_ITER(289, 288)
+XNDIVP_DIV_ITER(289, 288)
+XNDIVP_MOD_ITER(290, 289)
+XNDIVP_DIV_ITER(290, 289)
+XNDIVP_MOD_ITER(291, 290)
+XNDIVP_DIV_ITER(291, 290)
+XNDIVP_MOD_ITER(292, 291)
+XNDIVP_DIV_ITER(292, 291)
+XNDIVP_MOD_ITER(293, 292)
+XNDIVP_DIV_ITER(293, 292)
+XNDIVP_MOD_ITER(294, 293)
+XNDIVP_DIV_ITER(294, 293)
+XNDIVP_MOD_ITER(295, 294)
+XNDIVP_DIV_ITER(295, 294)
+XNDIVP_MOD_ITER(296, 295)
+XNDIVP_DIV_ITER(296, 295)
+XNDIVP_MOD_ITER(297, 296)
+XNDIVP_DIV_ITER(297, 296)
+XNDIVP_MOD_ITER(298, 297)
+XNDIVP_DIV_ITER(298, 297)
+XNDIVP_MOD_ITER(299, 298)
+XNDIVP_DIV_ITER(299, 298)
+XNDIVP_MOD_ITER(300, 299)
+XNDIVP_DIV_ITER(300, 299)
+XNDIVP_MOD_ITER(301, 300)
+XNDIVP_DIV_ITER(301, 300)
+XNDIVP_MOD_ITER(302, 301)
+XNDIVP_DIV_ITER(302, 301)
+XNDIVP_MOD_ITER(303, 302)
+XNDIVP_DIV_ITER(303, 302)
+XNDIVP_MOD_ITER(304, 303)
+XNDIVP_DIV_ITER(304, 303)
+XNDIVP_MOD_ITER(305, 304)
+XNDIVP_DIV_ITER(305, 304)
+XNDIVP_MOD_ITER(306, 305)
+XNDIVP_DIV_ITER(306, 305)
+XNDIVP_MOD_ITER(307, 306)
+XNDIVP_DIV_ITER(307, 306)
+XNDIVP_MOD_ITER(308, 307)
+XNDIVP_DIV_ITER(308, 307)
+XNDIVP_MOD_ITER(309, 308)
+XNDIVP_DIV_ITER(309, 308)
+XNDIVP_MOD_ITER(310, 309)
+XNDIVP_DIV_ITER(310, 309)
+XNDIVP_MOD_ITER(311, 310)
+XNDIVP_DIV_ITER(311, 310)
+XNDIVP_MOD_ITER(312, 311)
+XNDIVP_DIV_ITER(312, 311)
+XNDIVP_MOD_ITER(313, 312)
+XNDIVP_DIV_ITER(313, 312)
+XNDIVP_MOD_ITER(314, 313)
+XNDIVP_DIV_ITER(314, 313)
+XNDIVP_MOD_ITER(315, 314)
+XNDIVP_DIV_ITER(315, 314)
+XNDIVP_MOD_ITER(316, 315)
+XNDIVP_DIV_ITER(316, 315)
+XNDIVP_MOD_ITER(317, 316)
+XNDIVP_DIV_ITER(317, 316)
+XNDIVP_MOD_ITER(318, 317)
+XNDIVP_DIV_ITER(318, 317)
+XNDIVP_MOD_ITER(319, 318)
+XNDIVP_DIV_ITER(319, 318)
+XNDIVP_MOD_ITER(320, 319)
+XNDIVP_DIV_ITER(320, 319)
+XNDIVP_MOD_ITER(321, 320)
+XNDIVP_DIV_ITER(321, 320)
+XNDIVP_MOD_ITER(322, 321)
+XNDIVP_DIV_ITER(322, 321)
+XNDIVP_MOD_ITER(323, 322)
+XNDIVP_DIV_ITER(323, 322)
+XNDIVP_MOD_ITER(324, 323)
+XNDIVP_DIV_ITER(324, 323)
+XNDIVP_MOD_ITER(325, 324)
+XNDIVP_DIV_ITER(325, 324)
+XNDIVP_MOD_ITER(326, 325)
+XNDIVP_DIV_ITER(326, 325)
+XNDIVP_MOD_ITER(327, 326)
+XNDIVP_DIV_ITER(327, 326)
+XNDIVP_MOD_ITER(328, 327)
+XNDIVP_DIV_ITER(328, 327)
+XNDIVP_MOD_ITER(329, 328)
+XNDIVP_DIV_ITER(329, 328)
+XNDIVP_MOD_ITER(330, 329)
+XNDIVP_DIV_ITER(330, 329)
+XNDIVP_MOD_ITER(331, 330)
+XNDIVP_DIV_ITER(331, 330)
+XNDIVP_MOD_ITER(332, 331)
+XNDIVP_DIV_ITER(332, 331)
+XNDIVP_MOD_ITER(333, 332)
+XNDIVP_DIV_ITER(333, 332)
+XNDIVP_MOD_ITER(334, 333)
+XNDIVP_DIV_ITER(334, 333)
+XNDIVP_MOD_ITER(335, 334)
+XNDIVP_DIV_ITER(335, 334)
+XNDIVP_MOD_ITER(336, 335)
+XNDIVP_DIV_ITER(336, 335)
+XNDIVP_MOD_ITER(337, 336)
+XNDIVP_DIV_ITER(337, 336)
+XNDIVP_MOD_ITER(338, 337)
+XNDIVP_DIV_ITER(338, 337)
+XNDIVP_MOD_ITER(339, 338)
+XNDIVP_DIV_ITER(339, 338)
+XNDIVP_MOD_ITER(340, 339)
+XNDIVP_DIV_ITER(340, 339)
+XNDIVP_MOD_ITER(341, 340)
+XNDIVP_DIV_ITER(341, 340)
+XNDIVP_MOD_ITER(342, 341)
+XNDIVP_DIV_ITER(342, 341)
+XNDIVP_MOD_ITER(343, 342)
+XNDIVP_DIV_ITER(343, 342)
+XNDIVP_MOD_ITER(344, 343)
+XNDIVP_DIV_ITER(344, 343)
+XNDIVP_MOD_ITER(345, 344)
+XNDIVP_DIV_ITER(345, 344)
+XNDIVP_MOD_ITER(346, 345)
+XNDIVP_DIV_ITER(346, 345)
+XNDIVP_MOD_ITER(347, 346)
+XNDIVP_DIV_ITER(347, 346)
+XNDIVP_MOD_ITER(348, 347)
+XNDIVP_DIV_ITER(348, 347)
+XNDIVP_MOD_ITER(349, 348)
+XNDIVP_DIV_ITER(349, 348)
+XNDIVP_MOD_ITER(350, 349)
+XNDIVP_DIV_ITER(350, 349)
+XNDIVP_MOD_ITER(351, 350)
+XNDIVP_DIV_ITER(351, 350)
+XNDIVP_MOD_ITER(352, 351)
+XNDIVP_DIV_ITER(352, 351)
+XNDIVP_MOD_ITER(353, 352)
+XNDIVP_DIV_ITER(353, 352)
+XNDIVP_MOD_ITER(354, 353)
+XNDIVP_DIV_ITER(354, 353)
+XNDIVP_MOD_ITER(355, 354)
+XNDIVP_DIV_ITER(355, 354)
+XNDIVP_MOD_ITER(356, 355)
+XNDIVP_DIV_ITER(356, 355)
+XNDIVP_MOD_ITER(357, 356)
+XNDIVP_DIV_ITER(357, 356)
+XNDIVP_MOD_ITER(358, 357)
+XNDIVP_DIV_ITER(358, 357)
+XNDIVP_MOD_ITER(359, 358)
+XNDIVP_DIV_ITER(359, 358)
+XNDIVP_MOD_ITER(360, 359)
+XNDIVP_DIV_ITER(360, 359)
+XNDIVP_MOD_ITER(361, 360)
+XNDIVP_DIV_ITER(361, 360)
+XNDIVP_MOD_ITER(362, 361)
+XNDIVP_DIV_ITER(362, 361)
+XNDIVP_MOD_ITER(363, 362)
+XNDIVP_DIV_ITER(363, 362)
+XNDIVP_MOD_ITER(364, 363)
+XNDIVP_DIV_ITER(364, 363)
+XNDIVP_MOD_ITER(365, 364)
+XNDIVP_DIV_ITER(365, 364)
+XNDIVP_MOD_ITER(366, 365)
+XNDIVP_DIV_ITER(366, 365)
+XNDIVP_MOD_ITER(367, 366)
+XNDIVP_DIV_ITER(367, 366)
+XNDIVP_MOD_ITER(368, 367)
+XNDIVP_DIV_ITER(368, 367)
+XNDIVP_MOD_ITER(369, 368)
+XNDIVP_DIV_ITER(369, 368)
+XNDIVP_MOD_ITER(370, 369)
+XNDIVP_DIV_ITER(370, 369)
+XNDIVP_MOD_ITER(371, 370)
+XNDIVP_DIV_ITER(371, 370)
+XNDIVP_MOD_ITER(372, 371)
+XNDIVP_DIV_ITER(372, 371)
+XNDIVP_MOD_ITER(373, 372)
+XNDIVP_DIV_ITER(373, 372)
+XNDIVP_MOD_ITER(374, 373)
+XNDIVP_DIV_ITER(374, 373)
+XNDIVP_MOD_ITER(375, 374)
+XNDIVP_DIV_ITER(375, 374)
+XNDIVP_MOD_ITER(376, 375)
+XNDIVP_DIV_ITER(376, 375)
+XNDIVP_MOD_ITER(377, 376)
+XNDIVP_DIV_ITER(377, 376)
+XNDIVP_MOD_ITER(378, 377)
+XNDIVP_DIV_ITER(378, 377)
+XNDIVP_MOD_ITER(379, 378)
+XNDIVP_DIV_ITER(379, 378)
+XNDIVP_MOD_ITER(380, 379)
+XNDIVP_DIV_ITER(380, 379)
+XNDIVP_MOD_ITER(381, 380)
+XNDIVP_DIV_ITER(381, 380)
+XNDIVP_MOD_ITER(382, 381)
+XNDIVP_DIV_ITER(382, 381)
+XNDIVP_MOD_ITER(383, 382)
+XNDIVP_DIV_ITER(383, 382)
+XNDIVP_MOD_ITER(384, 383)
+XNDIVP_DIV_ITER(384, 383)
+XNDIVP_MOD_ITER(385, 384)
+XNDIVP_DIV_ITER(385, 384)
+XNDIVP_MOD_ITER(386, 385)
+XNDIVP_DIV_ITER(386, 385)
+XNDIVP_MOD_ITER(387, 386)
+XNDIVP_DIV_ITER(387, 386)
+XNDIVP_MOD_ITER(388, 387)
+XNDIVP_DIV_ITER(388, 387)
+XNDIVP_MOD_ITER(389, 388)
+XNDIVP_DIV_ITER(389, 388)
+XNDIVP_MOD_ITER(390, 389)
+XNDIVP_DIV_ITER(390, 389)
+XNDIVP_MOD_ITER(391, 390)
+XNDIVP_DIV_ITER(391, 390)
+XNDIVP_MOD_ITER(392, 391)
+XNDIVP_DIV_ITER(392, 391)
+XNDIVP_MOD_ITER(393, 392)
+XNDIVP_DIV_ITER(393, 392)
+XNDIVP_MOD_ITER(394, 393)
+XNDIVP_DIV_ITER(394, 393)
+XNDIVP_MOD_ITER(395, 394)
+XNDIVP_DIV_ITER(395, 394)
+XNDIVP_MOD_ITER(396, 395)
+XNDIVP_DIV_ITER(396, 395)
+XNDIVP_MOD_ITER(397, 396)
+XNDIVP_DIV_ITER(397, 396)
+XNDIVP_MOD_ITER(398, 397)
+XNDIVP_DIV_ITER(398, 397)
+XNDIVP_MOD_ITER(399, 398)
+XNDIVP_DIV_ITER(399, 398)
+XNDIVP_MOD_ITER(400, 399)
+XNDIVP_DIV_ITER(400, 399)
+XNDIVP_MOD_ITER(401, 400)
+XNDIVP_DIV_ITER(401, 400)
+XNDIVP_MOD_ITER(402, 401)
+XNDIVP_DIV_ITER(402, 401)
+XNDIVP_MOD_ITER(403, 402)
+XNDIVP_DIV_ITER(403, 402)
+XNDIVP_MOD_ITER(404, 403)
+XNDIVP_DIV_ITER(404, 403)
+XNDIVP_MOD_ITER(405, 404)
+XNDIVP_DIV_ITER(405, 404)
+XNDIVP_MOD_ITER(406, 405)
+XNDIVP_DIV_ITER(406, 405)
+XNDIVP_MOD_ITER(407, 406)
+XNDIVP_DIV_ITER(407, 406)
+XNDIVP_MOD_ITER(408, 407)
+XNDIVP_DIV_ITER(408, 407)
+XNDIVP_MOD_ITER(409, 408)
+XNDIVP_DIV_ITER(409, 408)
+XNDIVP_MOD_ITER(410, 409)
+XNDIVP_DIV_ITER(410, 409)
+XNDIVP_MOD_ITER(411, 410)
+XNDIVP_DIV_ITER(411, 410)
+XNDIVP_MOD_ITER(412, 411)
+XNDIVP_DIV_ITER(412, 411)
+XNDIVP_MOD_ITER(413, 412)
+XNDIVP_DIV_ITER(413, 412)
+XNDIVP_MOD_ITER(414, 413)
+XNDIVP_DIV_ITER(414, 413)
+XNDIVP_MOD_ITER(415, 414)
+XNDIVP_DIV_ITER(415, 414)
+XNDIVP_MOD_ITER(416, 415)
+XNDIVP_DIV_ITER(416, 415)
+XNDIVP_MOD_ITER(417, 416)
+XNDIVP_DIV_ITER(417, 416)
+XNDIVP_MOD_ITER(418, 417)
+XNDIVP_DIV_ITER(418, 417)
+XNDIVP_MOD_ITER(419, 418)
+XNDIVP_DIV_ITER(419, 418)
+XNDIVP_MOD_ITER(420, 419)
+XNDIVP_DIV_ITER(420, 419)
+XNDIVP_MOD_ITER(421, 420)
+XNDIVP_DIV_ITER(421, 420)
+XNDIVP_MOD_ITER(422, 421)
+XNDIVP_DIV_ITER(422, 421)
+XNDIVP_MOD_ITER(423, 422)
+XNDIVP_DIV_ITER(423, 422)
+XNDIVP_MOD_ITER(424, 423)
+XNDIVP_DIV_ITER(424, 423)
+XNDIVP_MOD_ITER(425, 424)
+XNDIVP_DIV_ITER(425, 424)
+XNDIVP_MOD_ITER(426, 425)
+XNDIVP_DIV_ITER(426, 425)
+XNDIVP_MOD_ITER(427, 426)
+XNDIVP_DIV_ITER(427, 426)
+XNDIVP_MOD_ITER(428, 427)
+XNDIVP_DIV_ITER(428, 427)
+XNDIVP_MOD_ITER(429, 428)
+XNDIVP_DIV_ITER(429, 428)
+XNDIVP_MOD_ITER(430, 429)
+XNDIVP_DIV_ITER(430, 429)
+XNDIVP_MOD_ITER(431, 430)
+XNDIVP_DIV_ITER(431, 430)
+XNDIVP_MOD_ITER(432, 431)
+XNDIVP_DIV_ITER(432, 431)
+XNDIVP_MOD_ITER(433, 432)
+XNDIVP_DIV_ITER(433, 432)
+XNDIVP_MOD_ITER(434, 433)
+XNDIVP_DIV_ITER(434, 433)
+XNDIVP_MOD_ITER(435, 434)
+XNDIVP_DIV_ITER(435, 434)
+XNDIVP_MOD_ITER(436, 435)
+XNDIVP_DIV_ITER(436, 435)
+XNDIVP_MOD_ITER(437, 436)
+XNDIVP_DIV_ITER(437, 436)
+XNDIVP_MOD_ITER(438, 437)
+XNDIVP_DIV_ITER(438, 437)
+XNDIVP_MOD_ITER(439, 438)
+XNDIVP_DIV_ITER(439, 438)
+XNDIVP_MOD_ITER(440, 439)
+XNDIVP_DIV_ITER(440, 439)
+XNDIVP_MOD_ITER(441, 440)
+XNDIVP_DIV_ITER(441, 440)
+XNDIVP_MOD_ITER(442, 441)
+XNDIVP_DIV_ITER(442, 441)
+XNDIVP_MOD_ITER(443, 442)
+XNDIVP_DIV_ITER(443, 442)
+XNDIVP_MOD_ITER(444, 443)
+XNDIVP_DIV_ITER(444, 443)
+XNDIVP_MOD_ITER(445, 444)
+XNDIVP_DIV_ITER(445, 444)
+XNDIVP_MOD_ITER(446, 445)
+XNDIVP_DIV_ITER(446, 445)
+XNDIVP_MOD_ITER(447, 446)
+XNDIVP_DIV_ITER(447, 446)
+XNDIVP_MOD_ITER(448, 447)
+XNDIVP_DIV_ITER(448, 447)
+XNDIVP_MOD_ITER(449, 448)
+XNDIVP_DIV_ITER(449, 448)
+XNDIVP_MOD_ITER(450, 449)
+XNDIVP_DIV_ITER(450, 449)
+XNDIVP_MOD_ITER(451, 450)
+XNDIVP_DIV_ITER(451, 450)
+XNDIVP_MOD_ITER(452, 451)
+XNDIVP_DIV_ITER(452, 451)
+XNDIVP_MOD_ITER(453, 452)
+XNDIVP_DIV_ITER(453, 452)
+XNDIVP_MOD_ITER(454, 453)
+XNDIVP_DIV_ITER(454, 453)
+XNDIVP_MOD_ITER(455, 454)
+XNDIVP_DIV_ITER(455, 454)
+XNDIVP_MOD_ITER(456, 455)
+XNDIVP_DIV_ITER(456, 455)
+XNDIVP_MOD_ITER(457, 456)
+XNDIVP_DIV_ITER(457, 456)
+XNDIVP_MOD_ITER(458, 457)
+XNDIVP_DIV_ITER(458, 457)
+XNDIVP_MOD_ITER(459, 458)
+XNDIVP_DIV_ITER(459, 458)
+XNDIVP_MOD_ITER(460, 459)
+XNDIVP_DIV_ITER(460, 459)
+XNDIVP_MOD_ITER(461, 460)
+XNDIVP_DIV_ITER(461, 460)
+XNDIVP_MOD_ITER(462, 461)
+XNDIVP_DIV_ITER(462, 461)
+XNDIVP_MOD_ITER(463, 462)
+XNDIVP_DIV_ITER(463, 462)
+XNDIVP_MOD_ITER(464, 463)
+XNDIVP_DIV_ITER(464, 463)
+XNDIVP_MOD_ITER(465, 464)
+XNDIVP_DIV_ITER(465, 464)
+XNDIVP_MOD_ITER(466, 465)
+XNDIVP_DIV_ITER(466, 465)
+XNDIVP_MOD_ITER(467, 466)
+XNDIVP_DIV_ITER(467, 466)
+XNDIVP_MOD_ITER(468, 467)
+XNDIVP_DIV_ITER(468, 467)
+XNDIVP_MOD_ITER(469, 468)
+XNDIVP_DIV_ITER(469, 468)
+XNDIVP_MOD_ITER(470, 469)
+XNDIVP_DIV_ITER(470, 469)
+XNDIVP_MOD_ITER(471, 470)
+XNDIVP_DIV_ITER(471, 470)
+XNDIVP_MOD_ITER(472, 471)
+XNDIVP_DIV_ITER(472, 471)
+XNDIVP_MOD_ITER(473, 472)
+XNDIVP_DIV_ITER(473, 472)
+XNDIVP_MOD_ITER(474, 473)
+XNDIVP_DIV_ITER(474, 473)
+XNDIVP_MOD_ITER(475, 474)
+XNDIVP_DIV_ITER(475, 474)
+XNDIVP_MOD_ITER(476, 475)
+XNDIVP_DIV_ITER(476, 475)
+XNDIVP_MOD_ITER(477, 476)
+XNDIVP_DIV_ITER(477, 476)
+XNDIVP_MOD_ITER(478, 477)
+XNDIVP_DIV_ITER(478, 477)
+XNDIVP_MOD_ITER(479, 478)
+XNDIVP_DIV_ITER(479, 478)
+XNDIVP_MOD_ITER(480, 479)
+XNDIVP_DIV_ITER(480, 479)
+XNDIVP_MOD_ITER(481, 480)
+XNDIVP_DIV_ITER(481, 480)
+XNDIVP_MOD_ITER(482, 481)
+XNDIVP_DIV_ITER(482, 481)
+XNDIVP_MOD_ITER(483, 482)
+XNDIVP_DIV_ITER(483, 482)
+XNDIVP_MOD_ITER(484, 483)
+XNDIVP_DIV_ITER(484, 483)
+XNDIVP_MOD_ITER(485, 484)
+XNDIVP_DIV_ITER(485, 484)
+XNDIVP_MOD_ITER(486, 485)
+XNDIVP_DIV_ITER(486, 485)
+XNDIVP_MOD_ITER(487, 486)
+XNDIVP_DIV_ITER(487, 486)
+XNDIVP_MOD_ITER(488, 487)
+XNDIVP_DIV_ITER(488, 487)
+XNDIVP_MOD_ITER(489, 488)
+XNDIVP_DIV_ITER(489, 488)
+XNDIVP_MOD_ITER(490, 489)
+XNDIVP_DIV_ITER(490, 489)
+XNDIVP_MOD_ITER(491, 490)
+XNDIVP_DIV_ITER(491, 490)
+XNDIVP_MOD_ITER(492, 491)
+XNDIVP_DIV_ITER(492, 491)
+XNDIVP_MOD_ITER(493, 492)
+XNDIVP_DIV_ITER(493, 492)
+XNDIVP_MOD_ITER(494, 493)
+XNDIVP_DIV_ITER(494, 493)
+XNDIVP_MOD_ITER(495, 494)
+XNDIVP_DIV_ITER(495, 494)
+XNDIVP_MOD_ITER(496, 495)
+XNDIVP_DIV_ITER(496, 495)
+XNDIVP_MOD_ITER(497, 496)
+XNDIVP_DIV_ITER(497, 496)
+XNDIVP_MOD_ITER(498, 497)
+XNDIVP_DIV_ITER(498, 497)
+XNDIVP_MOD_ITER(499, 498)
+XNDIVP_DIV_ITER(499, 498)
+XNDIVP_MOD_ITER(500, 499)
+XNDIVP_DIV_ITER(500, 499)
+XNDIVP_MOD_ITER(501, 500)
+XNDIVP_DIV_ITER(501, 500)
+XNDIVP_MOD_ITER(502, 501)
+XNDIVP_DIV_ITER(502, 501)
+XNDIVP_MOD_ITER(503, 502)
+XNDIVP_DIV_ITER(503, 502)
+XNDIVP_MOD_ITER(504, 503)
+XNDIVP_DIV_ITER(504, 503)
+XNDIVP_MOD_ITER(505, 504)
+XNDIVP_DIV_ITER(505, 504)
+XNDIVP_MOD_ITER(506, 505)
+XNDIVP_DIV_ITER(506, 505)
+XNDIVP_MOD_ITER(507, 506)
+XNDIVP_DIV_ITER(507, 506)
+XNDIVP_MOD_ITER(508, 507)
+XNDIVP_DIV_ITER(508, 507)
+XNDIVP_MOD_ITER(509, 508)
+XNDIVP_DIV_ITER(509, 508)
+XNDIVP_MOD_ITER(510, 509)
+XNDIVP_DIV_ITER(510, 509)
+XNDIVP_MOD_ITER(511, 510)
+XNDIVP_DIV_ITER(511, 510)
+XNDIVP_MOD_ITER(512, 511)
+XNDIVP_DIV_ITER(512, 511)
+XNDIVP_MOD_ITER(513, 512)
+XNDIVP_DIV_ITER(513, 512)
+XNDIVP_MOD_ITER(514, 513)
+XNDIVP_DIV_ITER(514, 513)
+XNDIVP_MOD_ITER(515, 514)
+XNDIVP_DIV_ITER(515, 514)
+XNDIVP_MOD_ITER(516, 515)
+XNDIVP_DIV_ITER(516, 515)
+XNDIVP_MOD_ITER(517, 516)
+XNDIVP_DIV_ITER(517, 516)
+XNDIVP_MOD_ITER(518, 517)
+XNDIVP_DIV_ITER(518, 517)
+XNDIVP_MOD_ITER(519, 518)
+XNDIVP_DIV_ITER(519, 518)
+XNDIVP_MOD_ITER(520, 519)
+XNDIVP_DIV_ITER(520, 519)
+XNDIVP_MOD_ITER(521, 520)
+XNDIVP_DIV_ITER(521, 520)
+XNDIVP_MOD_ITER(522, 521)
+XNDIVP_DIV_ITER(522, 521)
+XNDIVP_MOD_ITER(523, 522)
+XNDIVP_DIV_ITER(523, 522)
+XNDIVP_MOD_ITER(524, 523)
+XNDIVP_DIV_ITER(524, 523)
+XNDIVP_MOD_ITER(525, 524)
+XNDIVP_DIV_ITER(525, 524)
+XNDIVP_MOD_ITER(526, 525)
+XNDIVP_DIV_ITER(526, 525)
+XNDIVP_MOD_ITER(527, 526)
+XNDIVP_DIV_ITER(527, 526)
+XNDIVP_MOD_ITER(528, 527)
+XNDIVP_DIV_ITER(528, 527)
+XNDIVP_MOD_ITER(529, 528)
+XNDIVP_DIV_ITER(529, 528)
+XNDIVP_MOD_ITER(530, 529)
+XNDIVP_DIV_ITER(530, 529)
+XNDIVP_MOD_ITER(531, 530)
+XNDIVP_DIV_ITER(531, 530)
+XNDIVP_MOD_ITER(532, 531)
+XNDIVP_DIV_ITER(532, 531)
+XNDIVP_MOD_ITER(533, 532)
+XNDIVP_DIV_ITER(533, 532)
+XNDIVP_MOD_ITER(534, 533)
+XNDIVP_DIV_ITER(534, 533)
+XNDIVP_MOD_ITER(535, 534)
+XNDIVP_DIV_ITER(535, 534)
+XNDIVP_MOD_ITER(536, 535)
+XNDIVP_DIV_ITER(536, 535)
+XNDIVP_MOD_ITER(537, 536)
+XNDIVP_DIV_ITER(537, 536)
+XNDIVP_MOD_ITER(538, 537)
+XNDIVP_DIV_ITER(538, 537)
+XNDIVP_MOD_ITER(539, 538)
+XNDIVP_DIV_ITER(539, 538)
+XNDIVP_MOD_ITER(540, 539)
+XNDIVP_DIV_ITER(540, 539)
+XNDIVP_MOD_ITER(541, 540)
+XNDIVP_DIV_ITER(541, 540)
+XNDIVP_MOD_ITER(542, 541)
+XNDIVP_DIV_ITER(542, 541)
+XNDIVP_MOD_ITER(543, 542)
+XNDIVP_DIV_ITER(543, 542)
+XNDIVP_MOD_ITER(544, 543)
+XNDIVP_DIV_ITER(544, 543)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/crc32x86.c	Mon Feb 09 01:15:00 2026 -0500
@@ -0,0 +1,203 @@
+/* x86-specific CRC routines */
+
+#include "crc32.h"
+#include "crc32i.h"
+#include <stdio.h>
+#include <immintrin.h>
+
+#define BITREVERSE64EX(THIS) \
+( \
+	  (((THIS) & 0x0000000000000001) << 63) \
+	| (((THIS) & 0x0000000000000002) << 61) \
+	| (((THIS) & 0x0000000000000004) << 59) \
+	| (((THIS) & 0x0000000000000008) << 57) \
+	| (((THIS) & 0x0000000000000010) << 55) \
+	| (((THIS) & 0x0000000000000020) << 53) \
+	| (((THIS) & 0x0000000000000040) << 51) \
+	| (((THIS) & 0x0000000000000080) << 49) \
+	| (((THIS) & 0x0000000000000100) << 47) \
+	| (((THIS) & 0x0000000000000200) << 45) \
+	| (((THIS) & 0x0000000000000400) << 43) \
+	| (((THIS) & 0x0000000000000800) << 41) \
+	| (((THIS) & 0x0000000000001000) << 39) \
+	| (((THIS) & 0x0000000000002000) << 37) \
+	| (((THIS) & 0x0000000000004000) << 35) \
+	| (((THIS) & 0x0000000000008000) << 33) \
+	| (((THIS) & 0x0000000000010000) << 31) \
+	| (((THIS) & 0x0000000000020000) << 29) \
+	| (((THIS) & 0x0000000000040000) << 27) \
+	| (((THIS) & 0x0000000000080000) << 25) \
+	| (((THIS) & 0x0000000000100000) << 23) \
+	| (((THIS) & 0x0000000000200000) << 21) \
+	| (((THIS) & 0x0000000000400000) << 19) \
+	| (((THIS) & 0x0000000000800000) << 17) \
+	| (((THIS) & 0x0000000001000000) << 15) \
+	| (((THIS) & 0x0000000002000000) << 13) \
+	| (((THIS) & 0x0000000004000000) << 11) \
+	| (((THIS) & 0x0000000008000000) <<  9) \
+	| (((THIS) & 0x0000000010000000) <<  7) \
+	| (((THIS) & 0x0000000020000000) <<  5) \
+	| (((THIS) & 0x0000000040000000) <<  3) \
+	| (((THIS) & 0x0000000080000000) <<  1) \
+	| (((THIS) & 0x0000000100000000) >>  1) \
+	| (((THIS) & 0x0000000200000000) >>  3) \
+	| (((THIS) & 0x0000000400000000) >>  5) \
+	| (((THIS) & 0x0000000800000000) >>  7) \
+	| (((THIS) & 0x0000001000000000) >>  9) \
+	| (((THIS) & 0x0000002000000000) >> 11) \
+	| (((THIS) & 0x0000004000000000) >> 13) \
+	| (((THIS) & 0x0000008000000000) >> 15) \
+	| (((THIS) & 0x0000010000000000) >> 17) \
+	| (((THIS) & 0x0000020000000000) >> 19) \
+	| (((THIS) & 0x0000040000000000) >> 21) \
+	| (((THIS) & 0x0000080000000000) >> 23) \
+	| (((THIS) & 0x0000100000000000) >> 25) \
+	| (((THIS) & 0x0000200000000000) >> 27) \
+	| (((THIS) & 0x0000400000000000) >> 29) \
+	| (((THIS) & 0x0000800000000000) >> 31) \
+	| (((THIS) & 0x0001000000000000) >> 33) \
+	| (((THIS) & 0x0002000000000000) >> 35) \
+	| (((THIS) & 0x0004000000000000) >> 37) \
+	| (((THIS) & 0x0008000000000000) >> 39) \
+	| (((THIS) & 0x0010000000000000) >> 41) \
+	| (((THIS) & 0x0020000000000000) >> 43) \
+	| (((THIS) & 0x0040000000000000) >> 45) \
+	| (((THIS) & 0x0080000000000000) >> 47) \
+	| (((THIS) & 0x0100000000000000) >> 49) \
+	| (((THIS) & 0x0200000000000000) >> 51) \
+	| (((THIS) & 0x0400000000000000) >> 53) \
+	| (((THIS) & 0x0800000000000000) >> 55) \
+	| (((THIS) & 0x1000000000000000) >> 57) \
+	| (((THIS) & 0x2000000000000000) >> 59) \
+	| (((THIS) & 0x4000000000000000) >> 61) \
+	| (((THIS) & 0x8000000000000000) >> 63) \
+)
+
+#define BITREVERSE64(THIS) \
+	(BITREVERSE64EX((uint64_t)(THIS)))
+
+#define BITREVERSE32EX(THIS) \
+( \
+	(((THIS) & 0x00000001) << 31) \
+	| (((THIS) & 0x00000002) << 29) \
+	| (((THIS) & 0x00000004) << 27) \
+	| (((THIS) & 0x00000008) << 25) \
+	| (((THIS) & 0x00000010) << 23) \
+	| (((THIS) & 0x00000020) << 21) \
+	| (((THIS) & 0x00000040) << 19) \
+	| (((THIS) & 0x00000080) << 17) \
+	| (((THIS) & 0x00000100) << 15) \
+	| (((THIS) & 0x00000200) << 13) \
+	| (((THIS) & 0x00000400) << 11) \
+	| (((THIS) & 0x00000800) <<  9) \
+	| (((THIS) & 0x00001000) <<  7) \
+	| (((THIS) & 0x00002000) <<  5) \
+	| (((THIS) & 0x00004000) <<  3) \
+	| (((THIS) & 0x00008000) <<  1) \
+	| (((THIS) & 0x00010000) >>  1) \
+	| (((THIS) & 0x00020000) >>  3) \
+	| (((THIS) & 0x00040000) >>  5) \
+	| (((THIS) & 0x00080000) >>  7) \
+	| (((THIS) & 0x00100000) >>  9) \
+	| (((THIS) & 0x00200000) >> 11) \
+	| (((THIS) & 0x00400000) >> 13) \
+	| (((THIS) & 0x00800000) >> 15) \
+	| (((THIS) & 0x01000000) >> 17) \
+	| (((THIS) & 0x02000000) >> 19) \
+	| (((THIS) & 0x04000000) >> 21) \
+	| (((THIS) & 0x08000000) >> 23) \
+	| (((THIS) & 0x10000000) >> 25) \
+	| (((THIS) & 0x20000000) >> 27) \
+	| (((THIS) & 0x40000000) >> 29) \
+	| (((THIS) & 0x80000000) >> 31) \
+)
+
+#define BITREVERSE32(THIS) \
+	(BITREVERSE32EX((uint32_t)(THIS)))
+
+enum {
+	XNDIVP_RK08F = (BITREVERSE32(CRC32_POLYNOMIAL)) | 0x100000000ull,
+	XNDIVP_RK08R = (BITREVERSE64(XNDIVP_RK08F) >> 31) | 1,
+
+	/* The beginning ... */
+	XNDIVP_MOD_ITER_0 = XNDIVP_RK08F,
+	XNDIVP_DIV_ITER_0 = 1,
+
+/* to generate table, run this:
+
+#include <stdio.h>
+int main(void)
+{
+	unsigned i;
+
+	for (i = 1; i <= (4*128+32); i++) {
+		printf("XNDIVP_MOD_ITER(%u, %u)\n", i, i - 1);
+		printf("XNDIVP_DIV_ITER(%u, %u)\n", i, i - 1);
+	}
+
+	return 0;
+}
+*/
+
+#define XNDIVP_MOD_ITER(This, last) \
+	XNDIVP_MOD_ITER_##This = (uint64_t)((XNDIVP_MOD_ITER_##last << 1) ^ ((XNDIVP_MOD_ITER_##last & 0x80000000) ? (XNDIVP_RK08F) : 0)),
+
+#define XNDIVP_DIV_ITER(This, last) \
+	XNDIVP_DIV_ITER_##This = (uint64_t)(((uint64_t)XNDIVP_DIV_ITER_##last << 1) | ((XNDIVP_MOD_ITER_##last & 0x80000000ull) ? 1 : 0)),
+
+#include "crc32x86-tab.h"
+
+#undef XNDIVP_MOD_ITER
+#undef XNDIVP_DIV_ITER
+
+#define FIXUPCONSTANTS(x) (BITREVERSE64(x) >> 31)
+	RK01 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_64),
+	RK02 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_128),
+	RK05 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_64),
+	RK06 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_32),
+	RK07 = FIXUPCONSTANTS(XNDIVP_DIV_ITER_32),
+	RK08 = XNDIVP_RK08R,
+#undef FIXUPCONSTANTS
+};
+
+__attribute__((__target__("vpclmulqdq")))
+uint32_t crc32x86_vpclmulqdq_r(uint32_t crc, const unsigned char *msg, size_t sz)
+{
+	/* This actually works for 16-byte buffers too, but whether it's actually
+	 * useful or faster is another question entirely */
+	if (sz >= 32) {
+		static const __attribute__((__aligned__(16))) uint64_t rk01[2] = {RK01, RK02},
+				rk05[2] = {RK05, RK06},
+				rk07[2] = {RK07, RK08},
+				mask2[2] = {0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF};
+		__m128i rk, msgxmm;
+
+		msgxmm = _mm_xor_si128(_mm_load_si128((__m128i *)msg), _mm_cvtsi32_si128(crc));
+
+		rk = _mm_load_si128((__m128i *)rk01);
+
+		for (msg += 16, sz -= 16; sz >= 16; msg += 16, sz -= 16) {
+			msgxmm = _mm_xor_si128(_mm_xor_si128(_mm_clmulepi64_si128(msgxmm, rk, 0x10), _mm_clmulepi64_si128(msgxmm, rk, 0x01)), _mm_load_si128((__m128i *)msg));
+		}
+
+		rk = _mm_load_si128((__m128i *)rk05);
+
+		msgxmm = _mm_xor_si128(_mm_clmulepi64_si128(msgxmm, rk, 0x00), _mm_srli_si128(msgxmm, 8));
+
+		msgxmm = _mm_xor_si128(_mm_clmulepi64_si128(_mm_slli_si128(msgxmm, 12), rk, 0x11), _mm_and_si128(msgxmm, _mm_load_si128((__m128i *)mask2)));
+
+		/* Barrett Reduction */
+		rk = _mm_load_si128((__m128i *)rk07);
+		msgxmm = _mm_xor_si128(_mm_clmulepi64_si128(_mm_clmulepi64_si128(msgxmm, rk, 0x00), rk, 0x10), msgxmm);
+
+		crc = _mm_extract_epi32(msgxmm, 2);
+	}
+
+	if (!sz) return crc;
+
+	/* We were already aligned on a 16-byte boundary going in (hopefully
+	 * or else it will break), and we process 16-bytes at a time. This
+	 * means `msg` is aligned 16-bytes, a multiple of 4-byte, so we don't
+	 * need to align any more (or use crc32c_r). */
+	return crc32qw_r(crc, msg, sz);
+}