# HG changeset patch # User Paper # Date 1770617700 18000 # Node ID 422835bc1acaa1cc9c7ff7850d0f0231f195e04b *: checkin diff -r 000000000000 -r 422835bc1aca LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/LICENSE Mon Feb 09 01:15:00 2026 -0500 @@ -0,0 +1,58 @@ +Most of this project is licensed under the common zlib license: + +------------------------------------------------------------------------------ + +Copyright (c) Paper 2026 + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + +------------------------------------------------------------------------------ + +The code taking advantage of the x86-specific vector instructions was derived +almost entirely from code written by Intel employees, released under the +BSD 3-clause license: + +------------------------------------------------------------------------------ + +Copyright(c) 2011-2015 Intel Corporation All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +------------------------------------------------------------------------------ diff -r 000000000000 -r 422835bc1aca Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Makefile Mon Feb 09 01:15:00 2026 -0500 @@ -0,0 +1,5 @@ +crc32: crc32.o crc32-table.o crc32-test.o crc32c.o crc32qw.o crc32x86.o + $(CC) -o $@ $^ + +clean: + $(RM) crc32 *.o diff -r 000000000000 -r 422835bc1aca README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Mon Feb 09 01:15:00 2026 -0500 @@ -0,0 +1,11 @@ +This code was mostly written as an experiment, to see just how much +could be done using only the C preprocessor (give or take). Turns out +you can do quite a lot. + +In fact, *all* of the tables in this library are generated at compile-time. +The polynomial is #define'd in crc32i.h, and all of the tables are generated +through a combination of enums and preprocessor trickery. Note that this +may cause this code to compile slowly on some machines or compilers. + +At the moment it is hardcoded for x86-64 and gcc, but it could be adapted to +other compilers if they also have features like e.g. alignas() or whatever. \ No newline at end of file diff -r 000000000000 -r 422835bc1aca crc32-table.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crc32-table.c Mon Feb 09 01:15:00 2026 -0500 @@ -0,0 +1,58 @@ +#include "crc32i.h" + +/* This builds our CRC table at compile-time rather than runtime. + * Note: We should have a way to change the polynomial at runtime too. */ + +#define CRC32_MASK(crc) (-((crc) & 1)) + +/* Does one iteration of the 8-time loop to generate one byte of the CRC table. */ +#define CRC32_PRECALC_EX(crc) (((crc) >> 1) ^ ((CRC32_POLYNOMIAL) & CRC32_MASK(crc))) + +/* Does all eight iterations of the loop to generate one byte. */ +#define CRC32_PRECALC_E(byte) \ + (CRC32_PRECALC_EX(CRC32_PRECALC_EX(CRC32_PRECALC_EX(CRC32_PRECALC_EX(CRC32_PRECALC_EX(CRC32_PRECALC_EX(CRC32_PRECALC_EX(CRC32_PRECALC_EX(byte))))))))) + +/* Simple wrapper of CRC32_PRECALC_E that converts everything to uint32_t */ +#define CRC32_PRECALC(byte) \ + CRC32_PRECALC_E((uint32_t)(byte)) + +#define CRC32_PRECALC_0(byte) \ + CRC32_PRECALC(byte), CRC32_PRECALC((byte) | 0x01) + +#define CRC32_PRECALC_1(byte) \ + CRC32_PRECALC_0(byte), CRC32_PRECALC_0((byte) | UINT32_C(0x02)) + +#define CRC32_PRECALC_2(byte) \ + CRC32_PRECALC_1(byte), CRC32_PRECALC_1((byte) | UINT32_C(0x04)) + +#define CRC32_PRECALC_3(byte) \ + CRC32_PRECALC_2(byte), CRC32_PRECALC_2((byte) | UINT32_C(0x08)) + +#define CRC32_PRECALC_4(byte) \ + CRC32_PRECALC_3(byte), CRC32_PRECALC_3((byte) | UINT32_C(0x10)) + +#define CRC32_PRECALC_5(byte) \ + CRC32_PRECALC_4(byte), CRC32_PRECALC_4((byte) | UINT32_C(0x20)) + +#define CRC32_PRECALC_6(byte) \ + CRC32_PRECALC_5(byte), CRC32_PRECALC_5((byte) | UINT32_C(0x40)) + +#define CRC32_PRECALC_7(byte) \ + CRC32_PRECALC_6(byte), CRC32_PRECALC_6((byte) | UINT32_C(0x80)) + +const uint32_t crc32_tab[256] = { + CRC32_PRECALC_7(0) +}; + +#undef CRC32_MASK +#undef CRC32_PRECALC_EX +#undef CRC32_PRECALC_E +#undef CRC32_PRECALC +#undef CRC32_PRECALC_0 +#undef CRC32_PRECALC_1 +#undef CRC32_PRECALC_2 +#undef CRC32_PRECALC_3 +#undef CRC32_PRECALC_4 +#undef CRC32_PRECALC_5 +#undef CRC32_PRECALC_6 +#undef CRC32_PRECALC_7 diff -r 000000000000 -r 422835bc1aca crc32-test.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crc32-test.c Mon Feb 09 01:15:00 2026 -0500 @@ -0,0 +1,37 @@ +#include "crc32i.h" +#include + +/* Test implementations and make sure they agree with each other */ +int crc32_test(void) +{ + /* Force alignment :) */ + static const __attribute__((__aligned__(CRC32_MAX_ALIGNMENT))) unsigned char testdata[1024] = +#define DOUBLE(x) x x +DOUBLE(DOUBLE(DOUBLE(DOUBLE(DOUBLE(DOUBLE(DOUBLE("\x01\x02\x04\x08\x10\x20\x40\x80"))))))) +#undef DOUBLE + ; + static const crc32_r_spec crc[] = { + crc32c_r, + crc32qw_r, + crc32x86_vpclmulqdq_r + }; + size_t i; + + uint32_t crcc = crc32(testdata, sizeof(testdata)); + + for (i = 0; i < ARRAY_SIZE(crc); i++) { + uint32_t thiscrc = ~crc[i](0xFFFFFFFF, testdata, sizeof(testdata)); + + if (thiscrc != crcc) { + fprintf(stderr, "%zu, mismatch: %08x, %08x\n", i, crcc, thiscrc); + return -1; + } + } + + return 0; +} + +int main(void) +{ + return -crc32_test(); +} diff -r 000000000000 -r 422835bc1aca crc32.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crc32.c Mon Feb 09 01:15:00 2026 -0500 @@ -0,0 +1,50 @@ +#include "crc32.h" +#include "crc32i.h" +#include +#include +#include +#include +#include +#include + +/* Align to a byte offset using the given crc function. */ +static void crc32_align(uint32_t *crc, crc32_r_spec crcfunc, size_t align, const unsigned char **message, size_t *sz) +{ + size_t sz8, szs; + + /* Alignment check */ + if (ALIGNED(*message, align)) + return; + + /* Calculate size needed to align */ + sz8 = align - ((uintptr_t)message % align); + szs = MIN(*sz, sz8); + + *crc = crcfunc(*crc, *message, sz8); + *message += sz8; + *sz -= sz8; + + if (szs == sz8) assert(ALIGNED(*message, align)); +} + +CRC32_API +uint32_t crc32(const unsigned char *message, size_t sz) +{ + uint32_t crc; + size_t i; + + if (!sz) + return 0; + + crc = 0xFFFFFFFF; + crc32_align(&crc, crc32c_r, ALIGNOF(uint32_t), &message, &sz); + if (!sz) return ~crc; +#ifdef __x86_64__ + crc32_align(&crc, crc32qw_r, 16, &message, &sz); + if (!sz) return ~crc; + + return ~crc32x86_vpclmulqdq_r(crc, message, sz); +#else + return ~crc32qw_r(crc, message, sz); +#endif +} diff -r 000000000000 -r 422835bc1aca crc32.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crc32.h Mon Feb 09 01:15:00 2026 -0500 @@ -0,0 +1,9 @@ +#ifndef CRC32_H_ +#define CRC32_H_ + +#include +#include + +uint32_t crc32(const unsigned char *msg, size_t sz); + +#endif diff -r 000000000000 -r 422835bc1aca crc32c.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crc32c.c Mon Feb 09 01:15:00 2026 -0500 @@ -0,0 +1,11 @@ +#include "crc32i.h" + +uint32_t crc32c_r(uint32_t crc, const unsigned char *message, size_t sz) +{ + size_t i; + + for (i = 0; i < sz; i++) + crc = (crc >> 8) ^ crc32_tab[(crc ^ message[i]) & 0xFF]; + + return crc; +} diff -r 000000000000 -r 422835bc1aca crc32i.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crc32i.h Mon Feb 09 01:15:00 2026 -0500 @@ -0,0 +1,45 @@ +/* internal crc32 definitions */ + +#include "crc32.h" + +#define CRC32_API __attribute__((__visibility__("default"))) + +/* all LUTs etc. are generated at compile time. + * eventually, I'd like to have all internal info + * stored in one opaque pointer. this pointer will + * contain the LUTs (or maybe a pointer to one). + * We could also create these structures at compile + * time and have a flag that says whether it needs + * to be destroyed or if it can be cached. */ +#define CRC32_POLYNOMIAL 0xedb88320 + +/* crc32b.c */ +#ifdef __GNUC__ +# define CRC32_PURE __attribute__((__pure__)) +#else +# define CRC32_PURE +#endif + +#define ALIGNOF(type) offsetof(struct { type a; char b; }, b) +#define ALIGNED(ptr, alignment) (((uintptr_t)(ptr) % (alignment)) == 0) +#define ALIGNED_TYPE(ptr, type) ALIGNED(ptr, ALIGNOF(type)) + +typedef uint32_t (*crc32_r_spec)(uint32_t, const unsigned char *, size_t); + +/* shared by crc32c and crc32qw */ +extern const uint32_t crc32_tab[256]; + +/* Calculates crc32 by bytes. Has no alignment requirement */ +uint32_t crc32c_r(uint32_t crc, const unsigned char *message, size_t sz); +/* Calculates crc32 in dwords. Requires 4-byte alignment */ +uint32_t crc32qw_r(uint32_t crc, const unsigned char *message, size_t sz); +/* Calculates crc32 using intel SIMD. Requires 16-byte alignment */ +uint32_t crc32x86_vpclmulqdq_r(uint32_t crc, const unsigned char *msg, size_t sz); + +/* Maximum alignment value for each impl to work */ +#define MAX(x, y) ((x)>(y)?(x):(y)) +#define MIN(x, y) ((x)<(y)?(x):(y)) +#define CRC32_MAX_ALIGNMENT MAX(16, ALIGNOF(uint32_t)) + +#define ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) + diff -r 000000000000 -r 422835bc1aca crc32qw.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crc32qw.c Mon Feb 09 01:15:00 2026 -0500 @@ -0,0 +1,36 @@ +#include "crc32i.h" + +uint32_t crc32qw_r(uint32_t crc, const unsigned char *message, size_t sz) +{ + while (sz >= 4) { + union { + unsigned char b[4]; + uint32_t w; + } w; + +#if (__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 2) + /* Tell GCC that we will be aliasing */ + w.w = *(__attribute__((__may_alias__)) uint32_t *)message; +#else + /* ... */ + w.b[0] = message[0]; + w.b[1] = message[1]; + w.b[2] = message[2]; + w.b[3] = message[3]; +#endif + + crc ^= w.w; + + crc = (crc >> 8) ^ crc32_tab[crc & 0xFF]; + crc = (crc >> 8) ^ crc32_tab[crc & 0xFF]; + crc = (crc >> 8) ^ crc32_tab[crc & 0xFF]; + crc = (crc >> 8) ^ crc32_tab[crc & 0xFF]; + + message += 4; + sz -= 4; + } + + if (!sz) return crc; + + return crc32c_r(crc, message, sz); +} diff -r 000000000000 -r 422835bc1aca crc32x86-tab.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crc32x86-tab.h Mon Feb 09 01:15:00 2026 -0500 @@ -0,0 +1,1088 @@ +XNDIVP_MOD_ITER(1, 0) +XNDIVP_DIV_ITER(1, 0) +XNDIVP_MOD_ITER(2, 1) +XNDIVP_DIV_ITER(2, 1) +XNDIVP_MOD_ITER(3, 2) +XNDIVP_DIV_ITER(3, 2) +XNDIVP_MOD_ITER(4, 3) +XNDIVP_DIV_ITER(4, 3) +XNDIVP_MOD_ITER(5, 4) +XNDIVP_DIV_ITER(5, 4) +XNDIVP_MOD_ITER(6, 5) +XNDIVP_DIV_ITER(6, 5) +XNDIVP_MOD_ITER(7, 6) +XNDIVP_DIV_ITER(7, 6) +XNDIVP_MOD_ITER(8, 7) +XNDIVP_DIV_ITER(8, 7) +XNDIVP_MOD_ITER(9, 8) +XNDIVP_DIV_ITER(9, 8) +XNDIVP_MOD_ITER(10, 9) +XNDIVP_DIV_ITER(10, 9) +XNDIVP_MOD_ITER(11, 10) +XNDIVP_DIV_ITER(11, 10) +XNDIVP_MOD_ITER(12, 11) +XNDIVP_DIV_ITER(12, 11) +XNDIVP_MOD_ITER(13, 12) +XNDIVP_DIV_ITER(13, 12) +XNDIVP_MOD_ITER(14, 13) +XNDIVP_DIV_ITER(14, 13) +XNDIVP_MOD_ITER(15, 14) +XNDIVP_DIV_ITER(15, 14) +XNDIVP_MOD_ITER(16, 15) +XNDIVP_DIV_ITER(16, 15) +XNDIVP_MOD_ITER(17, 16) +XNDIVP_DIV_ITER(17, 16) +XNDIVP_MOD_ITER(18, 17) +XNDIVP_DIV_ITER(18, 17) +XNDIVP_MOD_ITER(19, 18) +XNDIVP_DIV_ITER(19, 18) +XNDIVP_MOD_ITER(20, 19) +XNDIVP_DIV_ITER(20, 19) +XNDIVP_MOD_ITER(21, 20) +XNDIVP_DIV_ITER(21, 20) +XNDIVP_MOD_ITER(22, 21) +XNDIVP_DIV_ITER(22, 21) +XNDIVP_MOD_ITER(23, 22) +XNDIVP_DIV_ITER(23, 22) +XNDIVP_MOD_ITER(24, 23) +XNDIVP_DIV_ITER(24, 23) +XNDIVP_MOD_ITER(25, 24) +XNDIVP_DIV_ITER(25, 24) +XNDIVP_MOD_ITER(26, 25) +XNDIVP_DIV_ITER(26, 25) +XNDIVP_MOD_ITER(27, 26) +XNDIVP_DIV_ITER(27, 26) +XNDIVP_MOD_ITER(28, 27) +XNDIVP_DIV_ITER(28, 27) +XNDIVP_MOD_ITER(29, 28) +XNDIVP_DIV_ITER(29, 28) +XNDIVP_MOD_ITER(30, 29) +XNDIVP_DIV_ITER(30, 29) +XNDIVP_MOD_ITER(31, 30) +XNDIVP_DIV_ITER(31, 30) +XNDIVP_MOD_ITER(32, 31) +XNDIVP_DIV_ITER(32, 31) +XNDIVP_MOD_ITER(33, 32) +XNDIVP_DIV_ITER(33, 32) +XNDIVP_MOD_ITER(34, 33) +XNDIVP_DIV_ITER(34, 33) +XNDIVP_MOD_ITER(35, 34) +XNDIVP_DIV_ITER(35, 34) +XNDIVP_MOD_ITER(36, 35) +XNDIVP_DIV_ITER(36, 35) +XNDIVP_MOD_ITER(37, 36) +XNDIVP_DIV_ITER(37, 36) +XNDIVP_MOD_ITER(38, 37) +XNDIVP_DIV_ITER(38, 37) +XNDIVP_MOD_ITER(39, 38) +XNDIVP_DIV_ITER(39, 38) +XNDIVP_MOD_ITER(40, 39) +XNDIVP_DIV_ITER(40, 39) +XNDIVP_MOD_ITER(41, 40) +XNDIVP_DIV_ITER(41, 40) +XNDIVP_MOD_ITER(42, 41) +XNDIVP_DIV_ITER(42, 41) +XNDIVP_MOD_ITER(43, 42) +XNDIVP_DIV_ITER(43, 42) +XNDIVP_MOD_ITER(44, 43) +XNDIVP_DIV_ITER(44, 43) +XNDIVP_MOD_ITER(45, 44) +XNDIVP_DIV_ITER(45, 44) +XNDIVP_MOD_ITER(46, 45) +XNDIVP_DIV_ITER(46, 45) +XNDIVP_MOD_ITER(47, 46) +XNDIVP_DIV_ITER(47, 46) +XNDIVP_MOD_ITER(48, 47) +XNDIVP_DIV_ITER(48, 47) +XNDIVP_MOD_ITER(49, 48) +XNDIVP_DIV_ITER(49, 48) +XNDIVP_MOD_ITER(50, 49) +XNDIVP_DIV_ITER(50, 49) +XNDIVP_MOD_ITER(51, 50) +XNDIVP_DIV_ITER(51, 50) +XNDIVP_MOD_ITER(52, 51) +XNDIVP_DIV_ITER(52, 51) +XNDIVP_MOD_ITER(53, 52) +XNDIVP_DIV_ITER(53, 52) +XNDIVP_MOD_ITER(54, 53) +XNDIVP_DIV_ITER(54, 53) +XNDIVP_MOD_ITER(55, 54) +XNDIVP_DIV_ITER(55, 54) +XNDIVP_MOD_ITER(56, 55) +XNDIVP_DIV_ITER(56, 55) +XNDIVP_MOD_ITER(57, 56) +XNDIVP_DIV_ITER(57, 56) +XNDIVP_MOD_ITER(58, 57) +XNDIVP_DIV_ITER(58, 57) +XNDIVP_MOD_ITER(59, 58) +XNDIVP_DIV_ITER(59, 58) +XNDIVP_MOD_ITER(60, 59) +XNDIVP_DIV_ITER(60, 59) +XNDIVP_MOD_ITER(61, 60) +XNDIVP_DIV_ITER(61, 60) +XNDIVP_MOD_ITER(62, 61) +XNDIVP_DIV_ITER(62, 61) +XNDIVP_MOD_ITER(63, 62) +XNDIVP_DIV_ITER(63, 62) +XNDIVP_MOD_ITER(64, 63) +XNDIVP_DIV_ITER(64, 63) +XNDIVP_MOD_ITER(65, 64) +XNDIVP_DIV_ITER(65, 64) +XNDIVP_MOD_ITER(66, 65) +XNDIVP_DIV_ITER(66, 65) +XNDIVP_MOD_ITER(67, 66) +XNDIVP_DIV_ITER(67, 66) +XNDIVP_MOD_ITER(68, 67) +XNDIVP_DIV_ITER(68, 67) +XNDIVP_MOD_ITER(69, 68) +XNDIVP_DIV_ITER(69, 68) +XNDIVP_MOD_ITER(70, 69) +XNDIVP_DIV_ITER(70, 69) +XNDIVP_MOD_ITER(71, 70) +XNDIVP_DIV_ITER(71, 70) +XNDIVP_MOD_ITER(72, 71) +XNDIVP_DIV_ITER(72, 71) +XNDIVP_MOD_ITER(73, 72) +XNDIVP_DIV_ITER(73, 72) +XNDIVP_MOD_ITER(74, 73) +XNDIVP_DIV_ITER(74, 73) +XNDIVP_MOD_ITER(75, 74) +XNDIVP_DIV_ITER(75, 74) +XNDIVP_MOD_ITER(76, 75) +XNDIVP_DIV_ITER(76, 75) +XNDIVP_MOD_ITER(77, 76) +XNDIVP_DIV_ITER(77, 76) +XNDIVP_MOD_ITER(78, 77) +XNDIVP_DIV_ITER(78, 77) +XNDIVP_MOD_ITER(79, 78) +XNDIVP_DIV_ITER(79, 78) +XNDIVP_MOD_ITER(80, 79) +XNDIVP_DIV_ITER(80, 79) +XNDIVP_MOD_ITER(81, 80) +XNDIVP_DIV_ITER(81, 80) +XNDIVP_MOD_ITER(82, 81) +XNDIVP_DIV_ITER(82, 81) +XNDIVP_MOD_ITER(83, 82) +XNDIVP_DIV_ITER(83, 82) +XNDIVP_MOD_ITER(84, 83) +XNDIVP_DIV_ITER(84, 83) +XNDIVP_MOD_ITER(85, 84) +XNDIVP_DIV_ITER(85, 84) +XNDIVP_MOD_ITER(86, 85) +XNDIVP_DIV_ITER(86, 85) +XNDIVP_MOD_ITER(87, 86) +XNDIVP_DIV_ITER(87, 86) +XNDIVP_MOD_ITER(88, 87) +XNDIVP_DIV_ITER(88, 87) +XNDIVP_MOD_ITER(89, 88) +XNDIVP_DIV_ITER(89, 88) +XNDIVP_MOD_ITER(90, 89) +XNDIVP_DIV_ITER(90, 89) +XNDIVP_MOD_ITER(91, 90) +XNDIVP_DIV_ITER(91, 90) +XNDIVP_MOD_ITER(92, 91) +XNDIVP_DIV_ITER(92, 91) +XNDIVP_MOD_ITER(93, 92) +XNDIVP_DIV_ITER(93, 92) +XNDIVP_MOD_ITER(94, 93) +XNDIVP_DIV_ITER(94, 93) +XNDIVP_MOD_ITER(95, 94) +XNDIVP_DIV_ITER(95, 94) +XNDIVP_MOD_ITER(96, 95) +XNDIVP_DIV_ITER(96, 95) +XNDIVP_MOD_ITER(97, 96) +XNDIVP_DIV_ITER(97, 96) +XNDIVP_MOD_ITER(98, 97) +XNDIVP_DIV_ITER(98, 97) +XNDIVP_MOD_ITER(99, 98) +XNDIVP_DIV_ITER(99, 98) +XNDIVP_MOD_ITER(100, 99) +XNDIVP_DIV_ITER(100, 99) +XNDIVP_MOD_ITER(101, 100) +XNDIVP_DIV_ITER(101, 100) +XNDIVP_MOD_ITER(102, 101) +XNDIVP_DIV_ITER(102, 101) +XNDIVP_MOD_ITER(103, 102) +XNDIVP_DIV_ITER(103, 102) +XNDIVP_MOD_ITER(104, 103) +XNDIVP_DIV_ITER(104, 103) +XNDIVP_MOD_ITER(105, 104) +XNDIVP_DIV_ITER(105, 104) +XNDIVP_MOD_ITER(106, 105) +XNDIVP_DIV_ITER(106, 105) +XNDIVP_MOD_ITER(107, 106) +XNDIVP_DIV_ITER(107, 106) +XNDIVP_MOD_ITER(108, 107) +XNDIVP_DIV_ITER(108, 107) +XNDIVP_MOD_ITER(109, 108) +XNDIVP_DIV_ITER(109, 108) +XNDIVP_MOD_ITER(110, 109) +XNDIVP_DIV_ITER(110, 109) +XNDIVP_MOD_ITER(111, 110) +XNDIVP_DIV_ITER(111, 110) +XNDIVP_MOD_ITER(112, 111) +XNDIVP_DIV_ITER(112, 111) +XNDIVP_MOD_ITER(113, 112) +XNDIVP_DIV_ITER(113, 112) +XNDIVP_MOD_ITER(114, 113) +XNDIVP_DIV_ITER(114, 113) +XNDIVP_MOD_ITER(115, 114) +XNDIVP_DIV_ITER(115, 114) +XNDIVP_MOD_ITER(116, 115) +XNDIVP_DIV_ITER(116, 115) +XNDIVP_MOD_ITER(117, 116) +XNDIVP_DIV_ITER(117, 116) +XNDIVP_MOD_ITER(118, 117) +XNDIVP_DIV_ITER(118, 117) +XNDIVP_MOD_ITER(119, 118) +XNDIVP_DIV_ITER(119, 118) +XNDIVP_MOD_ITER(120, 119) +XNDIVP_DIV_ITER(120, 119) +XNDIVP_MOD_ITER(121, 120) +XNDIVP_DIV_ITER(121, 120) +XNDIVP_MOD_ITER(122, 121) +XNDIVP_DIV_ITER(122, 121) +XNDIVP_MOD_ITER(123, 122) +XNDIVP_DIV_ITER(123, 122) +XNDIVP_MOD_ITER(124, 123) +XNDIVP_DIV_ITER(124, 123) +XNDIVP_MOD_ITER(125, 124) +XNDIVP_DIV_ITER(125, 124) +XNDIVP_MOD_ITER(126, 125) +XNDIVP_DIV_ITER(126, 125) +XNDIVP_MOD_ITER(127, 126) +XNDIVP_DIV_ITER(127, 126) +XNDIVP_MOD_ITER(128, 127) +XNDIVP_DIV_ITER(128, 127) +XNDIVP_MOD_ITER(129, 128) +XNDIVP_DIV_ITER(129, 128) +XNDIVP_MOD_ITER(130, 129) +XNDIVP_DIV_ITER(130, 129) +XNDIVP_MOD_ITER(131, 130) +XNDIVP_DIV_ITER(131, 130) +XNDIVP_MOD_ITER(132, 131) +XNDIVP_DIV_ITER(132, 131) +XNDIVP_MOD_ITER(133, 132) +XNDIVP_DIV_ITER(133, 132) +XNDIVP_MOD_ITER(134, 133) +XNDIVP_DIV_ITER(134, 133) +XNDIVP_MOD_ITER(135, 134) +XNDIVP_DIV_ITER(135, 134) +XNDIVP_MOD_ITER(136, 135) +XNDIVP_DIV_ITER(136, 135) +XNDIVP_MOD_ITER(137, 136) +XNDIVP_DIV_ITER(137, 136) +XNDIVP_MOD_ITER(138, 137) +XNDIVP_DIV_ITER(138, 137) +XNDIVP_MOD_ITER(139, 138) +XNDIVP_DIV_ITER(139, 138) +XNDIVP_MOD_ITER(140, 139) +XNDIVP_DIV_ITER(140, 139) +XNDIVP_MOD_ITER(141, 140) +XNDIVP_DIV_ITER(141, 140) +XNDIVP_MOD_ITER(142, 141) +XNDIVP_DIV_ITER(142, 141) +XNDIVP_MOD_ITER(143, 142) +XNDIVP_DIV_ITER(143, 142) +XNDIVP_MOD_ITER(144, 143) +XNDIVP_DIV_ITER(144, 143) +XNDIVP_MOD_ITER(145, 144) +XNDIVP_DIV_ITER(145, 144) +XNDIVP_MOD_ITER(146, 145) +XNDIVP_DIV_ITER(146, 145) +XNDIVP_MOD_ITER(147, 146) +XNDIVP_DIV_ITER(147, 146) +XNDIVP_MOD_ITER(148, 147) +XNDIVP_DIV_ITER(148, 147) +XNDIVP_MOD_ITER(149, 148) +XNDIVP_DIV_ITER(149, 148) +XNDIVP_MOD_ITER(150, 149) +XNDIVP_DIV_ITER(150, 149) +XNDIVP_MOD_ITER(151, 150) +XNDIVP_DIV_ITER(151, 150) +XNDIVP_MOD_ITER(152, 151) +XNDIVP_DIV_ITER(152, 151) +XNDIVP_MOD_ITER(153, 152) +XNDIVP_DIV_ITER(153, 152) +XNDIVP_MOD_ITER(154, 153) +XNDIVP_DIV_ITER(154, 153) +XNDIVP_MOD_ITER(155, 154) +XNDIVP_DIV_ITER(155, 154) +XNDIVP_MOD_ITER(156, 155) +XNDIVP_DIV_ITER(156, 155) +XNDIVP_MOD_ITER(157, 156) +XNDIVP_DIV_ITER(157, 156) +XNDIVP_MOD_ITER(158, 157) +XNDIVP_DIV_ITER(158, 157) +XNDIVP_MOD_ITER(159, 158) +XNDIVP_DIV_ITER(159, 158) +XNDIVP_MOD_ITER(160, 159) +XNDIVP_DIV_ITER(160, 159) +XNDIVP_MOD_ITER(161, 160) +XNDIVP_DIV_ITER(161, 160) +XNDIVP_MOD_ITER(162, 161) +XNDIVP_DIV_ITER(162, 161) +XNDIVP_MOD_ITER(163, 162) +XNDIVP_DIV_ITER(163, 162) +XNDIVP_MOD_ITER(164, 163) +XNDIVP_DIV_ITER(164, 163) +XNDIVP_MOD_ITER(165, 164) +XNDIVP_DIV_ITER(165, 164) +XNDIVP_MOD_ITER(166, 165) +XNDIVP_DIV_ITER(166, 165) +XNDIVP_MOD_ITER(167, 166) +XNDIVP_DIV_ITER(167, 166) +XNDIVP_MOD_ITER(168, 167) +XNDIVP_DIV_ITER(168, 167) +XNDIVP_MOD_ITER(169, 168) +XNDIVP_DIV_ITER(169, 168) +XNDIVP_MOD_ITER(170, 169) +XNDIVP_DIV_ITER(170, 169) +XNDIVP_MOD_ITER(171, 170) +XNDIVP_DIV_ITER(171, 170) +XNDIVP_MOD_ITER(172, 171) +XNDIVP_DIV_ITER(172, 171) +XNDIVP_MOD_ITER(173, 172) +XNDIVP_DIV_ITER(173, 172) +XNDIVP_MOD_ITER(174, 173) +XNDIVP_DIV_ITER(174, 173) +XNDIVP_MOD_ITER(175, 174) +XNDIVP_DIV_ITER(175, 174) +XNDIVP_MOD_ITER(176, 175) +XNDIVP_DIV_ITER(176, 175) +XNDIVP_MOD_ITER(177, 176) +XNDIVP_DIV_ITER(177, 176) +XNDIVP_MOD_ITER(178, 177) +XNDIVP_DIV_ITER(178, 177) +XNDIVP_MOD_ITER(179, 178) +XNDIVP_DIV_ITER(179, 178) +XNDIVP_MOD_ITER(180, 179) +XNDIVP_DIV_ITER(180, 179) +XNDIVP_MOD_ITER(181, 180) +XNDIVP_DIV_ITER(181, 180) +XNDIVP_MOD_ITER(182, 181) +XNDIVP_DIV_ITER(182, 181) +XNDIVP_MOD_ITER(183, 182) +XNDIVP_DIV_ITER(183, 182) +XNDIVP_MOD_ITER(184, 183) +XNDIVP_DIV_ITER(184, 183) +XNDIVP_MOD_ITER(185, 184) +XNDIVP_DIV_ITER(185, 184) +XNDIVP_MOD_ITER(186, 185) +XNDIVP_DIV_ITER(186, 185) +XNDIVP_MOD_ITER(187, 186) +XNDIVP_DIV_ITER(187, 186) +XNDIVP_MOD_ITER(188, 187) +XNDIVP_DIV_ITER(188, 187) +XNDIVP_MOD_ITER(189, 188) +XNDIVP_DIV_ITER(189, 188) +XNDIVP_MOD_ITER(190, 189) +XNDIVP_DIV_ITER(190, 189) +XNDIVP_MOD_ITER(191, 190) +XNDIVP_DIV_ITER(191, 190) +XNDIVP_MOD_ITER(192, 191) +XNDIVP_DIV_ITER(192, 191) +XNDIVP_MOD_ITER(193, 192) +XNDIVP_DIV_ITER(193, 192) +XNDIVP_MOD_ITER(194, 193) +XNDIVP_DIV_ITER(194, 193) +XNDIVP_MOD_ITER(195, 194) +XNDIVP_DIV_ITER(195, 194) +XNDIVP_MOD_ITER(196, 195) +XNDIVP_DIV_ITER(196, 195) +XNDIVP_MOD_ITER(197, 196) +XNDIVP_DIV_ITER(197, 196) +XNDIVP_MOD_ITER(198, 197) +XNDIVP_DIV_ITER(198, 197) +XNDIVP_MOD_ITER(199, 198) +XNDIVP_DIV_ITER(199, 198) +XNDIVP_MOD_ITER(200, 199) +XNDIVP_DIV_ITER(200, 199) +XNDIVP_MOD_ITER(201, 200) +XNDIVP_DIV_ITER(201, 200) +XNDIVP_MOD_ITER(202, 201) +XNDIVP_DIV_ITER(202, 201) +XNDIVP_MOD_ITER(203, 202) +XNDIVP_DIV_ITER(203, 202) +XNDIVP_MOD_ITER(204, 203) +XNDIVP_DIV_ITER(204, 203) +XNDIVP_MOD_ITER(205, 204) +XNDIVP_DIV_ITER(205, 204) +XNDIVP_MOD_ITER(206, 205) +XNDIVP_DIV_ITER(206, 205) +XNDIVP_MOD_ITER(207, 206) +XNDIVP_DIV_ITER(207, 206) +XNDIVP_MOD_ITER(208, 207) +XNDIVP_DIV_ITER(208, 207) +XNDIVP_MOD_ITER(209, 208) +XNDIVP_DIV_ITER(209, 208) +XNDIVP_MOD_ITER(210, 209) +XNDIVP_DIV_ITER(210, 209) +XNDIVP_MOD_ITER(211, 210) +XNDIVP_DIV_ITER(211, 210) +XNDIVP_MOD_ITER(212, 211) +XNDIVP_DIV_ITER(212, 211) +XNDIVP_MOD_ITER(213, 212) +XNDIVP_DIV_ITER(213, 212) +XNDIVP_MOD_ITER(214, 213) +XNDIVP_DIV_ITER(214, 213) +XNDIVP_MOD_ITER(215, 214) +XNDIVP_DIV_ITER(215, 214) +XNDIVP_MOD_ITER(216, 215) +XNDIVP_DIV_ITER(216, 215) +XNDIVP_MOD_ITER(217, 216) +XNDIVP_DIV_ITER(217, 216) +XNDIVP_MOD_ITER(218, 217) +XNDIVP_DIV_ITER(218, 217) +XNDIVP_MOD_ITER(219, 218) +XNDIVP_DIV_ITER(219, 218) +XNDIVP_MOD_ITER(220, 219) +XNDIVP_DIV_ITER(220, 219) +XNDIVP_MOD_ITER(221, 220) +XNDIVP_DIV_ITER(221, 220) +XNDIVP_MOD_ITER(222, 221) +XNDIVP_DIV_ITER(222, 221) +XNDIVP_MOD_ITER(223, 222) +XNDIVP_DIV_ITER(223, 222) +XNDIVP_MOD_ITER(224, 223) +XNDIVP_DIV_ITER(224, 223) +XNDIVP_MOD_ITER(225, 224) +XNDIVP_DIV_ITER(225, 224) +XNDIVP_MOD_ITER(226, 225) +XNDIVP_DIV_ITER(226, 225) +XNDIVP_MOD_ITER(227, 226) +XNDIVP_DIV_ITER(227, 226) +XNDIVP_MOD_ITER(228, 227) +XNDIVP_DIV_ITER(228, 227) +XNDIVP_MOD_ITER(229, 228) +XNDIVP_DIV_ITER(229, 228) +XNDIVP_MOD_ITER(230, 229) +XNDIVP_DIV_ITER(230, 229) +XNDIVP_MOD_ITER(231, 230) +XNDIVP_DIV_ITER(231, 230) +XNDIVP_MOD_ITER(232, 231) +XNDIVP_DIV_ITER(232, 231) +XNDIVP_MOD_ITER(233, 232) +XNDIVP_DIV_ITER(233, 232) +XNDIVP_MOD_ITER(234, 233) +XNDIVP_DIV_ITER(234, 233) +XNDIVP_MOD_ITER(235, 234) +XNDIVP_DIV_ITER(235, 234) +XNDIVP_MOD_ITER(236, 235) +XNDIVP_DIV_ITER(236, 235) +XNDIVP_MOD_ITER(237, 236) +XNDIVP_DIV_ITER(237, 236) +XNDIVP_MOD_ITER(238, 237) +XNDIVP_DIV_ITER(238, 237) +XNDIVP_MOD_ITER(239, 238) +XNDIVP_DIV_ITER(239, 238) +XNDIVP_MOD_ITER(240, 239) +XNDIVP_DIV_ITER(240, 239) +XNDIVP_MOD_ITER(241, 240) +XNDIVP_DIV_ITER(241, 240) +XNDIVP_MOD_ITER(242, 241) +XNDIVP_DIV_ITER(242, 241) +XNDIVP_MOD_ITER(243, 242) +XNDIVP_DIV_ITER(243, 242) +XNDIVP_MOD_ITER(244, 243) +XNDIVP_DIV_ITER(244, 243) +XNDIVP_MOD_ITER(245, 244) +XNDIVP_DIV_ITER(245, 244) +XNDIVP_MOD_ITER(246, 245) +XNDIVP_DIV_ITER(246, 245) +XNDIVP_MOD_ITER(247, 246) +XNDIVP_DIV_ITER(247, 246) +XNDIVP_MOD_ITER(248, 247) +XNDIVP_DIV_ITER(248, 247) +XNDIVP_MOD_ITER(249, 248) +XNDIVP_DIV_ITER(249, 248) +XNDIVP_MOD_ITER(250, 249) +XNDIVP_DIV_ITER(250, 249) +XNDIVP_MOD_ITER(251, 250) +XNDIVP_DIV_ITER(251, 250) +XNDIVP_MOD_ITER(252, 251) +XNDIVP_DIV_ITER(252, 251) +XNDIVP_MOD_ITER(253, 252) +XNDIVP_DIV_ITER(253, 252) +XNDIVP_MOD_ITER(254, 253) +XNDIVP_DIV_ITER(254, 253) +XNDIVP_MOD_ITER(255, 254) +XNDIVP_DIV_ITER(255, 254) +XNDIVP_MOD_ITER(256, 255) +XNDIVP_DIV_ITER(256, 255) +XNDIVP_MOD_ITER(257, 256) +XNDIVP_DIV_ITER(257, 256) +XNDIVP_MOD_ITER(258, 257) +XNDIVP_DIV_ITER(258, 257) +XNDIVP_MOD_ITER(259, 258) +XNDIVP_DIV_ITER(259, 258) +XNDIVP_MOD_ITER(260, 259) +XNDIVP_DIV_ITER(260, 259) +XNDIVP_MOD_ITER(261, 260) +XNDIVP_DIV_ITER(261, 260) +XNDIVP_MOD_ITER(262, 261) +XNDIVP_DIV_ITER(262, 261) +XNDIVP_MOD_ITER(263, 262) +XNDIVP_DIV_ITER(263, 262) +XNDIVP_MOD_ITER(264, 263) +XNDIVP_DIV_ITER(264, 263) +XNDIVP_MOD_ITER(265, 264) +XNDIVP_DIV_ITER(265, 264) +XNDIVP_MOD_ITER(266, 265) +XNDIVP_DIV_ITER(266, 265) +XNDIVP_MOD_ITER(267, 266) +XNDIVP_DIV_ITER(267, 266) +XNDIVP_MOD_ITER(268, 267) +XNDIVP_DIV_ITER(268, 267) +XNDIVP_MOD_ITER(269, 268) +XNDIVP_DIV_ITER(269, 268) +XNDIVP_MOD_ITER(270, 269) +XNDIVP_DIV_ITER(270, 269) +XNDIVP_MOD_ITER(271, 270) +XNDIVP_DIV_ITER(271, 270) +XNDIVP_MOD_ITER(272, 271) +XNDIVP_DIV_ITER(272, 271) +XNDIVP_MOD_ITER(273, 272) +XNDIVP_DIV_ITER(273, 272) +XNDIVP_MOD_ITER(274, 273) +XNDIVP_DIV_ITER(274, 273) +XNDIVP_MOD_ITER(275, 274) +XNDIVP_DIV_ITER(275, 274) +XNDIVP_MOD_ITER(276, 275) +XNDIVP_DIV_ITER(276, 275) +XNDIVP_MOD_ITER(277, 276) +XNDIVP_DIV_ITER(277, 276) +XNDIVP_MOD_ITER(278, 277) +XNDIVP_DIV_ITER(278, 277) +XNDIVP_MOD_ITER(279, 278) +XNDIVP_DIV_ITER(279, 278) +XNDIVP_MOD_ITER(280, 279) +XNDIVP_DIV_ITER(280, 279) +XNDIVP_MOD_ITER(281, 280) +XNDIVP_DIV_ITER(281, 280) +XNDIVP_MOD_ITER(282, 281) +XNDIVP_DIV_ITER(282, 281) +XNDIVP_MOD_ITER(283, 282) +XNDIVP_DIV_ITER(283, 282) +XNDIVP_MOD_ITER(284, 283) +XNDIVP_DIV_ITER(284, 283) +XNDIVP_MOD_ITER(285, 284) +XNDIVP_DIV_ITER(285, 284) +XNDIVP_MOD_ITER(286, 285) +XNDIVP_DIV_ITER(286, 285) +XNDIVP_MOD_ITER(287, 286) +XNDIVP_DIV_ITER(287, 286) +XNDIVP_MOD_ITER(288, 287) +XNDIVP_DIV_ITER(288, 287) +XNDIVP_MOD_ITER(289, 288) +XNDIVP_DIV_ITER(289, 288) +XNDIVP_MOD_ITER(290, 289) +XNDIVP_DIV_ITER(290, 289) +XNDIVP_MOD_ITER(291, 290) +XNDIVP_DIV_ITER(291, 290) +XNDIVP_MOD_ITER(292, 291) +XNDIVP_DIV_ITER(292, 291) +XNDIVP_MOD_ITER(293, 292) +XNDIVP_DIV_ITER(293, 292) +XNDIVP_MOD_ITER(294, 293) +XNDIVP_DIV_ITER(294, 293) +XNDIVP_MOD_ITER(295, 294) +XNDIVP_DIV_ITER(295, 294) +XNDIVP_MOD_ITER(296, 295) +XNDIVP_DIV_ITER(296, 295) +XNDIVP_MOD_ITER(297, 296) +XNDIVP_DIV_ITER(297, 296) +XNDIVP_MOD_ITER(298, 297) +XNDIVP_DIV_ITER(298, 297) +XNDIVP_MOD_ITER(299, 298) +XNDIVP_DIV_ITER(299, 298) +XNDIVP_MOD_ITER(300, 299) +XNDIVP_DIV_ITER(300, 299) +XNDIVP_MOD_ITER(301, 300) +XNDIVP_DIV_ITER(301, 300) +XNDIVP_MOD_ITER(302, 301) +XNDIVP_DIV_ITER(302, 301) +XNDIVP_MOD_ITER(303, 302) +XNDIVP_DIV_ITER(303, 302) +XNDIVP_MOD_ITER(304, 303) +XNDIVP_DIV_ITER(304, 303) +XNDIVP_MOD_ITER(305, 304) +XNDIVP_DIV_ITER(305, 304) +XNDIVP_MOD_ITER(306, 305) +XNDIVP_DIV_ITER(306, 305) +XNDIVP_MOD_ITER(307, 306) +XNDIVP_DIV_ITER(307, 306) +XNDIVP_MOD_ITER(308, 307) +XNDIVP_DIV_ITER(308, 307) +XNDIVP_MOD_ITER(309, 308) +XNDIVP_DIV_ITER(309, 308) +XNDIVP_MOD_ITER(310, 309) +XNDIVP_DIV_ITER(310, 309) +XNDIVP_MOD_ITER(311, 310) +XNDIVP_DIV_ITER(311, 310) +XNDIVP_MOD_ITER(312, 311) +XNDIVP_DIV_ITER(312, 311) +XNDIVP_MOD_ITER(313, 312) +XNDIVP_DIV_ITER(313, 312) +XNDIVP_MOD_ITER(314, 313) +XNDIVP_DIV_ITER(314, 313) +XNDIVP_MOD_ITER(315, 314) +XNDIVP_DIV_ITER(315, 314) +XNDIVP_MOD_ITER(316, 315) +XNDIVP_DIV_ITER(316, 315) +XNDIVP_MOD_ITER(317, 316) +XNDIVP_DIV_ITER(317, 316) +XNDIVP_MOD_ITER(318, 317) +XNDIVP_DIV_ITER(318, 317) +XNDIVP_MOD_ITER(319, 318) +XNDIVP_DIV_ITER(319, 318) +XNDIVP_MOD_ITER(320, 319) +XNDIVP_DIV_ITER(320, 319) +XNDIVP_MOD_ITER(321, 320) +XNDIVP_DIV_ITER(321, 320) +XNDIVP_MOD_ITER(322, 321) +XNDIVP_DIV_ITER(322, 321) +XNDIVP_MOD_ITER(323, 322) +XNDIVP_DIV_ITER(323, 322) +XNDIVP_MOD_ITER(324, 323) +XNDIVP_DIV_ITER(324, 323) +XNDIVP_MOD_ITER(325, 324) +XNDIVP_DIV_ITER(325, 324) +XNDIVP_MOD_ITER(326, 325) +XNDIVP_DIV_ITER(326, 325) +XNDIVP_MOD_ITER(327, 326) +XNDIVP_DIV_ITER(327, 326) +XNDIVP_MOD_ITER(328, 327) +XNDIVP_DIV_ITER(328, 327) +XNDIVP_MOD_ITER(329, 328) +XNDIVP_DIV_ITER(329, 328) +XNDIVP_MOD_ITER(330, 329) +XNDIVP_DIV_ITER(330, 329) +XNDIVP_MOD_ITER(331, 330) +XNDIVP_DIV_ITER(331, 330) +XNDIVP_MOD_ITER(332, 331) +XNDIVP_DIV_ITER(332, 331) +XNDIVP_MOD_ITER(333, 332) +XNDIVP_DIV_ITER(333, 332) +XNDIVP_MOD_ITER(334, 333) +XNDIVP_DIV_ITER(334, 333) +XNDIVP_MOD_ITER(335, 334) +XNDIVP_DIV_ITER(335, 334) +XNDIVP_MOD_ITER(336, 335) +XNDIVP_DIV_ITER(336, 335) +XNDIVP_MOD_ITER(337, 336) +XNDIVP_DIV_ITER(337, 336) +XNDIVP_MOD_ITER(338, 337) +XNDIVP_DIV_ITER(338, 337) +XNDIVP_MOD_ITER(339, 338) +XNDIVP_DIV_ITER(339, 338) +XNDIVP_MOD_ITER(340, 339) +XNDIVP_DIV_ITER(340, 339) +XNDIVP_MOD_ITER(341, 340) +XNDIVP_DIV_ITER(341, 340) +XNDIVP_MOD_ITER(342, 341) +XNDIVP_DIV_ITER(342, 341) +XNDIVP_MOD_ITER(343, 342) +XNDIVP_DIV_ITER(343, 342) +XNDIVP_MOD_ITER(344, 343) +XNDIVP_DIV_ITER(344, 343) +XNDIVP_MOD_ITER(345, 344) +XNDIVP_DIV_ITER(345, 344) +XNDIVP_MOD_ITER(346, 345) +XNDIVP_DIV_ITER(346, 345) +XNDIVP_MOD_ITER(347, 346) +XNDIVP_DIV_ITER(347, 346) +XNDIVP_MOD_ITER(348, 347) +XNDIVP_DIV_ITER(348, 347) +XNDIVP_MOD_ITER(349, 348) +XNDIVP_DIV_ITER(349, 348) +XNDIVP_MOD_ITER(350, 349) +XNDIVP_DIV_ITER(350, 349) +XNDIVP_MOD_ITER(351, 350) +XNDIVP_DIV_ITER(351, 350) +XNDIVP_MOD_ITER(352, 351) +XNDIVP_DIV_ITER(352, 351) +XNDIVP_MOD_ITER(353, 352) +XNDIVP_DIV_ITER(353, 352) +XNDIVP_MOD_ITER(354, 353) +XNDIVP_DIV_ITER(354, 353) +XNDIVP_MOD_ITER(355, 354) +XNDIVP_DIV_ITER(355, 354) +XNDIVP_MOD_ITER(356, 355) +XNDIVP_DIV_ITER(356, 355) +XNDIVP_MOD_ITER(357, 356) +XNDIVP_DIV_ITER(357, 356) +XNDIVP_MOD_ITER(358, 357) +XNDIVP_DIV_ITER(358, 357) +XNDIVP_MOD_ITER(359, 358) +XNDIVP_DIV_ITER(359, 358) +XNDIVP_MOD_ITER(360, 359) +XNDIVP_DIV_ITER(360, 359) +XNDIVP_MOD_ITER(361, 360) +XNDIVP_DIV_ITER(361, 360) +XNDIVP_MOD_ITER(362, 361) +XNDIVP_DIV_ITER(362, 361) +XNDIVP_MOD_ITER(363, 362) +XNDIVP_DIV_ITER(363, 362) +XNDIVP_MOD_ITER(364, 363) +XNDIVP_DIV_ITER(364, 363) +XNDIVP_MOD_ITER(365, 364) +XNDIVP_DIV_ITER(365, 364) +XNDIVP_MOD_ITER(366, 365) +XNDIVP_DIV_ITER(366, 365) +XNDIVP_MOD_ITER(367, 366) +XNDIVP_DIV_ITER(367, 366) +XNDIVP_MOD_ITER(368, 367) +XNDIVP_DIV_ITER(368, 367) +XNDIVP_MOD_ITER(369, 368) +XNDIVP_DIV_ITER(369, 368) +XNDIVP_MOD_ITER(370, 369) +XNDIVP_DIV_ITER(370, 369) +XNDIVP_MOD_ITER(371, 370) +XNDIVP_DIV_ITER(371, 370) +XNDIVP_MOD_ITER(372, 371) +XNDIVP_DIV_ITER(372, 371) +XNDIVP_MOD_ITER(373, 372) +XNDIVP_DIV_ITER(373, 372) +XNDIVP_MOD_ITER(374, 373) +XNDIVP_DIV_ITER(374, 373) +XNDIVP_MOD_ITER(375, 374) +XNDIVP_DIV_ITER(375, 374) +XNDIVP_MOD_ITER(376, 375) +XNDIVP_DIV_ITER(376, 375) +XNDIVP_MOD_ITER(377, 376) +XNDIVP_DIV_ITER(377, 376) +XNDIVP_MOD_ITER(378, 377) +XNDIVP_DIV_ITER(378, 377) +XNDIVP_MOD_ITER(379, 378) +XNDIVP_DIV_ITER(379, 378) +XNDIVP_MOD_ITER(380, 379) +XNDIVP_DIV_ITER(380, 379) +XNDIVP_MOD_ITER(381, 380) +XNDIVP_DIV_ITER(381, 380) +XNDIVP_MOD_ITER(382, 381) +XNDIVP_DIV_ITER(382, 381) +XNDIVP_MOD_ITER(383, 382) +XNDIVP_DIV_ITER(383, 382) +XNDIVP_MOD_ITER(384, 383) +XNDIVP_DIV_ITER(384, 383) +XNDIVP_MOD_ITER(385, 384) +XNDIVP_DIV_ITER(385, 384) +XNDIVP_MOD_ITER(386, 385) +XNDIVP_DIV_ITER(386, 385) +XNDIVP_MOD_ITER(387, 386) +XNDIVP_DIV_ITER(387, 386) +XNDIVP_MOD_ITER(388, 387) +XNDIVP_DIV_ITER(388, 387) +XNDIVP_MOD_ITER(389, 388) +XNDIVP_DIV_ITER(389, 388) +XNDIVP_MOD_ITER(390, 389) +XNDIVP_DIV_ITER(390, 389) +XNDIVP_MOD_ITER(391, 390) +XNDIVP_DIV_ITER(391, 390) +XNDIVP_MOD_ITER(392, 391) +XNDIVP_DIV_ITER(392, 391) +XNDIVP_MOD_ITER(393, 392) +XNDIVP_DIV_ITER(393, 392) +XNDIVP_MOD_ITER(394, 393) +XNDIVP_DIV_ITER(394, 393) +XNDIVP_MOD_ITER(395, 394) +XNDIVP_DIV_ITER(395, 394) +XNDIVP_MOD_ITER(396, 395) +XNDIVP_DIV_ITER(396, 395) +XNDIVP_MOD_ITER(397, 396) +XNDIVP_DIV_ITER(397, 396) +XNDIVP_MOD_ITER(398, 397) +XNDIVP_DIV_ITER(398, 397) +XNDIVP_MOD_ITER(399, 398) +XNDIVP_DIV_ITER(399, 398) +XNDIVP_MOD_ITER(400, 399) +XNDIVP_DIV_ITER(400, 399) +XNDIVP_MOD_ITER(401, 400) +XNDIVP_DIV_ITER(401, 400) +XNDIVP_MOD_ITER(402, 401) +XNDIVP_DIV_ITER(402, 401) +XNDIVP_MOD_ITER(403, 402) +XNDIVP_DIV_ITER(403, 402) +XNDIVP_MOD_ITER(404, 403) +XNDIVP_DIV_ITER(404, 403) +XNDIVP_MOD_ITER(405, 404) +XNDIVP_DIV_ITER(405, 404) +XNDIVP_MOD_ITER(406, 405) +XNDIVP_DIV_ITER(406, 405) +XNDIVP_MOD_ITER(407, 406) +XNDIVP_DIV_ITER(407, 406) +XNDIVP_MOD_ITER(408, 407) +XNDIVP_DIV_ITER(408, 407) +XNDIVP_MOD_ITER(409, 408) +XNDIVP_DIV_ITER(409, 408) +XNDIVP_MOD_ITER(410, 409) +XNDIVP_DIV_ITER(410, 409) +XNDIVP_MOD_ITER(411, 410) +XNDIVP_DIV_ITER(411, 410) +XNDIVP_MOD_ITER(412, 411) +XNDIVP_DIV_ITER(412, 411) +XNDIVP_MOD_ITER(413, 412) +XNDIVP_DIV_ITER(413, 412) +XNDIVP_MOD_ITER(414, 413) +XNDIVP_DIV_ITER(414, 413) +XNDIVP_MOD_ITER(415, 414) +XNDIVP_DIV_ITER(415, 414) +XNDIVP_MOD_ITER(416, 415) +XNDIVP_DIV_ITER(416, 415) +XNDIVP_MOD_ITER(417, 416) +XNDIVP_DIV_ITER(417, 416) +XNDIVP_MOD_ITER(418, 417) +XNDIVP_DIV_ITER(418, 417) +XNDIVP_MOD_ITER(419, 418) +XNDIVP_DIV_ITER(419, 418) +XNDIVP_MOD_ITER(420, 419) +XNDIVP_DIV_ITER(420, 419) +XNDIVP_MOD_ITER(421, 420) +XNDIVP_DIV_ITER(421, 420) +XNDIVP_MOD_ITER(422, 421) +XNDIVP_DIV_ITER(422, 421) +XNDIVP_MOD_ITER(423, 422) +XNDIVP_DIV_ITER(423, 422) +XNDIVP_MOD_ITER(424, 423) +XNDIVP_DIV_ITER(424, 423) +XNDIVP_MOD_ITER(425, 424) +XNDIVP_DIV_ITER(425, 424) +XNDIVP_MOD_ITER(426, 425) +XNDIVP_DIV_ITER(426, 425) +XNDIVP_MOD_ITER(427, 426) +XNDIVP_DIV_ITER(427, 426) +XNDIVP_MOD_ITER(428, 427) +XNDIVP_DIV_ITER(428, 427) +XNDIVP_MOD_ITER(429, 428) +XNDIVP_DIV_ITER(429, 428) +XNDIVP_MOD_ITER(430, 429) +XNDIVP_DIV_ITER(430, 429) +XNDIVP_MOD_ITER(431, 430) +XNDIVP_DIV_ITER(431, 430) +XNDIVP_MOD_ITER(432, 431) +XNDIVP_DIV_ITER(432, 431) +XNDIVP_MOD_ITER(433, 432) +XNDIVP_DIV_ITER(433, 432) +XNDIVP_MOD_ITER(434, 433) +XNDIVP_DIV_ITER(434, 433) +XNDIVP_MOD_ITER(435, 434) +XNDIVP_DIV_ITER(435, 434) +XNDIVP_MOD_ITER(436, 435) +XNDIVP_DIV_ITER(436, 435) +XNDIVP_MOD_ITER(437, 436) +XNDIVP_DIV_ITER(437, 436) +XNDIVP_MOD_ITER(438, 437) +XNDIVP_DIV_ITER(438, 437) +XNDIVP_MOD_ITER(439, 438) +XNDIVP_DIV_ITER(439, 438) +XNDIVP_MOD_ITER(440, 439) +XNDIVP_DIV_ITER(440, 439) +XNDIVP_MOD_ITER(441, 440) +XNDIVP_DIV_ITER(441, 440) +XNDIVP_MOD_ITER(442, 441) +XNDIVP_DIV_ITER(442, 441) +XNDIVP_MOD_ITER(443, 442) +XNDIVP_DIV_ITER(443, 442) +XNDIVP_MOD_ITER(444, 443) +XNDIVP_DIV_ITER(444, 443) +XNDIVP_MOD_ITER(445, 444) +XNDIVP_DIV_ITER(445, 444) +XNDIVP_MOD_ITER(446, 445) +XNDIVP_DIV_ITER(446, 445) +XNDIVP_MOD_ITER(447, 446) +XNDIVP_DIV_ITER(447, 446) +XNDIVP_MOD_ITER(448, 447) +XNDIVP_DIV_ITER(448, 447) +XNDIVP_MOD_ITER(449, 448) +XNDIVP_DIV_ITER(449, 448) +XNDIVP_MOD_ITER(450, 449) +XNDIVP_DIV_ITER(450, 449) +XNDIVP_MOD_ITER(451, 450) +XNDIVP_DIV_ITER(451, 450) +XNDIVP_MOD_ITER(452, 451) +XNDIVP_DIV_ITER(452, 451) +XNDIVP_MOD_ITER(453, 452) +XNDIVP_DIV_ITER(453, 452) +XNDIVP_MOD_ITER(454, 453) +XNDIVP_DIV_ITER(454, 453) +XNDIVP_MOD_ITER(455, 454) +XNDIVP_DIV_ITER(455, 454) +XNDIVP_MOD_ITER(456, 455) +XNDIVP_DIV_ITER(456, 455) +XNDIVP_MOD_ITER(457, 456) +XNDIVP_DIV_ITER(457, 456) +XNDIVP_MOD_ITER(458, 457) +XNDIVP_DIV_ITER(458, 457) +XNDIVP_MOD_ITER(459, 458) +XNDIVP_DIV_ITER(459, 458) +XNDIVP_MOD_ITER(460, 459) +XNDIVP_DIV_ITER(460, 459) +XNDIVP_MOD_ITER(461, 460) +XNDIVP_DIV_ITER(461, 460) +XNDIVP_MOD_ITER(462, 461) +XNDIVP_DIV_ITER(462, 461) +XNDIVP_MOD_ITER(463, 462) +XNDIVP_DIV_ITER(463, 462) +XNDIVP_MOD_ITER(464, 463) +XNDIVP_DIV_ITER(464, 463) +XNDIVP_MOD_ITER(465, 464) +XNDIVP_DIV_ITER(465, 464) +XNDIVP_MOD_ITER(466, 465) +XNDIVP_DIV_ITER(466, 465) +XNDIVP_MOD_ITER(467, 466) +XNDIVP_DIV_ITER(467, 466) +XNDIVP_MOD_ITER(468, 467) +XNDIVP_DIV_ITER(468, 467) +XNDIVP_MOD_ITER(469, 468) +XNDIVP_DIV_ITER(469, 468) +XNDIVP_MOD_ITER(470, 469) +XNDIVP_DIV_ITER(470, 469) +XNDIVP_MOD_ITER(471, 470) +XNDIVP_DIV_ITER(471, 470) +XNDIVP_MOD_ITER(472, 471) +XNDIVP_DIV_ITER(472, 471) +XNDIVP_MOD_ITER(473, 472) +XNDIVP_DIV_ITER(473, 472) +XNDIVP_MOD_ITER(474, 473) +XNDIVP_DIV_ITER(474, 473) +XNDIVP_MOD_ITER(475, 474) +XNDIVP_DIV_ITER(475, 474) +XNDIVP_MOD_ITER(476, 475) +XNDIVP_DIV_ITER(476, 475) +XNDIVP_MOD_ITER(477, 476) +XNDIVP_DIV_ITER(477, 476) +XNDIVP_MOD_ITER(478, 477) +XNDIVP_DIV_ITER(478, 477) +XNDIVP_MOD_ITER(479, 478) +XNDIVP_DIV_ITER(479, 478) +XNDIVP_MOD_ITER(480, 479) +XNDIVP_DIV_ITER(480, 479) +XNDIVP_MOD_ITER(481, 480) +XNDIVP_DIV_ITER(481, 480) +XNDIVP_MOD_ITER(482, 481) +XNDIVP_DIV_ITER(482, 481) +XNDIVP_MOD_ITER(483, 482) +XNDIVP_DIV_ITER(483, 482) +XNDIVP_MOD_ITER(484, 483) +XNDIVP_DIV_ITER(484, 483) +XNDIVP_MOD_ITER(485, 484) +XNDIVP_DIV_ITER(485, 484) +XNDIVP_MOD_ITER(486, 485) +XNDIVP_DIV_ITER(486, 485) +XNDIVP_MOD_ITER(487, 486) +XNDIVP_DIV_ITER(487, 486) +XNDIVP_MOD_ITER(488, 487) +XNDIVP_DIV_ITER(488, 487) +XNDIVP_MOD_ITER(489, 488) +XNDIVP_DIV_ITER(489, 488) +XNDIVP_MOD_ITER(490, 489) +XNDIVP_DIV_ITER(490, 489) +XNDIVP_MOD_ITER(491, 490) +XNDIVP_DIV_ITER(491, 490) +XNDIVP_MOD_ITER(492, 491) +XNDIVP_DIV_ITER(492, 491) +XNDIVP_MOD_ITER(493, 492) +XNDIVP_DIV_ITER(493, 492) +XNDIVP_MOD_ITER(494, 493) +XNDIVP_DIV_ITER(494, 493) +XNDIVP_MOD_ITER(495, 494) +XNDIVP_DIV_ITER(495, 494) +XNDIVP_MOD_ITER(496, 495) +XNDIVP_DIV_ITER(496, 495) +XNDIVP_MOD_ITER(497, 496) +XNDIVP_DIV_ITER(497, 496) +XNDIVP_MOD_ITER(498, 497) +XNDIVP_DIV_ITER(498, 497) +XNDIVP_MOD_ITER(499, 498) +XNDIVP_DIV_ITER(499, 498) +XNDIVP_MOD_ITER(500, 499) +XNDIVP_DIV_ITER(500, 499) +XNDIVP_MOD_ITER(501, 500) +XNDIVP_DIV_ITER(501, 500) +XNDIVP_MOD_ITER(502, 501) +XNDIVP_DIV_ITER(502, 501) +XNDIVP_MOD_ITER(503, 502) +XNDIVP_DIV_ITER(503, 502) +XNDIVP_MOD_ITER(504, 503) +XNDIVP_DIV_ITER(504, 503) +XNDIVP_MOD_ITER(505, 504) +XNDIVP_DIV_ITER(505, 504) +XNDIVP_MOD_ITER(506, 505) +XNDIVP_DIV_ITER(506, 505) +XNDIVP_MOD_ITER(507, 506) +XNDIVP_DIV_ITER(507, 506) +XNDIVP_MOD_ITER(508, 507) +XNDIVP_DIV_ITER(508, 507) +XNDIVP_MOD_ITER(509, 508) +XNDIVP_DIV_ITER(509, 508) +XNDIVP_MOD_ITER(510, 509) +XNDIVP_DIV_ITER(510, 509) +XNDIVP_MOD_ITER(511, 510) +XNDIVP_DIV_ITER(511, 510) +XNDIVP_MOD_ITER(512, 511) +XNDIVP_DIV_ITER(512, 511) +XNDIVP_MOD_ITER(513, 512) +XNDIVP_DIV_ITER(513, 512) +XNDIVP_MOD_ITER(514, 513) +XNDIVP_DIV_ITER(514, 513) +XNDIVP_MOD_ITER(515, 514) +XNDIVP_DIV_ITER(515, 514) +XNDIVP_MOD_ITER(516, 515) +XNDIVP_DIV_ITER(516, 515) +XNDIVP_MOD_ITER(517, 516) +XNDIVP_DIV_ITER(517, 516) +XNDIVP_MOD_ITER(518, 517) +XNDIVP_DIV_ITER(518, 517) +XNDIVP_MOD_ITER(519, 518) +XNDIVP_DIV_ITER(519, 518) +XNDIVP_MOD_ITER(520, 519) +XNDIVP_DIV_ITER(520, 519) +XNDIVP_MOD_ITER(521, 520) +XNDIVP_DIV_ITER(521, 520) +XNDIVP_MOD_ITER(522, 521) +XNDIVP_DIV_ITER(522, 521) +XNDIVP_MOD_ITER(523, 522) +XNDIVP_DIV_ITER(523, 522) +XNDIVP_MOD_ITER(524, 523) +XNDIVP_DIV_ITER(524, 523) +XNDIVP_MOD_ITER(525, 524) +XNDIVP_DIV_ITER(525, 524) +XNDIVP_MOD_ITER(526, 525) +XNDIVP_DIV_ITER(526, 525) +XNDIVP_MOD_ITER(527, 526) +XNDIVP_DIV_ITER(527, 526) +XNDIVP_MOD_ITER(528, 527) +XNDIVP_DIV_ITER(528, 527) +XNDIVP_MOD_ITER(529, 528) +XNDIVP_DIV_ITER(529, 528) +XNDIVP_MOD_ITER(530, 529) +XNDIVP_DIV_ITER(530, 529) +XNDIVP_MOD_ITER(531, 530) +XNDIVP_DIV_ITER(531, 530) +XNDIVP_MOD_ITER(532, 531) +XNDIVP_DIV_ITER(532, 531) +XNDIVP_MOD_ITER(533, 532) +XNDIVP_DIV_ITER(533, 532) +XNDIVP_MOD_ITER(534, 533) +XNDIVP_DIV_ITER(534, 533) +XNDIVP_MOD_ITER(535, 534) +XNDIVP_DIV_ITER(535, 534) +XNDIVP_MOD_ITER(536, 535) +XNDIVP_DIV_ITER(536, 535) +XNDIVP_MOD_ITER(537, 536) +XNDIVP_DIV_ITER(537, 536) +XNDIVP_MOD_ITER(538, 537) +XNDIVP_DIV_ITER(538, 537) +XNDIVP_MOD_ITER(539, 538) +XNDIVP_DIV_ITER(539, 538) +XNDIVP_MOD_ITER(540, 539) +XNDIVP_DIV_ITER(540, 539) +XNDIVP_MOD_ITER(541, 540) +XNDIVP_DIV_ITER(541, 540) +XNDIVP_MOD_ITER(542, 541) +XNDIVP_DIV_ITER(542, 541) +XNDIVP_MOD_ITER(543, 542) +XNDIVP_DIV_ITER(543, 542) +XNDIVP_MOD_ITER(544, 543) +XNDIVP_DIV_ITER(544, 543) diff -r 000000000000 -r 422835bc1aca crc32x86.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/crc32x86.c Mon Feb 09 01:15:00 2026 -0500 @@ -0,0 +1,203 @@ +/* x86-specific CRC routines */ + +#include "crc32.h" +#include "crc32i.h" +#include +#include + +#define BITREVERSE64EX(THIS) \ +( \ + (((THIS) & 0x0000000000000001) << 63) \ + | (((THIS) & 0x0000000000000002) << 61) \ + | (((THIS) & 0x0000000000000004) << 59) \ + | (((THIS) & 0x0000000000000008) << 57) \ + | (((THIS) & 0x0000000000000010) << 55) \ + | (((THIS) & 0x0000000000000020) << 53) \ + | (((THIS) & 0x0000000000000040) << 51) \ + | (((THIS) & 0x0000000000000080) << 49) \ + | (((THIS) & 0x0000000000000100) << 47) \ + | (((THIS) & 0x0000000000000200) << 45) \ + | (((THIS) & 0x0000000000000400) << 43) \ + | (((THIS) & 0x0000000000000800) << 41) \ + | (((THIS) & 0x0000000000001000) << 39) \ + | (((THIS) & 0x0000000000002000) << 37) \ + | (((THIS) & 0x0000000000004000) << 35) \ + | (((THIS) & 0x0000000000008000) << 33) \ + | (((THIS) & 0x0000000000010000) << 31) \ + | (((THIS) & 0x0000000000020000) << 29) \ + | (((THIS) & 0x0000000000040000) << 27) \ + | (((THIS) & 0x0000000000080000) << 25) \ + | (((THIS) & 0x0000000000100000) << 23) \ + | (((THIS) & 0x0000000000200000) << 21) \ + | (((THIS) & 0x0000000000400000) << 19) \ + | (((THIS) & 0x0000000000800000) << 17) \ + | (((THIS) & 0x0000000001000000) << 15) \ + | (((THIS) & 0x0000000002000000) << 13) \ + | (((THIS) & 0x0000000004000000) << 11) \ + | (((THIS) & 0x0000000008000000) << 9) \ + | (((THIS) & 0x0000000010000000) << 7) \ + | (((THIS) & 0x0000000020000000) << 5) \ + | (((THIS) & 0x0000000040000000) << 3) \ + | (((THIS) & 0x0000000080000000) << 1) \ + | (((THIS) & 0x0000000100000000) >> 1) \ + | (((THIS) & 0x0000000200000000) >> 3) \ + | (((THIS) & 0x0000000400000000) >> 5) \ + | (((THIS) & 0x0000000800000000) >> 7) \ + | (((THIS) & 0x0000001000000000) >> 9) \ + | (((THIS) & 0x0000002000000000) >> 11) \ + | (((THIS) & 0x0000004000000000) >> 13) \ + | (((THIS) & 0x0000008000000000) >> 15) \ + | (((THIS) & 0x0000010000000000) >> 17) \ + | (((THIS) & 0x0000020000000000) >> 19) \ + | (((THIS) & 0x0000040000000000) >> 21) \ + | (((THIS) & 0x0000080000000000) >> 23) \ + | (((THIS) & 0x0000100000000000) >> 25) \ + | (((THIS) & 0x0000200000000000) >> 27) \ + | (((THIS) & 0x0000400000000000) >> 29) \ + | (((THIS) & 0x0000800000000000) >> 31) \ + | (((THIS) & 0x0001000000000000) >> 33) \ + | (((THIS) & 0x0002000000000000) >> 35) \ + | (((THIS) & 0x0004000000000000) >> 37) \ + | (((THIS) & 0x0008000000000000) >> 39) \ + | (((THIS) & 0x0010000000000000) >> 41) \ + | (((THIS) & 0x0020000000000000) >> 43) \ + | (((THIS) & 0x0040000000000000) >> 45) \ + | (((THIS) & 0x0080000000000000) >> 47) \ + | (((THIS) & 0x0100000000000000) >> 49) \ + | (((THIS) & 0x0200000000000000) >> 51) \ + | (((THIS) & 0x0400000000000000) >> 53) \ + | (((THIS) & 0x0800000000000000) >> 55) \ + | (((THIS) & 0x1000000000000000) >> 57) \ + | (((THIS) & 0x2000000000000000) >> 59) \ + | (((THIS) & 0x4000000000000000) >> 61) \ + | (((THIS) & 0x8000000000000000) >> 63) \ +) + +#define BITREVERSE64(THIS) \ + (BITREVERSE64EX((uint64_t)(THIS))) + +#define BITREVERSE32EX(THIS) \ +( \ + (((THIS) & 0x00000001) << 31) \ + | (((THIS) & 0x00000002) << 29) \ + | (((THIS) & 0x00000004) << 27) \ + | (((THIS) & 0x00000008) << 25) \ + | (((THIS) & 0x00000010) << 23) \ + | (((THIS) & 0x00000020) << 21) \ + | (((THIS) & 0x00000040) << 19) \ + | (((THIS) & 0x00000080) << 17) \ + | (((THIS) & 0x00000100) << 15) \ + | (((THIS) & 0x00000200) << 13) \ + | (((THIS) & 0x00000400) << 11) \ + | (((THIS) & 0x00000800) << 9) \ + | (((THIS) & 0x00001000) << 7) \ + | (((THIS) & 0x00002000) << 5) \ + | (((THIS) & 0x00004000) << 3) \ + | (((THIS) & 0x00008000) << 1) \ + | (((THIS) & 0x00010000) >> 1) \ + | (((THIS) & 0x00020000) >> 3) \ + | (((THIS) & 0x00040000) >> 5) \ + | (((THIS) & 0x00080000) >> 7) \ + | (((THIS) & 0x00100000) >> 9) \ + | (((THIS) & 0x00200000) >> 11) \ + | (((THIS) & 0x00400000) >> 13) \ + | (((THIS) & 0x00800000) >> 15) \ + | (((THIS) & 0x01000000) >> 17) \ + | (((THIS) & 0x02000000) >> 19) \ + | (((THIS) & 0x04000000) >> 21) \ + | (((THIS) & 0x08000000) >> 23) \ + | (((THIS) & 0x10000000) >> 25) \ + | (((THIS) & 0x20000000) >> 27) \ + | (((THIS) & 0x40000000) >> 29) \ + | (((THIS) & 0x80000000) >> 31) \ +) + +#define BITREVERSE32(THIS) \ + (BITREVERSE32EX((uint32_t)(THIS))) + +enum { + XNDIVP_RK08F = (BITREVERSE32(CRC32_POLYNOMIAL)) | 0x100000000ull, + XNDIVP_RK08R = (BITREVERSE64(XNDIVP_RK08F) >> 31) | 1, + + /* The beginning ... */ + XNDIVP_MOD_ITER_0 = XNDIVP_RK08F, + XNDIVP_DIV_ITER_0 = 1, + +/* to generate table, run this: + +#include +int main(void) +{ + unsigned i; + + for (i = 1; i <= (4*128+32); i++) { + printf("XNDIVP_MOD_ITER(%u, %u)\n", i, i - 1); + printf("XNDIVP_DIV_ITER(%u, %u)\n", i, i - 1); + } + + return 0; +} +*/ + +#define XNDIVP_MOD_ITER(This, last) \ + XNDIVP_MOD_ITER_##This = (uint64_t)((XNDIVP_MOD_ITER_##last << 1) ^ ((XNDIVP_MOD_ITER_##last & 0x80000000) ? (XNDIVP_RK08F) : 0)), + +#define XNDIVP_DIV_ITER(This, last) \ + XNDIVP_DIV_ITER_##This = (uint64_t)(((uint64_t)XNDIVP_DIV_ITER_##last << 1) | ((XNDIVP_MOD_ITER_##last & 0x80000000ull) ? 1 : 0)), + +#include "crc32x86-tab.h" + +#undef XNDIVP_MOD_ITER +#undef XNDIVP_DIV_ITER + +#define FIXUPCONSTANTS(x) (BITREVERSE64(x) >> 31) + RK01 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_64), + RK02 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_128), + RK05 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_64), + RK06 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_32), + RK07 = FIXUPCONSTANTS(XNDIVP_DIV_ITER_32), + RK08 = XNDIVP_RK08R, +#undef FIXUPCONSTANTS +}; + +__attribute__((__target__("vpclmulqdq"))) +uint32_t crc32x86_vpclmulqdq_r(uint32_t crc, const unsigned char *msg, size_t sz) +{ + /* This actually works for 16-byte buffers too, but whether it's actually + * useful or faster is another question entirely */ + if (sz >= 32) { + static const __attribute__((__aligned__(16))) uint64_t rk01[2] = {RK01, RK02}, + rk05[2] = {RK05, RK06}, + rk07[2] = {RK07, RK08}, + mask2[2] = {0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF}; + __m128i rk, msgxmm; + + msgxmm = _mm_xor_si128(_mm_load_si128((__m128i *)msg), _mm_cvtsi32_si128(crc)); + + rk = _mm_load_si128((__m128i *)rk01); + + for (msg += 16, sz -= 16; sz >= 16; msg += 16, sz -= 16) { + msgxmm = _mm_xor_si128(_mm_xor_si128(_mm_clmulepi64_si128(msgxmm, rk, 0x10), _mm_clmulepi64_si128(msgxmm, rk, 0x01)), _mm_load_si128((__m128i *)msg)); + } + + rk = _mm_load_si128((__m128i *)rk05); + + msgxmm = _mm_xor_si128(_mm_clmulepi64_si128(msgxmm, rk, 0x00), _mm_srli_si128(msgxmm, 8)); + + msgxmm = _mm_xor_si128(_mm_clmulepi64_si128(_mm_slli_si128(msgxmm, 12), rk, 0x11), _mm_and_si128(msgxmm, _mm_load_si128((__m128i *)mask2))); + + /* Barrett Reduction */ + rk = _mm_load_si128((__m128i *)rk07); + msgxmm = _mm_xor_si128(_mm_clmulepi64_si128(_mm_clmulepi64_si128(msgxmm, rk, 0x00), rk, 0x10), msgxmm); + + crc = _mm_extract_epi32(msgxmm, 2); + } + + if (!sz) return crc; + + /* We were already aligned on a 16-byte boundary going in (hopefully + * or else it will break), and we process 16-bytes at a time. This + * means `msg` is aligned 16-bytes, a multiple of 4-byte, so we don't + * need to align any more (or use crc32c_r). */ + return crc32qw_r(crc, msg, sz); +}