annotate crc32x86.c @ 3:6483683ac857 default tip

*: add profiling code too; expand x86 to use all eight XMM registers basically ported verbatim from the assembly
author Paper <paper@tflc.us>
date Mon, 09 Feb 2026 21:30:30 -0500
parents ead9f84d11db
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
1 /* x86-specific CRC routines */
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
2
2
ead9f84d11db *: make it work on non-x86 too
Paper <paper@tflc.us>
parents: 0
diff changeset
3 #ifdef __x86_64__
ead9f84d11db *: make it work on non-x86 too
Paper <paper@tflc.us>
parents: 0
diff changeset
4
3
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
5 /* NOTE: None of this is really x86-specific.
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
6 * There are probably many other architectures with
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
7 * native 64x64->128.
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
8 *
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
9 * We could adapt this to use just the gcc uint128_t
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
10 * instead of x86 intrinsics, but it may slow things
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
11 * down a bit. */
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
12
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
13 #define VPCLMULQDQ_TARGET __attribute__((__target__("vpclmulqdq")))
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
14
0
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
15 #include "crc32.h"
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
16 #include "crc32i.h"
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
17 #include <stdio.h>
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
18 #include <immintrin.h>
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
19
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
/* One stage of a bit-reversal swap network: exchanges every pair of
 * neighbouring WIDTH-bit groups in VALUE. MASK must select the lower
 * group of each pair. */
#define BITREVERSE64_STAGE(VALUE, MASK, WIDTH) \
	((((VALUE) >> (WIDTH)) & (MASK)) | (((VALUE) & (MASK)) << (WIDTH)))

/* Mirrors all 64 bits of THIS (bit 0 <-> bit 63, bit 1 <-> bit 62, ...).
 *
 * THIS has to be an unsigned 64-bit value already; use BITREVERSE64()
 * when that is not guaranteed. The argument is expanded many times, so
 * it must be free of side effects. Only integer operators are used, so
 * a constant argument yields a constant expression. */
#define BITREVERSE64EX(THIS) \
	BITREVERSE64_STAGE( \
	BITREVERSE64_STAGE( \
	BITREVERSE64_STAGE( \
	BITREVERSE64_STAGE( \
	BITREVERSE64_STAGE( \
	BITREVERSE64_STAGE((THIS), \
		0x5555555555555555,  1), \
		0x3333333333333333,  2), \
		0x0F0F0F0F0F0F0F0F,  4), \
		0x00FF00FF00FF00FF,  8), \
		0x0000FFFF0000FFFF, 16), \
		0x00000000FFFFFFFF, 32)

/* Bit-reverses THIS after converting it to uint64_t. */
#define BITREVERSE64(THIS) \
	(BITREVERSE64EX((uint64_t)(THIS)))
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
90
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
/* One stage of a bit-reversal swap network: exchanges every pair of
 * neighbouring WIDTH-bit groups in VALUE. MASK must select the lower
 * group of each pair. */
#define BITREVERSE32_STAGE(VALUE, MASK, WIDTH) \
	((((VALUE) >> (WIDTH)) & (MASK)) | (((VALUE) & (MASK)) << (WIDTH)))

/* Mirrors all 32 bits of THIS (bit 0 <-> bit 31, bit 1 <-> bit 30, ...).
 *
 * THIS has to be an unsigned 32-bit value already; use BITREVERSE32()
 * when that is not guaranteed. The argument is expanded many times, so
 * it must be free of side effects. Only integer operators are used, so
 * a constant argument yields a constant expression. */
#define BITREVERSE32EX(THIS) \
	BITREVERSE32_STAGE( \
	BITREVERSE32_STAGE( \
	BITREVERSE32_STAGE( \
	BITREVERSE32_STAGE( \
	BITREVERSE32_STAGE((THIS), \
		0x55555555,  1), \
		0x33333333,  2), \
		0x0F0F0F0F,  4), \
		0x00FF00FF,  8), \
		0x0000FFFF, 16)

/* Bit-reverses THIS after converting it to uint32_t. */
#define BITREVERSE32(THIS) \
	(BITREVERSE32EX((uint32_t)(THIS)))
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
129
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
130 enum {
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
131 XNDIVP_RK08F = (BITREVERSE32(CRC32_POLYNOMIAL)) | 0x100000000ull,
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
132 XNDIVP_RK08R = (BITREVERSE64(XNDIVP_RK08F) >> 31) | 1,
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
133
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
134 /* The beginning ... */
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
135 XNDIVP_MOD_ITER_0 = XNDIVP_RK08F,
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
136 XNDIVP_DIV_ITER_0 = 1,
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
137
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
138 /* to generate table, run this:
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
139
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
140 #include <stdio.h>
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
141 int main(void)
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
142 {
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
143 unsigned i;
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
144
3
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
145 for (i = 1; i <= 1024; i++) {
0
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
146 printf("XNDIVP_MOD_ITER(%u, %u)\n", i, i - 1);
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
147 printf("XNDIVP_DIV_ITER(%u, %u)\n", i, i - 1);
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
148 }
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
149
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
150 return 0;
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
151 }
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
152 */
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
153
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
154 #define XNDIVP_MOD_ITER(This, last) \
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
155 XNDIVP_MOD_ITER_##This = (uint64_t)((XNDIVP_MOD_ITER_##last << 1) ^ ((XNDIVP_MOD_ITER_##last & 0x80000000) ? (XNDIVP_RK08F) : 0)),
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
156
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
157 #define XNDIVP_DIV_ITER(This, last) \
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
158 XNDIVP_DIV_ITER_##This = (uint64_t)(((uint64_t)XNDIVP_DIV_ITER_##last << 1) | ((XNDIVP_MOD_ITER_##last & 0x80000000ull) ? 1 : 0)),
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
159
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
160 #include "crc32x86-tab.h"
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
161
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
162 #undef XNDIVP_MOD_ITER
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
163 #undef XNDIVP_DIV_ITER
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
164
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
165 #define FIXUPCONSTANTS(x) (BITREVERSE64(x) >> 31)
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
166 RK01 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_64),
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
167 RK02 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_128),
3
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
168 RK03 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_960),
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
169 RK04 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_1024),
0
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
170 RK05 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_64),
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
171 RK06 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_32),
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
172 RK07 = FIXUPCONSTANTS(XNDIVP_DIV_ITER_32),
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
173 RK08 = XNDIVP_RK08R,
3
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
174 RK09 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_832),
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
175 RK10 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_896),
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
176 RK11 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_704),
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
177 RK12 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_768),
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
178 RK13 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_576),
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
179 RK14 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_640),
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
180 RK15 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_448),
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
181 RK16 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_512),
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
182 RK17 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_320),
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
183 RK18 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_384),
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
184 RK19 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_192),
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
185 RK20 = FIXUPCONSTANTS(XNDIVP_MOD_ITER_256),
0
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
186 #undef FIXUPCONSTANTS
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
187 };
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
188
3
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
189 VPCLMULQDQ_TARGET
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
190 CRC32_FORCEINLINE
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
191 uint32_t crc32x86_barrett_reduction(__m128i msgxmm)
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
192 {
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
193 static const CRC32_ALIGN(16) uint64_t rk05[2] = {RK05, RK06},
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
194 rk07[2] = {RK07, RK08},
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
195 mask2[2] = {0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF};
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
196 __m128i rk;
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
197
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
198 rk = _mm_load_si128((__m128i *)rk05);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
199
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
200 msgxmm = _mm_xor_si128(_mm_clmulepi64_si128(msgxmm, rk, 0x00), _mm_srli_si128(msgxmm, 8));
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
201
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
202 msgxmm = _mm_xor_si128(_mm_clmulepi64_si128(_mm_slli_si128(msgxmm, 12), rk, 0x11), _mm_and_si128(msgxmm, _mm_load_si128((__m128i *)mask2)));
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
203
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
204 /* Barrett Reduction */
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
205 rk = _mm_load_si128((__m128i *)rk07);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
206 msgxmm = _mm_xor_si128(_mm_clmulepi64_si128(_mm_clmulepi64_si128(msgxmm, rk, 0x00), rk, 0x10), msgxmm);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
207
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
208 return _mm_extract_epi32(msgxmm, 2);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
209 }
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
210
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
211 VPCLMULQDQ_TARGET
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
212 CRC32_FORCEINLINE
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
213 __m128i crc32x86_fold(__m128i xmm, __m128i rk, __m128i next)
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
214 {
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
215 return _mm_xor_si128(next, _mm_xor_si128(_mm_clmulepi64_si128(xmm, rk, 0x01), _mm_clmulepi64_si128(xmm, rk, 0x10)));
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
216 }
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
217
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
218 /* GCC-specific: per-function target attribute */
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
219 VPCLMULQDQ_TARGET
0
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
220 uint32_t crc32x86_vpclmulqdq_r(uint32_t crc, const unsigned char *msg, size_t sz)
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
221 {
3
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
222 static const CRC32_ALIGN(16) uint64_t rk01[2] = {RK01, RK02},
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
223 rk03[2] = {RK03, RK04},
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
224 rk09[2] = {RK09, RK10},
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
225 rk11[2] = {RK11, RK12},
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
226 rk13[2] = {RK13, RK14},
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
227 rk15[2] = {RK15, RK16},
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
228 rk17[2] = {RK17, RK18},
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
229 rk19[2] = {RK19, RK20};
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
230 __m128i msgxmm;
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
231
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
232 if (sz >= 256) {
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
233 __m128i rk, msgxmma[8], xmm8;
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
234
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
235 /* receive first 128 bytes */
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
236 msgxmma[0] = _mm_load_si128((__m128i *)msg + 0);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
237 msgxmma[1] = _mm_load_si128((__m128i *)msg + 1);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
238 msgxmma[2] = _mm_load_si128((__m128i *)msg + 2);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
239 msgxmma[3] = _mm_load_si128((__m128i *)msg + 3);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
240 msgxmma[4] = _mm_load_si128((__m128i *)msg + 4);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
241 msgxmma[5] = _mm_load_si128((__m128i *)msg + 5);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
242 msgxmma[6] = _mm_load_si128((__m128i *)msg + 6);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
243 msgxmma[7] = _mm_load_si128((__m128i *)msg + 7);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
244 msg += 128;
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
245 sz -= 128;
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
246
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
247 /* XOR the initial CRC */
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
248 msgxmma[0] = _mm_xor_si128(msgxmma[0], _mm_cvtsi32_si128(crc));
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
249
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
250 rk = _mm_load_si128((__m128i *)rk03);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
251
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
252 for (; sz >= 128; msg += 128, sz -= 128) {
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
253 /* loop unrolled */
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
254 msgxmma[0] = crc32x86_fold(msgxmma[0], rk, _mm_load_si128((__m128i *)msg + 0));
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
255 msgxmma[1] = crc32x86_fold(msgxmma[1], rk, _mm_load_si128((__m128i *)msg + 1));
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
256 msgxmma[2] = crc32x86_fold(msgxmma[2], rk, _mm_load_si128((__m128i *)msg + 2));
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
257 msgxmma[3] = crc32x86_fold(msgxmma[3], rk, _mm_load_si128((__m128i *)msg + 3));
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
258 msgxmma[4] = crc32x86_fold(msgxmma[4], rk, _mm_load_si128((__m128i *)msg + 4));
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
259 msgxmma[5] = crc32x86_fold(msgxmma[5], rk, _mm_load_si128((__m128i *)msg + 5));
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
260 msgxmma[6] = crc32x86_fold(msgxmma[6], rk, _mm_load_si128((__m128i *)msg + 6));
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
261 msgxmma[7] = crc32x86_fold(msgxmma[7], rk, _mm_load_si128((__m128i *)msg + 7));
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
262 }
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
263
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
264 /* Fold it all into one xmm register */
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
265 msgxmm = msgxmma[7];
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
266
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
267 msgxmm = crc32x86_fold(msgxmma[0], _mm_load_si128((__m128i *)rk09), msgxmm);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
268 msgxmm = crc32x86_fold(msgxmma[1], _mm_load_si128((__m128i *)rk11), msgxmm);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
269 msgxmm = crc32x86_fold(msgxmma[2], _mm_load_si128((__m128i *)rk13), msgxmm);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
270 msgxmm = crc32x86_fold(msgxmma[3], _mm_load_si128((__m128i *)rk15), msgxmm);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
271 msgxmm = crc32x86_fold(msgxmma[4], _mm_load_si128((__m128i *)rk17), msgxmm);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
272 msgxmm = crc32x86_fold(msgxmma[5], _mm_load_si128((__m128i *)rk19), msgxmm);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
273 msgxmm = crc32x86_fold(msgxmma[6], _mm_load_si128((__m128i *)rk01), msgxmm);
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
274
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
275 /* Jump across into the 16-byte code, skipping the loading.
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
276 * This is much simpler than either doing two Barrett reductions or
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
277 * adding a whole ton of branches... */
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
278 goto jmpFrom128byte;
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
279 }
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
280
0
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
281 /* This actually works for 16-byte buffers too, but whether it's actually
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
282 * useful or faster is another question entirely */
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
283 if (sz >= 32) {
3
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
284 __m128i rk;
0
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
285
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
286 msgxmm = _mm_xor_si128(_mm_load_si128((__m128i *)msg), _mm_cvtsi32_si128(crc));
3
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
287 msg += 16;
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
288 sz -= 16;
0
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
289
3
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
290 jmpFrom128byte:
0
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
291 rk = _mm_load_si128((__m128i *)rk01);
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
292
3
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
293 for (; sz >= 16; msg += 16, sz -= 16)
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
294 msgxmm = crc32x86_fold(msgxmm, rk, _mm_load_si128((__m128i *)msg));
0
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
295
3
6483683ac857 *: add profiling code too; expand x86 to use all eight XMM registers
Paper <paper@tflc.us>
parents: 2
diff changeset
296 crc = crc32x86_barrett_reduction(msgxmm);
0
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
297 }
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
298
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
299 if (!sz) return crc;
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
300
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
301 /* We were already aligned on a 16-byte boundary going in (hopefully
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
302 * or else it will break), and we process 16 bytes at a time. This
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
303 * means `msg` is 16-byte aligned (hence also 4-byte aligned), so we don't
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
304 * need to align any more (or use crc32c_r). */
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
305 return crc32qw_r(crc, msg, sz);
422835bc1aca *: checkin
Paper <paper@tflc.us>
parents:
diff changeset
306 }
2
ead9f84d11db *: make it work on non-x86 too
Paper <paper@tflc.us>
parents: 0
diff changeset
307
ead9f84d11db *: make it work on non-x86 too
Paper <paper@tflc.us>
parents: 0
diff changeset
308 #endif