annotate src/vec.c @ 29:e59c91d050c0

*: add aligned malloc stuff :)
author Paper <paper@tflc.us>
date Thu, 24 Apr 2025 17:12:05 -0400
parents c6c99ab1088a
children 641d8c79b1da
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
1 /**
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
2 * vec - a tiny SIMD vector library in C99
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
3 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
4 * Copyright (c) 2024 Paper
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
5 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
7 * of this software and associated documentation files (the "Software"), to deal
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
8 * in the Software without restriction, including without limitation the rights
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
10 * copies of the Software, and to permit persons to whom the Software is
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
11 * furnished to do so, subject to the following conditions:
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
12 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
13 * The above copyright notice and this permission notice shall be included in all
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
14 * copies or substantial portions of the Software.
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
15 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
22 * SOFTWARE.
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
23 **/
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
24
8
6e0eb3aa12ab build: add files to build vec as an external library
Paper <paper@tflc.us>
parents:
diff changeset
25 #include "vec/vec.h"
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
26 #include "vec/cpu.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
27 #include "vec/impl/generic.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
28 #include "vec/impl/fallback.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
29 #ifdef VEC_COMPILER_HAS_MMX
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
30 # include "vec/impl/x86/mmx.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
31 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
32 #ifdef VEC_COMPILER_HAS_SSE2
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
33 # include "vec/impl/x86/sse2.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
34 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
35 #ifdef VEC_COMPILER_HAS_SSE3
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
36 # include "vec/impl/x86/sse3.h"
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
37 #endif
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
38 #ifdef VEC_COMPILER_HAS_SSE41
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
39 # include "vec/impl/x86/sse41.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
40 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
41 #ifdef VEC_COMPILER_HAS_SSE42
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
42 # include "vec/impl/x86/sse42.h"
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
43 #endif
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
44 #ifdef VEC_COMPILER_HAS_AVX2
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
45 # include "vec/impl/x86/avx2.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
46 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
47 #ifdef VEC_COMPILER_HAS_AVX512F
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
48 # include "vec/impl/x86/avx512f.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
49 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
50 #ifdef VEC_COMPILER_HAS_AVX512BW
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
51 # include "vec/impl/x86/avx512bw.h"
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
52 #endif
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
53 #ifdef VEC_COMPILER_HAS_AVX512DQ
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
54 # include "vec/impl/x86/avx512dq.h"
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
55 #endif
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
56 #ifdef VEC_COMPILER_HAS_ALTIVEC
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
57 # include "vec/impl/ppc/altivec.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
58 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
59 #ifdef VEC_COMPILER_HAS_NEON
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
60 # include "vec/impl/arm/neon.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
61 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
62
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
63 extern inline vec_intmax vec_rshift(vec_intmax x, unsigned int y);
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
64 extern inline vec_intmax vec_lshift(vec_intmax x, unsigned int y);
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
65
25
92156fe32755 impl/ppc/altivec: update to new implementation
Paper <paper@tflc.us>
parents: 23
diff changeset
66 extern inline vec_intmax vec_avg(vec_intmax x, vec_intmax y);
92156fe32755 impl/ppc/altivec: update to new implementation
Paper <paper@tflc.us>
parents: 23
diff changeset
67 extern inline vec_uintmax vec_uavg(vec_uintmax x, vec_uintmax y);
92156fe32755 impl/ppc/altivec: update to new implementation
Paper <paper@tflc.us>
parents: 23
diff changeset
68
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
69 // 16-bit
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
70 vint8x2_impl vint8x2_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
71 vuint8x2_impl vuint8x2_impl_cpu = {0};
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
72
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
73 // 32-bit
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
74 vint8x4_impl vint8x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
75 vuint8x4_impl vuint8x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
76 vint16x2_impl vint16x2_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
77 vuint16x2_impl vuint16x2_impl_cpu = {0};
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
78
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
79 // 64-bit
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
80 vint8x8_impl vint8x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
81 vuint8x8_impl vuint8x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
82 vint16x4_impl vint16x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
83 vuint16x4_impl vuint16x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
84 vint32x2_impl vint32x2_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
85 vuint32x2_impl vuint32x2_impl_cpu = {0};
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
86
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
87 // 128-bit
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
88 vint8x16_impl vint8x16_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
89 vuint8x16_impl vuint8x16_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
90 vint16x8_impl vint16x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
91 vuint16x8_impl vuint16x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
92 vint32x4_impl vint32x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
93 vuint32x4_impl vuint32x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
94 vint64x2_impl vint64x2_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
95 vuint64x2_impl vuint64x2_impl_cpu = {0};
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
96
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
97 // 256-bit
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
98 vint8x32_impl vint8x32_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
99 vuint8x32_impl vuint8x32_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
100 vint16x16_impl vint16x16_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
101 vuint16x16_impl vuint16x16_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
102 vint32x8_impl vint32x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
103 vuint32x8_impl vuint32x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
104 vint64x4_impl vint64x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
105 vuint64x4_impl vuint64x4_impl_cpu = {0};
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
106
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
107 // 512-bit
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
108 vint8x64_impl vint8x64_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
109 vuint8x64_impl vuint8x64_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
110 vint16x32_impl vint16x32_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
111 vuint16x32_impl vuint16x32_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
112 vint32x16_impl vint32x16_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
113 vuint32x16_impl vuint32x16_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
114 vint64x8_impl vint64x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
115 vuint64x8_impl vuint64x8_impl_cpu = {0};
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
116
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
/* Checked at the top of vec_init(): a nonzero value means initialization has
 * already happened, and the call returns immediately without redoing it. */
static int vec_init_spinner = 0;
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
118
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
/* Copy one member `func` from the table `impl` into the table `cpu`.
 *
 * The copy happens only when the `cpu` slot is still empty AND `impl` really
 * provides that member, so a slot that was filled earlier is never
 * overwritten and an empty `impl` slot never clobbers anything.
 *
 * Wrapped in do { ... } while (0) so the macro behaves like one statement. */
#define FILL_GIVEN_FUNC_PTR(cpu, impl, func) \
	do { \
		if (!(cpu).func && (impl).func) \
			(cpu).func = (impl).func; \
	} while (0)
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
124
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
/* Apply FILL_GIVEN_FUNC_PTR to every operation slot of an implementation
 * table: each member of `cpu` that is still empty is taken from `impl` when
 * `impl` provides it. Slots already set in `cpu` are left untouched. */
#define FILL_GIVEN_FUNC_PTRS_EX(cpu, impl) \
	do { \
		FILL_GIVEN_FUNC_PTR(cpu, impl, splat); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, load_aligned); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, load); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, store_aligned); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, store); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, add); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, sub); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, mul); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, div); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, avg); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, band); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, bor); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, bxor); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, lshift); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, rshift); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, lrshift); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, cmplt); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, cmple); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, cmpeq); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, cmpge); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, cmpgt); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, min); \
		FILL_GIVEN_FUNC_PTR(cpu, impl, max); \
	} while (0)
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
151
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
/* Convenience wrapper around FILL_GIVEN_FUNC_PTRS_EX that builds both table
 * names by token pasting:
 *
 *   destination: v<sign>int<bits>x<size>_impl_cpu
 *   source:      v<sign>int<bits>x<size>_impl_<impl>
 *
 * `sign` is left empty for signed types and is `u` for unsigned ones, e.g.
 * FILL_GIVEN_FUNC_PTRS(u, 8, 16, sse2) fills vuint8x16_impl_cpu from
 * vuint8x16_impl_sse2. */
#define FILL_GIVEN_FUNC_PTRS(sign, bits, size, impl) \
	FILL_GIVEN_FUNC_PTRS_EX(v##sign##int##bits##x##size##_impl_cpu, v##sign##int##bits##x##size##_impl_##impl)
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
154
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
155 // returns 0 on success, or a negative error code on failure
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
156 int vec_init(void)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
157 {
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
158 // This function is NOT thread safe. However, once vec
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
159 // is initialized, all of the vector functions are thread-safe.
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
160
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
161 if (vec_init_spinner)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
162 return 0; // already initialized, do nothing
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
163
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
164 vec_uint32 cpu = vec_get_CPU_features();
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
165
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
166 /* Okay, this might be a little confusing:
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
167 * The way we do this is because of x86. For weird reasons,
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
168 * Intel decided to extend their prior CPU extensions to
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
169 * where SSE4.1 has some extended features of SSE2, AVX2
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
170 * has some extended features that should've been in SSE
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
171 * in general, etc.
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
172 *
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
173 * For this, I've just decided to keep the function
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
174 * definitions private, and fill in as we go, with newer
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
175 * intrinsics preferred. Others are arbitrary and are
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
176 * mutually exclusive (i.e. Altivec vs NEON). This is simply
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
177 * the easiest way to go about it :) */
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
178
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
179 /* --- 512-bit */
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
180 #ifdef VEC_COMPILER_HAS_AVX512DQ
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
181 if (cpu & VEC_CPU_HAS_AVX512DQ) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
182 /* these give us native multiply instructions */
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
183 FILL_GIVEN_FUNC_PTRS( , 64, 8, avx512dq);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
184 FILL_GIVEN_FUNC_PTRS(u, 64, 8, avx512dq);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
185 }
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
186 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
187 #ifdef VEC_COMPILER_HAS_AVX512BW
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
188 if (cpu & VEC_CPU_HAS_AVX512BW) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
189 FILL_GIVEN_FUNC_PTRS( , 8, 64, avx512bw);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
190 FILL_GIVEN_FUNC_PTRS(u, 8, 64, avx512bw);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
191 FILL_GIVEN_FUNC_PTRS( , 16, 32, avx512bw);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
192 FILL_GIVEN_FUNC_PTRS(u, 16, 32, avx512bw);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
193 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
194 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
195 #ifdef VEC_COMPILER_HAS_AVX512F
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
196 if (cpu & VEC_CPU_HAS_AVX512F) {
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
197 FILL_GIVEN_FUNC_PTRS( , 32, 16, avx512f);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
198 FILL_GIVEN_FUNC_PTRS(u, 32, 16, avx512f);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
199 FILL_GIVEN_FUNC_PTRS( , 64, 8, avx512f);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
200 FILL_GIVEN_FUNC_PTRS(u, 64, 8, avx512f);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
201 }
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
202 #endif
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
203
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
204 /* --- 256-bit */
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
205 #ifdef VEC_COMPILER_HAS_AVX2
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
206 if (cpu & VEC_CPU_HAS_AVX2) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
207 FILL_GIVEN_FUNC_PTRS( , 8, 32, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
208 FILL_GIVEN_FUNC_PTRS(u, 8, 32, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
209 FILL_GIVEN_FUNC_PTRS( , 16, 16, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
210 FILL_GIVEN_FUNC_PTRS(u, 16, 16, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
211 FILL_GIVEN_FUNC_PTRS( , 32, 8, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
212 FILL_GIVEN_FUNC_PTRS(u, 32, 8, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
213 FILL_GIVEN_FUNC_PTRS( , 64, 4, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
214 FILL_GIVEN_FUNC_PTRS(u, 64, 4, avx2);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
215 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
216 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
217
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
218 /* --- 128-bit */
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
/* The sse42 kernels may only be installed when the CPU itself reports
 * SSE4.2; testing the SSE4.1 bit would hand SSE4.2 code to CPUs that
 * only implement SSE4.1. */
219 #ifdef VEC_COMPILER_HAS_SSE42
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
220 if (cpu & VEC_CPU_HAS_SSE42) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
221 FILL_GIVEN_FUNC_PTRS( , 64, 2, sse42);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
222 FILL_GIVEN_FUNC_PTRS(u, 64, 2, sse42);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
223 }
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
224 #endif
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
225 #ifdef VEC_COMPILER_HAS_SSE41
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
226 if (cpu & VEC_CPU_HAS_SSE41) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
227 FILL_GIVEN_FUNC_PTRS( , 8, 16, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
228 FILL_GIVEN_FUNC_PTRS(u, 8, 16, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
229 FILL_GIVEN_FUNC_PTRS( , 16, 8, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
230 FILL_GIVEN_FUNC_PTRS(u, 16, 8, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
231 FILL_GIVEN_FUNC_PTRS( , 32, 4, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
232 FILL_GIVEN_FUNC_PTRS(u, 32, 4, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
233 FILL_GIVEN_FUNC_PTRS( , 64, 2, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
234 FILL_GIVEN_FUNC_PTRS(u, 64, 2, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
235 }
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
236 #endif
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
237 #ifdef VEC_COMPILER_HAS_SSE3
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
238 if (cpu & VEC_CPU_HAS_SSE3) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
239 FILL_GIVEN_FUNC_PTRS( , 8, 16, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
240 FILL_GIVEN_FUNC_PTRS(u, 8, 16, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
241 FILL_GIVEN_FUNC_PTRS( , 16, 8, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
242 FILL_GIVEN_FUNC_PTRS(u, 16, 8, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
243 FILL_GIVEN_FUNC_PTRS( , 32, 4, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
244 FILL_GIVEN_FUNC_PTRS(u, 32, 4, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
245 FILL_GIVEN_FUNC_PTRS( , 64, 2, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
246 FILL_GIVEN_FUNC_PTRS(u, 64, 2, sse3);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
247 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
248 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
249 #ifdef VEC_COMPILER_HAS_SSE2
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
250 if (cpu & VEC_CPU_HAS_SSE2) {
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
251 FILL_GIVEN_FUNC_PTRS( , 8, 16, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
252 FILL_GIVEN_FUNC_PTRS(u, 8, 16, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
253 FILL_GIVEN_FUNC_PTRS( , 16, 8, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
254 FILL_GIVEN_FUNC_PTRS(u, 16, 8, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
255 FILL_GIVEN_FUNC_PTRS( , 32, 4, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
256 FILL_GIVEN_FUNC_PTRS(u, 32, 4, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
257 FILL_GIVEN_FUNC_PTRS( , 64, 2, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
258 FILL_GIVEN_FUNC_PTRS(u, 64, 2, sse2);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
259 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
260 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
261 #ifdef VEC_COMPILER_HAS_NEON
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
262 if (cpu & VEC_CPU_HAS_NEON) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
263 FILL_GIVEN_FUNC_PTRS( , 8, 16, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
264 FILL_GIVEN_FUNC_PTRS(u, 8, 16, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
265 FILL_GIVEN_FUNC_PTRS( , 16, 8, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
266 FILL_GIVEN_FUNC_PTRS(u, 16, 8, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
267 FILL_GIVEN_FUNC_PTRS( , 32, 4, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
268 FILL_GIVEN_FUNC_PTRS(u, 32, 4, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
269 FILL_GIVEN_FUNC_PTRS( , 64, 2, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
270 FILL_GIVEN_FUNC_PTRS(u, 64, 2, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
271 }
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
272 #endif
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
273 #ifdef VEC_COMPILER_HAS_ALTIVEC
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
274 if (cpu & VEC_CPU_HAS_ALTIVEC) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
275 FILL_GIVEN_FUNC_PTRS( , 8, 16, altivec);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
276 FILL_GIVEN_FUNC_PTRS(u, 8, 16, altivec);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
277 FILL_GIVEN_FUNC_PTRS( , 16, 8, altivec);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
278 FILL_GIVEN_FUNC_PTRS(u, 16, 8, altivec);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
279 FILL_GIVEN_FUNC_PTRS( , 32, 4, altivec);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
280 FILL_GIVEN_FUNC_PTRS(u, 32, 4, altivec);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
281 }
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
282 #endif
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
283
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
284 /* --- 64-bit */
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
285 #ifdef VEC_COMPILER_HAS_MMX
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
286 if (cpu & VEC_CPU_HAS_MMX) {
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
287 FILL_GIVEN_FUNC_PTRS( , 8, 8, mmx);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
288 FILL_GIVEN_FUNC_PTRS(u, 8, 8, mmx);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
289 FILL_GIVEN_FUNC_PTRS( , 16, 4, mmx);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
290 FILL_GIVEN_FUNC_PTRS(u, 16, 4, mmx);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
291 FILL_GIVEN_FUNC_PTRS( , 32, 2, mmx);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
292 FILL_GIVEN_FUNC_PTRS(u, 32, 2, mmx);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
293 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
294 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
295 #ifdef VEC_COMPILER_HAS_NEON
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
296 if (cpu & VEC_CPU_HAS_NEON) {
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
297 FILL_GIVEN_FUNC_PTRS( , 8, 8, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
298 FILL_GIVEN_FUNC_PTRS(u, 8, 8, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
299 FILL_GIVEN_FUNC_PTRS( , 16, 4, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
300 FILL_GIVEN_FUNC_PTRS(u, 16, 4, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
301 FILL_GIVEN_FUNC_PTRS( , 32, 2, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
302 FILL_GIVEN_FUNC_PTRS(u, 32, 2, neon);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
303 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
304 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
305
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
306 /* fill any remaining function pointers with generics */
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
307 FILL_GIVEN_FUNC_PTRS( , 8, 64, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
308 FILL_GIVEN_FUNC_PTRS(u, 8, 64, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
309 FILL_GIVEN_FUNC_PTRS( , 16, 32, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
310 FILL_GIVEN_FUNC_PTRS(u, 16, 32, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
311 FILL_GIVEN_FUNC_PTRS( , 32, 16, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
312 FILL_GIVEN_FUNC_PTRS(u, 32, 16, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
313 FILL_GIVEN_FUNC_PTRS( , 64, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
314 FILL_GIVEN_FUNC_PTRS(u, 64, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
315
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
316 FILL_GIVEN_FUNC_PTRS( , 8, 32, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
317 FILL_GIVEN_FUNC_PTRS(u, 8, 32, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
318 FILL_GIVEN_FUNC_PTRS( , 16, 16, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
319 FILL_GIVEN_FUNC_PTRS(u, 16, 16, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
320 FILL_GIVEN_FUNC_PTRS( , 32, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
321 FILL_GIVEN_FUNC_PTRS(u, 32, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
322 FILL_GIVEN_FUNC_PTRS( , 64, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
323 FILL_GIVEN_FUNC_PTRS(u, 64, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
324
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
325 FILL_GIVEN_FUNC_PTRS( , 8, 16, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
326 FILL_GIVEN_FUNC_PTRS(u, 8, 16, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
327 FILL_GIVEN_FUNC_PTRS( , 16, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
328 FILL_GIVEN_FUNC_PTRS(u, 16, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
329 FILL_GIVEN_FUNC_PTRS( , 32, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
330 FILL_GIVEN_FUNC_PTRS(u, 32, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
331 FILL_GIVEN_FUNC_PTRS( , 64, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
332 FILL_GIVEN_FUNC_PTRS(u, 64, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
333
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
334 FILL_GIVEN_FUNC_PTRS( , 8, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
335 FILL_GIVEN_FUNC_PTRS(u, 8, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
336 FILL_GIVEN_FUNC_PTRS( , 16, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
337 FILL_GIVEN_FUNC_PTRS(u, 16, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
338 FILL_GIVEN_FUNC_PTRS( , 32, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
339 FILL_GIVEN_FUNC_PTRS(u, 32, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
340
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
341 FILL_GIVEN_FUNC_PTRS( , 8, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
342 FILL_GIVEN_FUNC_PTRS(u, 8, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
343 FILL_GIVEN_FUNC_PTRS( , 16, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
344 FILL_GIVEN_FUNC_PTRS(u, 16, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
345
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
346 FILL_GIVEN_FUNC_PTRS( , 8, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
347 FILL_GIVEN_FUNC_PTRS(u, 8, 2, generic);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
348
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
349 vec_init_spinner++;
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
350
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
351 return 0;
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
352 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
353
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
354 /* ---------------------------------------------------------------- */
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
355
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
/*
 * Declares, with `extern inline`, the full per-type operation set for the
 * vector type v<sign>int<bits>x<size>: splat, aligned/unaligned load and
 * store, add/sub/mul/div/avg, and/or/xor, the five comparisons
 * (cmplt/cmple/cmpeq/cmpge/cmpgt), lshift/rshift/lrshift, and min/max.
 *
 * `sign` is empty for the vint* types or `u` for the vuint* types. The
 * second operand of the shift functions is always the unsigned vector type
 * vuint<bits>x<size>, whatever `sign` is.
 *
 * NOTE(review): presumably the matching inline definitions come from a
 * header, and these declarations make this translation unit emit the
 * external definitions (C99 inline semantics) -- confirm against the header.
 */
356 #define VEC_DEFINE_OPERATIONS_SIGN(sign, bits, size) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
357 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
358 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_load_aligned(const vec_##sign##int##bits in[size]); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
359 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_load(const vec_##sign##int##bits in[size]); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
360 extern inline void v##sign##int##bits##x##size##_store_aligned(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
361 extern inline void v##sign##int##bits##x##size##_store(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
362 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_add(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
363 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_sub(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
364 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_mul(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
365 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_div(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
366 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
367 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_and(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
368 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_or(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
369 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_xor(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
370 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmplt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
371 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmple(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
372 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpeq(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
373 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpge(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
374 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpgt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
375 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_lshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
376 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_rshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2); \
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
377 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_lrshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
378 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_min(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
379 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_max(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
380
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
/*
 * Expands VEC_DEFINE_OPERATIONS_SIGN twice for one element-width/lane-count
 * pair: once with an empty sign prefix and once with the `u` prefix.
 */
381 #define VEC_DEFINE_OPERATIONS(bits, size) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
382 VEC_DEFINE_OPERATIONS_SIGN( , bits, size) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
383 VEC_DEFINE_OPERATIONS_SIGN(u, bits, size)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
384
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
/*
 * Expand VEC_DEFINE_OPERATIONS once per vector shape. The arguments are
 * (element bits, lane count); the groups below are ordered by total vector
 * width (element bits * lane count), from 16 bits up to 512 bits. Both helper
 * macros are #undef'd once the last shape has been expanded.
 */
385 // 16-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
386 VEC_DEFINE_OPERATIONS(8, 2)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
387
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
388 // 32-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
389 VEC_DEFINE_OPERATIONS(8, 4)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
390 VEC_DEFINE_OPERATIONS(16, 2)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
391
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
392 // 64-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
393 VEC_DEFINE_OPERATIONS(8, 8)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
394 VEC_DEFINE_OPERATIONS(16, 4)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
395 VEC_DEFINE_OPERATIONS(32, 2)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
396
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
397 // 128-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
398 VEC_DEFINE_OPERATIONS(8, 16)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
399 VEC_DEFINE_OPERATIONS(16, 8)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
400 VEC_DEFINE_OPERATIONS(32, 4)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
401 VEC_DEFINE_OPERATIONS(64, 2)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
402
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
403 // 256-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
404 VEC_DEFINE_OPERATIONS(8, 32)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
405 VEC_DEFINE_OPERATIONS(16, 16)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
406 VEC_DEFINE_OPERATIONS(32, 8)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
407 VEC_DEFINE_OPERATIONS(64, 4)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
408
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
409 // 512-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
410 VEC_DEFINE_OPERATIONS(8, 64)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
411 VEC_DEFINE_OPERATIONS(16, 32)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
412 VEC_DEFINE_OPERATIONS(32, 16)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
413 VEC_DEFINE_OPERATIONS(64, 8)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
414
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
415 #undef VEC_DEFINE_OPERATIONS
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
416 #undef VEC_DEFINE_OPERATIONS_SIGN