annotate src/vec.c @ 28:c6c99ab1088a

*: add min/max functions and a big big refactor (again) agh, this time I added a few more implementations (and generally made the code just a little faster...)
author Paper <paper@tflc.us>
date Thu, 24 Apr 2025 00:54:02 -0400
parents 92156fe32755
children e59c91d050c0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
1 /**
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
2 * vec - a tiny SIMD vector library in C99
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
3 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
4 * Copyright (c) 2024 Paper
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
5 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
7 * of this software and associated documentation files (the "Software"), to deal
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
8 * in the Software without restriction, including without limitation the rights
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
10 * copies of the Software, and to permit persons to whom the Software is
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
11 * furnished to do so, subject to the following conditions:
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
12 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
13 * The above copyright notice and this permission notice shall be included in all
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
14 * copies or substantial portions of the Software.
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
15 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
22 * SOFTWARE.
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
23 **/
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
24
8
6e0eb3aa12ab build: add files to build vec as an external library
Paper <paper@tflc.us>
parents:
diff changeset
25 #include "vec/vec.h"
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
26 #include "vec/cpu.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
27 #include "vec/impl/generic.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
28 #include "vec/impl/fallback.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
29 #ifdef VEC_COMPILER_HAS_MMX
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
30 # include "vec/impl/x86/mmx.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
31 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
32 #ifdef VEC_COMPILER_HAS_SSE2
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
33 # include "vec/impl/x86/sse2.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
34 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
35 #ifdef VEC_COMPILER_HAS_SSE3
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
36 # include "vec/impl/x86/sse3.h"
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
37 #endif
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
38 #ifdef VEC_COMPILER_HAS_SSE41
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
39 # include "vec/impl/x86/sse41.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
40 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
41 #ifdef VEC_COMPILER_HAS_SSE42
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
42 # include "vec/impl/x86/sse42.h"
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
43 #endif
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
44 #ifdef VEC_COMPILER_HAS_AVX2
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
45 # include "vec/impl/x86/avx2.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
46 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
47 #ifdef VEC_COMPILER_HAS_AVX512F
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
48 # include "vec/impl/x86/avx512f.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
49 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
50 #ifdef VEC_COMPILER_HAS_AVX512BW
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
51 # include "vec/impl/x86/avx512bw.h"
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
52 #endif
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
53 #ifdef VEC_COMPILER_HAS_AVX512DQ
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
54 # include "vec/impl/x86/avx512dq.h"
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
55 #endif
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
56 #ifdef VEC_COMPILER_HAS_ALTIVEC
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
57 # include "vec/impl/ppc/altivec.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
58 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
59 #ifdef VEC_COMPILER_HAS_NEON
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
60 # include "vec/impl/arm/neon.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
61 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
62
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
63 extern inline vec_uintmax vec_lrshift(vec_uintmax x, unsigned int y);
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
64 extern inline vec_uintmax vec_llshift(vec_uintmax x, unsigned int y);
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
65 extern inline vec_uintmax vec_urshift(vec_uintmax x, unsigned int y);
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
66 extern inline vec_uintmax vec_ulshift(vec_uintmax x, unsigned int y);
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
67 extern inline vec_intmax vec_rshift(vec_intmax x, unsigned int y);
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
68 extern inline vec_intmax vec_lshift(vec_intmax x, unsigned int y);
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
69
25
92156fe32755 impl/ppc/altivec: update to new implementation
Paper <paper@tflc.us>
parents: 23
diff changeset
70 extern inline vec_intmax vec_avg(vec_intmax x, vec_intmax y);
92156fe32755 impl/ppc/altivec: update to new implementation
Paper <paper@tflc.us>
parents: 23
diff changeset
71 extern inline vec_uintmax vec_uavg(vec_uintmax x, vec_uintmax y);
92156fe32755 impl/ppc/altivec: update to new implementation
Paper <paper@tflc.us>
parents: 23
diff changeset
72
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
73 // 16-bit
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
74 vint8x2_impl vint8x2_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
75 vuint8x2_impl vuint8x2_impl_cpu = {0};
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
76
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
77 // 32-bit
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
78 vint8x4_impl vint8x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
79 vuint8x4_impl vuint8x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
80 vint16x2_impl vint16x2_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
81 vuint16x2_impl vuint16x2_impl_cpu = {0};
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
82
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
83 // 64-bit
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
84 vint8x8_impl vint8x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
85 vuint8x8_impl vuint8x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
86 vint16x4_impl vint16x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
87 vuint16x4_impl vuint16x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
88 vint32x2_impl vint32x2_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
89 vuint32x2_impl vuint32x2_impl_cpu = {0};
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
90
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
91 // 128-bit
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
92 vint8x16_impl vint8x16_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
93 vuint8x16_impl vuint8x16_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
94 vint16x8_impl vint16x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
95 vuint16x8_impl vuint16x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
96 vint32x4_impl vint32x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
97 vuint32x4_impl vuint32x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
98 vint64x2_impl vint64x2_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
99 vuint64x2_impl vuint64x2_impl_cpu = {0};
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
100
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
101 // 256-bit
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
102 vint8x32_impl vint8x32_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
103 vuint8x32_impl vuint8x32_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
104 vint16x16_impl vint16x16_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
105 vuint16x16_impl vuint16x16_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
106 vint32x8_impl vint32x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
107 vuint32x8_impl vuint32x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
108 vint64x4_impl vint64x4_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
109 vuint64x4_impl vuint64x4_impl_cpu = {0};
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
110
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
111 // 512-bit
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
112 vint8x64_impl vint8x64_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
113 vuint8x64_impl vuint8x64_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
114 vint16x32_impl vint16x32_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
115 vuint16x32_impl vuint16x32_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
116 vint32x16_impl vint32x16_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
117 vuint32x16_impl vuint32x16_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
118 vint64x8_impl vint64x8_impl_cpu = {0};
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
119 vuint64x8_impl vuint64x8_impl_cpu = {0};
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
120
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
121 static int vec_init_spinner = 0;
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
122
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
123 #define FILL_GIVEN_FUNC_PTR(cpu, impl, func) \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
124 do { \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
125 if (!(cpu).func && (impl).func) \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
126 (cpu).func = (impl).func; \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
127 } while (0)
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
128
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
129 #define FILL_GIVEN_FUNC_PTRS_EX(cpu, impl) \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
130 do { \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
131 FILL_GIVEN_FUNC_PTR(cpu, impl, splat); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
132 FILL_GIVEN_FUNC_PTR(cpu, impl, load_aligned); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
133 FILL_GIVEN_FUNC_PTR(cpu, impl, load); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
134 FILL_GIVEN_FUNC_PTR(cpu, impl, store_aligned); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
135 FILL_GIVEN_FUNC_PTR(cpu, impl, store); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
136 FILL_GIVEN_FUNC_PTR(cpu, impl, add); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
137 FILL_GIVEN_FUNC_PTR(cpu, impl, sub); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
138 FILL_GIVEN_FUNC_PTR(cpu, impl, mul); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
139 FILL_GIVEN_FUNC_PTR(cpu, impl, div); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
140 FILL_GIVEN_FUNC_PTR(cpu, impl, avg); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
141 FILL_GIVEN_FUNC_PTR(cpu, impl, band); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
142 FILL_GIVEN_FUNC_PTR(cpu, impl, bor); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
143 FILL_GIVEN_FUNC_PTR(cpu, impl, bxor); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
144 FILL_GIVEN_FUNC_PTR(cpu, impl, lshift); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
145 FILL_GIVEN_FUNC_PTR(cpu, impl, rshift); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
146 FILL_GIVEN_FUNC_PTR(cpu, impl, lrshift); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
147 FILL_GIVEN_FUNC_PTR(cpu, impl, cmplt); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
148 FILL_GIVEN_FUNC_PTR(cpu, impl, cmple); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
149 FILL_GIVEN_FUNC_PTR(cpu, impl, cmpeq); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
150 FILL_GIVEN_FUNC_PTR(cpu, impl, cmpge); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
151 FILL_GIVEN_FUNC_PTR(cpu, impl, cmpgt); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
152 FILL_GIVEN_FUNC_PTR(cpu, impl, min); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
153 FILL_GIVEN_FUNC_PTR(cpu, impl, max); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
154 } while (0)
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
155
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
156 #define FILL_GIVEN_FUNC_PTRS(sign, bits, size, impl) \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
157 FILL_GIVEN_FUNC_PTRS_EX(v##sign##int##bits##x##size##_impl_cpu, v##sign##int##bits##x##size##_impl_##impl)
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
158
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
159 // returns 0 on success, or a negative error code on failure
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
160 int vec_init(void)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
161 {
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
162 // This function is NOT thread safe. However, once vec
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
163 // is initialized, all of the vector functions are thread-safe.
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
164
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
165 if (vec_init_spinner)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
166 return 0; // already initialized, do nothing
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
167
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
168 vec_uint32 cpu = vec_get_CPU_features();
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
169
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
170 /* Okay, this might be a little confusing:
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
171 * We do it this way because of x86. For weird reasons,
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
172 * Intel kept extending their prior CPU extensions, to the point
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
173 * where SSE4.1 has some extended features of SSE2, AVX2
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
174 * has some extended features that should've been in SSE
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
175 * in general, etc.
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
176 *
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
177 * For this, I've just decided to keep the function
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
178 * definitions private, and fill in as we go, with newer
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
179 * intrinsics preferred. Others are arbitrary and are
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
180 * mutually exclusive (i.e. Altivec vs NEON). This is simply
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
181 * the easiest way to go about it :) */
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
182
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
183 /* --- 512-bit */
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
184 #ifdef VEC_COMPILER_HAS_AVX512DQ
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
185 if (cpu & VEC_CPU_HAS_AVX512DQ) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
186 /* these give us native multiply instructions */
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
187 FILL_GIVEN_FUNC_PTRS( , 64, 8, avx512dq);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
188 FILL_GIVEN_FUNC_PTRS(u, 64, 8, avx512dq);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
189 }
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
190 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
191 #ifdef VEC_COMPILER_HAS_AVX512BW
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
192 if (cpu & VEC_CPU_HAS_AVX512BW) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
193 FILL_GIVEN_FUNC_PTRS( , 8, 64, avx512bw);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
194 FILL_GIVEN_FUNC_PTRS(u, 8, 64, avx512bw);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
195 FILL_GIVEN_FUNC_PTRS( , 16, 32, avx512bw);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
196 FILL_GIVEN_FUNC_PTRS(u, 16, 32, avx512bw);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
197 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
198 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
199 #ifdef VEC_COMPILER_HAS_AVX512F
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
200 if (cpu & VEC_CPU_HAS_AVX512F) {
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
201 FILL_GIVEN_FUNC_PTRS( , 32, 16, avx512f);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
202 FILL_GIVEN_FUNC_PTRS(u, 32, 16, avx512f);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
203 FILL_GIVEN_FUNC_PTRS( , 64, 8, avx512f);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
204 FILL_GIVEN_FUNC_PTRS(u, 64, 8, avx512f);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
205 }
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
206 #endif
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
207
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
208 /* --- 256-bit */
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
209 #ifdef VEC_COMPILER_HAS_AVX2
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
210 if (cpu & VEC_CPU_HAS_AVX2) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
211 FILL_GIVEN_FUNC_PTRS( , 8, 32, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
212 FILL_GIVEN_FUNC_PTRS(u, 8, 32, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
213 FILL_GIVEN_FUNC_PTRS( , 16, 16, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
214 FILL_GIVEN_FUNC_PTRS(u, 16, 16, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
215 FILL_GIVEN_FUNC_PTRS( , 32, 8, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
216 FILL_GIVEN_FUNC_PTRS(u, 32, 8, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
217 FILL_GIVEN_FUNC_PTRS( , 64, 4, avx2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
218 FILL_GIVEN_FUNC_PTRS(u, 64, 4, avx2);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
219 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
220 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
221
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
222 /* --- 128-bit */
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
223 #ifdef VEC_COMPILER_HAS_SSE42
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
/* The sse42 implementations may only be selected when the CPU reports
 * SSE4.2. Testing the SSE4.1 bit here would hand them to CPUs that only
 * have SSE4.1; those CPUs are served by the sse41 group that follows. */
224 if (cpu & VEC_CPU_HAS_SSE42) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
225 FILL_GIVEN_FUNC_PTRS( , 64, 2, sse42);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
226 FILL_GIVEN_FUNC_PTRS(u, 64, 2, sse42);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
227 }
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
228 #endif
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
229 #ifdef VEC_COMPILER_HAS_SSE41
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
230 if (cpu & VEC_CPU_HAS_SSE41) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
231 FILL_GIVEN_FUNC_PTRS( , 8, 16, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
232 FILL_GIVEN_FUNC_PTRS(u, 8, 16, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
233 FILL_GIVEN_FUNC_PTRS( , 16, 8, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
234 FILL_GIVEN_FUNC_PTRS(u, 16, 8, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
235 FILL_GIVEN_FUNC_PTRS( , 32, 4, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
236 FILL_GIVEN_FUNC_PTRS(u, 32, 4, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
237 FILL_GIVEN_FUNC_PTRS( , 64, 2, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
238 FILL_GIVEN_FUNC_PTRS(u, 64, 2, sse41);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
239 }
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
240 #endif
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
241 #ifdef VEC_COMPILER_HAS_SSE3
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
242 if (cpu & VEC_CPU_HAS_SSE3) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
243 FILL_GIVEN_FUNC_PTRS( , 8, 16, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
244 FILL_GIVEN_FUNC_PTRS(u, 8, 16, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
245 FILL_GIVEN_FUNC_PTRS( , 16, 8, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
246 FILL_GIVEN_FUNC_PTRS(u, 16, 8, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
247 FILL_GIVEN_FUNC_PTRS( , 32, 4, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
248 FILL_GIVEN_FUNC_PTRS(u, 32, 4, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
249 FILL_GIVEN_FUNC_PTRS( , 64, 2, sse3);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
250 FILL_GIVEN_FUNC_PTRS(u, 64, 2, sse3);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
251 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
252 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
253 #ifdef VEC_COMPILER_HAS_SSE2
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
254 if (cpu & VEC_CPU_HAS_SSE2) {
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
255 FILL_GIVEN_FUNC_PTRS( , 8, 16, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
256 FILL_GIVEN_FUNC_PTRS(u, 8, 16, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
257 FILL_GIVEN_FUNC_PTRS( , 16, 8, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
258 FILL_GIVEN_FUNC_PTRS(u, 16, 8, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
259 FILL_GIVEN_FUNC_PTRS( , 32, 4, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
260 FILL_GIVEN_FUNC_PTRS(u, 32, 4, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
261 FILL_GIVEN_FUNC_PTRS( , 64, 2, sse2);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
262 FILL_GIVEN_FUNC_PTRS(u, 64, 2, sse2);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
263 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
264 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
265 #ifdef VEC_COMPILER_HAS_NEON
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
266 if (cpu & VEC_CPU_HAS_NEON) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
267 FILL_GIVEN_FUNC_PTRS( , 8, 16, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
268 FILL_GIVEN_FUNC_PTRS(u, 8, 16, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
269 FILL_GIVEN_FUNC_PTRS( , 16, 8, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
270 FILL_GIVEN_FUNC_PTRS(u, 16, 8, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
271 FILL_GIVEN_FUNC_PTRS( , 32, 4, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
272 FILL_GIVEN_FUNC_PTRS(u, 32, 4, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
273 FILL_GIVEN_FUNC_PTRS( , 64, 2, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
274 FILL_GIVEN_FUNC_PTRS(u, 64, 2, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
275 }
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
276 #endif
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
277 #ifdef VEC_COMPILER_HAS_ALTIVEC
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
278 if (cpu & VEC_CPU_HAS_ALTIVEC) {
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
279 FILL_GIVEN_FUNC_PTRS( , 8, 16, altivec);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
280 FILL_GIVEN_FUNC_PTRS(u, 8, 16, altivec);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
281 FILL_GIVEN_FUNC_PTRS( , 16, 8, altivec);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
282 FILL_GIVEN_FUNC_PTRS(u, 16, 8, altivec);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
283 FILL_GIVEN_FUNC_PTRS( , 32, 4, altivec);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
284 FILL_GIVEN_FUNC_PTRS(u, 32, 4, altivec);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
285 }
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
286 #endif
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
287
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
288 /* --- 64-bit */
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
289 #ifdef VEC_COMPILER_HAS_MMX
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
290 if (cpu & VEC_CPU_HAS_MMX) {
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
291 FILL_GIVEN_FUNC_PTRS( , 8, 8, mmx);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
292 FILL_GIVEN_FUNC_PTRS(u, 8, 8, mmx);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
293 FILL_GIVEN_FUNC_PTRS( , 16, 4, mmx);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
294 FILL_GIVEN_FUNC_PTRS(u, 16, 4, mmx);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
295 FILL_GIVEN_FUNC_PTRS( , 32, 2, mmx);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
296 FILL_GIVEN_FUNC_PTRS(u, 32, 2, mmx);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
297 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
298 #endif
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
299 #ifdef VEC_COMPILER_HAS_NEON
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
300 if (cpu & VEC_CPU_HAS_NEON) {
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
301 FILL_GIVEN_FUNC_PTRS( , 8, 8, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
302 FILL_GIVEN_FUNC_PTRS(u, 8, 8, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
303 FILL_GIVEN_FUNC_PTRS( , 16, 4, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
304 FILL_GIVEN_FUNC_PTRS(u, 16, 4, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
305 FILL_GIVEN_FUNC_PTRS( , 32, 2, neon);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
306 FILL_GIVEN_FUNC_PTRS(u, 32, 2, neon);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
307 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
308 #endif
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
309
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
310 /* fill any remaining function pointers with generics */
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
311 FILL_GIVEN_FUNC_PTRS( , 8, 64, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
312 FILL_GIVEN_FUNC_PTRS(u, 8, 64, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
313 FILL_GIVEN_FUNC_PTRS( , 16, 32, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
314 FILL_GIVEN_FUNC_PTRS(u, 16, 32, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
315 FILL_GIVEN_FUNC_PTRS( , 32, 16, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
316 FILL_GIVEN_FUNC_PTRS(u, 32, 16, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
317 FILL_GIVEN_FUNC_PTRS( , 64, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
318 FILL_GIVEN_FUNC_PTRS(u, 64, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
319
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
320 FILL_GIVEN_FUNC_PTRS( , 8, 32, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
321 FILL_GIVEN_FUNC_PTRS(u, 8, 32, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
322 FILL_GIVEN_FUNC_PTRS( , 16, 16, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
323 FILL_GIVEN_FUNC_PTRS(u, 16, 16, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
324 FILL_GIVEN_FUNC_PTRS( , 32, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
325 FILL_GIVEN_FUNC_PTRS(u, 32, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
326 FILL_GIVEN_FUNC_PTRS( , 64, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
327 FILL_GIVEN_FUNC_PTRS(u, 64, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
328
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
329 FILL_GIVEN_FUNC_PTRS( , 8, 16, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
330 FILL_GIVEN_FUNC_PTRS(u, 8, 16, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
331 FILL_GIVEN_FUNC_PTRS( , 16, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
332 FILL_GIVEN_FUNC_PTRS(u, 16, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
333 FILL_GIVEN_FUNC_PTRS( , 32, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
334 FILL_GIVEN_FUNC_PTRS(u, 32, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
335 FILL_GIVEN_FUNC_PTRS( , 64, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
336 FILL_GIVEN_FUNC_PTRS(u, 64, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
337
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
338 FILL_GIVEN_FUNC_PTRS( , 8, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
339 FILL_GIVEN_FUNC_PTRS(u, 8, 8, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
340 FILL_GIVEN_FUNC_PTRS( , 16, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
341 FILL_GIVEN_FUNC_PTRS(u, 16, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
342 FILL_GIVEN_FUNC_PTRS( , 32, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
343 FILL_GIVEN_FUNC_PTRS(u, 32, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
344
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
345 FILL_GIVEN_FUNC_PTRS( , 8, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
346 FILL_GIVEN_FUNC_PTRS(u, 8, 4, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
347 FILL_GIVEN_FUNC_PTRS( , 16, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
348 FILL_GIVEN_FUNC_PTRS(u, 16, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
349
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
350 FILL_GIVEN_FUNC_PTRS( , 8, 2, generic);
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
351 FILL_GIVEN_FUNC_PTRS(u, 8, 2, generic);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
352
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
353 vec_init_spinner++;
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
354
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
355 return 0;
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
356 }
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
357
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
358 /* ---------------------------------------------------------------- */
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
359
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
/*
 * VEC_DEFINE_OPERATIONS_SIGN(sign, bits, size)
 *
 * Expands to `extern inline` declarations for the operations of the vector
 * type v<sign>int<bits>x<size>: splat, load_aligned, load, store_aligned,
 * store, add, sub, mul, div, avg, and, or, xor, cmplt, cmple, cmpeq, cmpge,
 * cmpgt, lshift, rshift, lrshift, min and max.
 *
 *   sign - pasted into the type names: empty gives vint.../vec_int...,
 *          `u` gives vuint.../vec_uint...
 *   bits - element width used in the type name
 *   size - element count used in the type name and in the array parameters
 *
 * The three shift declarations always take their second operand as a
 * vuint<bits>x<size>, whatever `sign` is.
 *
 * NOTE(review): presumably the matching inline definitions come from the
 * project headers, so that these declarations make this translation unit
 * provide the external definitions (C99 inline model) -- confirm against
 * the header.
 */
360 #define VEC_DEFINE_OPERATIONS_SIGN(sign, bits, size) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
361 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
362 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_load_aligned(const vec_##sign##int##bits in[size]); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
363 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_load(const vec_##sign##int##bits in[size]); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
364 extern inline void v##sign##int##bits##x##size##_store_aligned(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
365 extern inline void v##sign##int##bits##x##size##_store(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
366 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_add(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
367 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_sub(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
368 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_mul(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
369 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_div(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
370 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
371 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_and(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
372 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_or(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
373 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_xor(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
374 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmplt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
375 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmple(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
376 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpeq(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
377 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpge(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
378 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpgt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
379 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_lshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
380 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_rshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2); \
28
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
381 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_lrshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
382 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_min(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \
c6c99ab1088a *: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents: 25
diff changeset
383 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_max(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2);
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
384
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
/*
 * VEC_DEFINE_OPERATIONS(bits, size)
 *
 * Expands VEC_DEFINE_OPERATIONS_SIGN twice for the same bits/size pair:
 * once with an empty sign (vint<bits>x<size>) and once with `u`
 * (vuint<bits>x<size>).
 */
385 #define VEC_DEFINE_OPERATIONS(bits, size) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
386 VEC_DEFINE_OPERATIONS_SIGN( , bits, size) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
387 VEC_DEFINE_OPERATIONS_SIGN(u, bits, size)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
388
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
/*
 * Expand VEC_DEFINE_OPERATIONS for each (bits, size) pair listed below.
 * The pairs are grouped by total vector width, i.e. bits * size, which is
 * what the "// N-bit" headings refer to.
 */
389 // 16-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
390 VEC_DEFINE_OPERATIONS(8, 2)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
391
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
392 // 32-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
393 VEC_DEFINE_OPERATIONS(8, 4)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
394 VEC_DEFINE_OPERATIONS(16, 2)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
395
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
396 // 64-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
397 VEC_DEFINE_OPERATIONS(8, 8)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
398 VEC_DEFINE_OPERATIONS(16, 4)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
399 VEC_DEFINE_OPERATIONS(32, 2)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
400
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
401 // 128-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
402 VEC_DEFINE_OPERATIONS(8, 16)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
403 VEC_DEFINE_OPERATIONS(16, 8)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
404 VEC_DEFINE_OPERATIONS(32, 4)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
405 VEC_DEFINE_OPERATIONS(64, 2)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
406
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
407 // 256-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
408 VEC_DEFINE_OPERATIONS(8, 32)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
409 VEC_DEFINE_OPERATIONS(16, 16)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
410 VEC_DEFINE_OPERATIONS(32, 8)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
411 VEC_DEFINE_OPERATIONS(64, 4)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
412
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
413 // 512-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
414 VEC_DEFINE_OPERATIONS(8, 64)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
415 VEC_DEFINE_OPERATIONS(16, 32)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
416 VEC_DEFINE_OPERATIONS(32, 16)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
417 VEC_DEFINE_OPERATIONS(64, 8)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
418
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
/* The two helper macros are not used past this point, so drop them. */
419 #undef VEC_DEFINE_OPERATIONS
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents: 15
diff changeset
420 #undef VEC_DEFINE_OPERATIONS_SIGN