Mercurial > vec
annotate src/vec.c @ 28:c6c99ab1088a
*: add min/max functions and a big big refactor (again)
agh, this time I added a few more implementations (and generally
made the code just a little faster...)
author | Paper <paper@tflc.us> |
---|---|
date | Thu, 24 Apr 2025 00:54:02 -0400 |
parents | 92156fe32755 |
children | e59c91d050c0 |
rev | line source |
---|---|
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
1 /** |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
2 * vec - a tiny SIMD vector library in C99 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
3 * |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
4 * Copyright (c) 2024 Paper |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
5 * |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
6 * Permission is hereby granted, free of charge, to any person obtaining a copy |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
7 * of this software and associated documentation files (the "Software"), to deal |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
8 * in the Software without restriction, including without limitation the rights |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
10 * copies of the Software, and to permit persons to whom the Software is |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
11 * furnished to do so, subject to the following conditions: |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
12 * |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
13 * The above copyright notice and this permission notice shall be included in all |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
14 * copies or substantial portions of the Software. |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
15 * |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
22 * SOFTWARE. |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
23 **/ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
24 |
8
6e0eb3aa12ab
build: add files to build vec as an external library
Paper <paper@tflc.us>
parents:
diff
changeset
|
25 #include "vec/vec.h" |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
26 #include "vec/cpu.h" |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
27 #include "vec/impl/generic.h" |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
28 #include "vec/impl/fallback.h" |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
29 #ifdef VEC_COMPILER_HAS_MMX |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
30 # include "vec/impl/x86/mmx.h" |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
31 #endif |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
32 #ifdef VEC_COMPILER_HAS_SSE2 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
33 # include "vec/impl/x86/sse2.h" |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
34 #endif |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
35 #ifdef VEC_COMPILER_HAS_SSE3 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
36 # include "vec/impl/x86/sse3.h" |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
37 #endif |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
38 #ifdef VEC_COMPILER_HAS_SSE41 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
39 # include "vec/impl/x86/sse41.h" |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
40 #endif |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
41 #ifdef VEC_COMPILER_HAS_SSE42 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
42 # include "vec/impl/x86/sse42.h" |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
43 #endif |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
44 #ifdef VEC_COMPILER_HAS_AVX2 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
45 # include "vec/impl/x86/avx2.h" |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
46 #endif |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
47 #ifdef VEC_COMPILER_HAS_AVX512F |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
48 # include "vec/impl/x86/avx512f.h" |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
49 #endif |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
50 #ifdef VEC_COMPILER_HAS_AVX512BW |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
51 # include "vec/impl/x86/avx512bw.h" |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
52 #endif |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
53 #ifdef VEC_COMPILER_HAS_AVX512DQ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
54 # include "vec/impl/x86/avx512dq.h" |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
55 #endif |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
56 #ifdef VEC_COMPILER_HAS_ALTIVEC |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
57 # include "vec/impl/ppc/altivec.h" |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
58 #endif |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
59 #ifdef VEC_COMPILER_HAS_NEON |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
60 # include "vec/impl/arm/neon.h" |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
61 #endif |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
62 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
63 extern inline vec_uintmax vec_lrshift(vec_uintmax x, unsigned int y); |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
64 extern inline vec_uintmax vec_llshift(vec_uintmax x, unsigned int y); |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
65 extern inline vec_uintmax vec_urshift(vec_uintmax x, unsigned int y); |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
66 extern inline vec_uintmax vec_ulshift(vec_uintmax x, unsigned int y); |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
67 extern inline vec_intmax vec_rshift(vec_intmax x, unsigned int y); |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
68 extern inline vec_intmax vec_lshift(vec_intmax x, unsigned int y); |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
69 |
25
92156fe32755
impl/ppc/altivec: update to new implementation
Paper <paper@tflc.us>
parents:
23
diff
changeset
|
70 extern inline vec_intmax vec_avg(vec_intmax x, vec_intmax y); |
92156fe32755
impl/ppc/altivec: update to new implementation
Paper <paper@tflc.us>
parents:
23
diff
changeset
|
71 extern inline vec_uintmax vec_uavg(vec_uintmax x, vec_uintmax y); |
92156fe32755
impl/ppc/altivec: update to new implementation
Paper <paper@tflc.us>
parents:
23
diff
changeset
|
72 |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
73 // 16-bit |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
74 vint8x2_impl vint8x2_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
75 vuint8x2_impl vuint8x2_impl_cpu = {0}; |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
76 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
77 // 32-bit |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
78 vint8x4_impl vint8x4_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
79 vuint8x4_impl vuint8x4_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
80 vint16x2_impl vint16x2_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
81 vuint16x2_impl vuint16x2_impl_cpu = {0}; |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
82 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
83 // 64-bit |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
84 vint8x8_impl vint8x8_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
85 vuint8x8_impl vuint8x8_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
86 vint16x4_impl vint16x4_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
87 vuint16x4_impl vuint16x4_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
88 vint32x2_impl vint32x2_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
89 vuint32x2_impl vuint32x2_impl_cpu = {0}; |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
90 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
91 // 128-bit |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
92 vint8x16_impl vint8x16_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
93 vuint8x16_impl vuint8x16_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
94 vint16x8_impl vint16x8_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
95 vuint16x8_impl vuint16x8_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
96 vint32x4_impl vint32x4_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
97 vuint32x4_impl vuint32x4_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
98 vint64x2_impl vint64x2_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
99 vuint64x2_impl vuint64x2_impl_cpu = {0}; |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
100 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
101 // 256-bit |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
102 vint8x32_impl vint8x32_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
103 vuint8x32_impl vuint8x32_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
104 vint16x16_impl vint16x16_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
105 vuint16x16_impl vuint16x16_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
106 vint32x8_impl vint32x8_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
107 vuint32x8_impl vuint32x8_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
108 vint64x4_impl vint64x4_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
109 vuint64x4_impl vuint64x4_impl_cpu = {0}; |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
110 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
111 // 512-bit |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
112 vint8x64_impl vint8x64_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
113 vuint8x64_impl vuint8x64_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
114 vint16x32_impl vint16x32_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
115 vuint16x32_impl vuint16x32_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
116 vint32x16_impl vint32x16_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
117 vuint32x16_impl vuint32x16_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
118 vint64x8_impl vint64x8_impl_cpu = {0}; |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
119 vuint64x8_impl vuint64x8_impl_cpu = {0}; |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
120 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
121 static int vec_init_spinner = 0; // init guard: vec_init() returns 0 immediately when this is nonzero |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
122 |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
123 #define FILL_GIVEN_FUNC_PTR(cpu, impl, func) \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
124 do { \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
125 if (!(cpu).func && (impl).func) \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
126 (cpu).func = (impl).func; \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
127 } while (0) |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
128 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
129 #define FILL_GIVEN_FUNC_PTRS_EX(cpu, impl) \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
130 do { \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
131 FILL_GIVEN_FUNC_PTR(cpu, impl, splat); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
132 FILL_GIVEN_FUNC_PTR(cpu, impl, load_aligned); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
133 FILL_GIVEN_FUNC_PTR(cpu, impl, load); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
134 FILL_GIVEN_FUNC_PTR(cpu, impl, store_aligned); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
135 FILL_GIVEN_FUNC_PTR(cpu, impl, store); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
136 FILL_GIVEN_FUNC_PTR(cpu, impl, add); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
137 FILL_GIVEN_FUNC_PTR(cpu, impl, sub); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
138 FILL_GIVEN_FUNC_PTR(cpu, impl, mul); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
139 FILL_GIVEN_FUNC_PTR(cpu, impl, div); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
140 FILL_GIVEN_FUNC_PTR(cpu, impl, avg); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
141 FILL_GIVEN_FUNC_PTR(cpu, impl, band); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
142 FILL_GIVEN_FUNC_PTR(cpu, impl, bor); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
143 FILL_GIVEN_FUNC_PTR(cpu, impl, bxor); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
144 FILL_GIVEN_FUNC_PTR(cpu, impl, lshift); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
145 FILL_GIVEN_FUNC_PTR(cpu, impl, rshift); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
146 FILL_GIVEN_FUNC_PTR(cpu, impl, lrshift); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
147 FILL_GIVEN_FUNC_PTR(cpu, impl, cmplt); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
148 FILL_GIVEN_FUNC_PTR(cpu, impl, cmple); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
149 FILL_GIVEN_FUNC_PTR(cpu, impl, cmpeq); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
150 FILL_GIVEN_FUNC_PTR(cpu, impl, cmpge); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
151 FILL_GIVEN_FUNC_PTR(cpu, impl, cmpgt); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
152 FILL_GIVEN_FUNC_PTR(cpu, impl, min); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
153 FILL_GIVEN_FUNC_PTR(cpu, impl, max); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
154 } while (0) |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
155 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
156 #define FILL_GIVEN_FUNC_PTRS(sign, bits, size, impl) \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
157 FILL_GIVEN_FUNC_PTRS_EX(v##sign##int##bits##x##size##_impl_cpu, v##sign##int##bits##x##size##_impl_##impl) |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
158 |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
159 // returns 0 on success, or a negative error code on failure |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
160 int vec_init(void) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
161 { |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
162 // This function is NOT thread safe. However, once vec |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
163 // is initialized, all of the vector functions are thread-safe. |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
164 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
165 if (vec_init_spinner) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
166 return 0; // already initialized, do nothing |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
167 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
168 vec_uint32 cpu = vec_get_CPU_features(); |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
169 |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
170 /* Okay, this might be a little confusing: |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
171 * The way we do this is because of x86. For weird reasons, |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
172 * Intel decided to extend their prior CPU extensions to |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
173 * where SSE4.1 has some extended features of SSE2, AVX2 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
174 * has some extended features that should've been in SSE |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
175 * in general, etc. |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
176 * |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
177 * For this, I've just decided to keep the function |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
178 * definitions private, and fill in as we go, with newer |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
179 * intrinsics preferred. Others are arbitrary and are |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
180 * mutually exclusive (i.e. Altivec vs NEON). This is simply |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
181 * the easiest way to go about it :) */ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
182 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
183 /* --- 512-bit */ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
184 #ifdef VEC_COMPILER_HAS_AVX512DQ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
185 if (cpu & VEC_CPU_HAS_AVX512DQ) { |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
186 /* these give us native multiply instructions */ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
187 FILL_GIVEN_FUNC_PTRS( , 64, 8, avx512dq); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
188 FILL_GIVEN_FUNC_PTRS(u, 64, 8, avx512dq); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
189 } |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
190 #endif |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
191 #ifdef VEC_COMPILER_HAS_AVX512BW |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
192 if (cpu & VEC_CPU_HAS_AVX512BW) { |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
193 FILL_GIVEN_FUNC_PTRS( , 8, 64, avx512bw); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
194 FILL_GIVEN_FUNC_PTRS(u, 8, 64, avx512bw); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
195 FILL_GIVEN_FUNC_PTRS( , 16, 32, avx512bw); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
196 FILL_GIVEN_FUNC_PTRS(u, 16, 32, avx512bw); |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
197 } |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
198 #endif |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
199 #ifdef VEC_COMPILER_HAS_AVX512F |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
200 if (cpu & VEC_CPU_HAS_AVX512F) { |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
201 FILL_GIVEN_FUNC_PTRS( , 32, 16, avx512f); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
202 FILL_GIVEN_FUNC_PTRS(u, 32, 16, avx512f); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
203 FILL_GIVEN_FUNC_PTRS( , 64, 8, avx512f); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
204 FILL_GIVEN_FUNC_PTRS(u, 64, 8, avx512f); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
205 } |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
206 #endif |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
207 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
208 /* --- 256-bit */ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
209 #ifdef VEC_COMPILER_HAS_AVX2 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
210 if (cpu & VEC_CPU_HAS_AVX2) { |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
211 FILL_GIVEN_FUNC_PTRS( , 8, 32, avx2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
212 FILL_GIVEN_FUNC_PTRS(u, 8, 32, avx2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
213 FILL_GIVEN_FUNC_PTRS( , 16, 16, avx2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
214 FILL_GIVEN_FUNC_PTRS(u, 16, 16, avx2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
215 FILL_GIVEN_FUNC_PTRS( , 32, 8, avx2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
216 FILL_GIVEN_FUNC_PTRS(u, 32, 8, avx2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
217 FILL_GIVEN_FUNC_PTRS( , 64, 4, avx2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
218 FILL_GIVEN_FUNC_PTRS(u, 64, 4, avx2); |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
219 } |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
220 #endif |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
221 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
222 /* --- 128-bit */ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
223 #ifdef VEC_COMPILER_HAS_SSE42 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
/* The sse42 implementations may only be installed when the CPU actually
 * reports SSE4.2; testing the SSE4.1 flag here would select them on
 * SSE4.1-only processors. */
224 if (cpu & VEC_CPU_HAS_SSE42) { |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
225 FILL_GIVEN_FUNC_PTRS( , 64, 2, sse42); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
226 FILL_GIVEN_FUNC_PTRS(u, 64, 2, sse42); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
227 } |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
228 #endif |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
229 #ifdef VEC_COMPILER_HAS_SSE41 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
230 if (cpu & VEC_CPU_HAS_SSE41) { |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
231 FILL_GIVEN_FUNC_PTRS( , 8, 16, sse41); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
232 FILL_GIVEN_FUNC_PTRS(u, 8, 16, sse41); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
233 FILL_GIVEN_FUNC_PTRS( , 16, 8, sse41); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
234 FILL_GIVEN_FUNC_PTRS(u, 16, 8, sse41); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
235 FILL_GIVEN_FUNC_PTRS( , 32, 4, sse41); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
236 FILL_GIVEN_FUNC_PTRS(u, 32, 4, sse41); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
237 FILL_GIVEN_FUNC_PTRS( , 64, 2, sse41); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
238 FILL_GIVEN_FUNC_PTRS(u, 64, 2, sse41); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
239 } |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
240 #endif |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
241 #ifdef VEC_COMPILER_HAS_SSE3 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
242 if (cpu & VEC_CPU_HAS_SSE3) { |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
243 FILL_GIVEN_FUNC_PTRS( , 8, 16, sse3); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
244 FILL_GIVEN_FUNC_PTRS(u, 8, 16, sse3); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
245 FILL_GIVEN_FUNC_PTRS( , 16, 8, sse3); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
246 FILL_GIVEN_FUNC_PTRS(u, 16, 8, sse3); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
247 FILL_GIVEN_FUNC_PTRS( , 32, 4, sse3); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
248 FILL_GIVEN_FUNC_PTRS(u, 32, 4, sse3); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
249 FILL_GIVEN_FUNC_PTRS( , 64, 2, sse3); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
250 FILL_GIVEN_FUNC_PTRS(u, 64, 2, sse3); |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
251 } |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
252 #endif |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
253 #ifdef VEC_COMPILER_HAS_SSE2 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
254 if (cpu & VEC_CPU_HAS_SSE2) { |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
255 FILL_GIVEN_FUNC_PTRS( , 8, 16, sse2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
256 FILL_GIVEN_FUNC_PTRS(u, 8, 16, sse2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
257 FILL_GIVEN_FUNC_PTRS( , 16, 8, sse2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
258 FILL_GIVEN_FUNC_PTRS(u, 16, 8, sse2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
259 FILL_GIVEN_FUNC_PTRS( , 32, 4, sse2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
260 FILL_GIVEN_FUNC_PTRS(u, 32, 4, sse2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
261 FILL_GIVEN_FUNC_PTRS( , 64, 2, sse2); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
262 FILL_GIVEN_FUNC_PTRS(u, 64, 2, sse2); |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
263 } |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
264 #endif |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
265 #ifdef VEC_COMPILER_HAS_NEON |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
266 if (cpu & VEC_CPU_HAS_NEON) { |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
267 FILL_GIVEN_FUNC_PTRS( , 8, 16, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
268 FILL_GIVEN_FUNC_PTRS(u, 8, 16, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
269 FILL_GIVEN_FUNC_PTRS( , 16, 8, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
270 FILL_GIVEN_FUNC_PTRS(u, 16, 8, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
271 FILL_GIVEN_FUNC_PTRS( , 32, 4, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
272 FILL_GIVEN_FUNC_PTRS(u, 32, 4, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
273 FILL_GIVEN_FUNC_PTRS( , 64, 2, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
274 FILL_GIVEN_FUNC_PTRS(u, 64, 2, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
275 } |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
276 #endif |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
277 #ifdef VEC_COMPILER_HAS_ALTIVEC |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
278 if (cpu & VEC_CPU_HAS_ALTIVEC) { |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
279 FILL_GIVEN_FUNC_PTRS( , 8, 16, altivec); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
280 FILL_GIVEN_FUNC_PTRS(u, 8, 16, altivec); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
281 FILL_GIVEN_FUNC_PTRS( , 16, 8, altivec); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
282 FILL_GIVEN_FUNC_PTRS(u, 16, 8, altivec); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
283 FILL_GIVEN_FUNC_PTRS( , 32, 4, altivec); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
284 FILL_GIVEN_FUNC_PTRS(u, 32, 4, altivec); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
285 } |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
286 #endif |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
287 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
288 /* --- 64-bit */ |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
289 #ifdef VEC_COMPILER_HAS_MMX |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
290 if (cpu & VEC_CPU_HAS_MMX) { |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
291 FILL_GIVEN_FUNC_PTRS( , 8, 8, mmx); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
292 FILL_GIVEN_FUNC_PTRS(u, 8, 8, mmx); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
293 FILL_GIVEN_FUNC_PTRS( , 16, 4, mmx); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
294 FILL_GIVEN_FUNC_PTRS(u, 16, 4, mmx); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
295 FILL_GIVEN_FUNC_PTRS( , 32, 2, mmx); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
296 FILL_GIVEN_FUNC_PTRS(u, 32, 2, mmx); |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
297 } |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
298 #endif |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
299 #ifdef VEC_COMPILER_HAS_NEON |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
300 if (cpu & VEC_CPU_HAS_NEON) { |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
301 FILL_GIVEN_FUNC_PTRS( , 8, 8, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
302 FILL_GIVEN_FUNC_PTRS(u, 8, 8, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
303 FILL_GIVEN_FUNC_PTRS( , 16, 4, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
304 FILL_GIVEN_FUNC_PTRS(u, 16, 4, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
305 FILL_GIVEN_FUNC_PTRS( , 32, 2, neon); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
306 FILL_GIVEN_FUNC_PTRS(u, 32, 2, neon); |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
307 } |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
308 #endif |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
309 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
310 /* fill any remaining function pointers with generics */ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
311 FILL_GIVEN_FUNC_PTRS( , 8, 64, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
312 FILL_GIVEN_FUNC_PTRS(u, 8, 64, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
313 FILL_GIVEN_FUNC_PTRS( , 16, 32, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
314 FILL_GIVEN_FUNC_PTRS(u, 16, 32, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
315 FILL_GIVEN_FUNC_PTRS( , 32, 16, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
316 FILL_GIVEN_FUNC_PTRS(u, 32, 16, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
317 FILL_GIVEN_FUNC_PTRS( , 64, 8, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
318 FILL_GIVEN_FUNC_PTRS(u, 64, 8, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
319 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
320 FILL_GIVEN_FUNC_PTRS( , 8, 32, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
321 FILL_GIVEN_FUNC_PTRS(u, 8, 32, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
322 FILL_GIVEN_FUNC_PTRS( , 16, 16, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
323 FILL_GIVEN_FUNC_PTRS(u, 16, 16, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
324 FILL_GIVEN_FUNC_PTRS( , 32, 8, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
325 FILL_GIVEN_FUNC_PTRS(u, 32, 8, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
326 FILL_GIVEN_FUNC_PTRS( , 64, 4, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
327 FILL_GIVEN_FUNC_PTRS(u, 64, 4, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
328 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
329 FILL_GIVEN_FUNC_PTRS( , 8, 16, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
330 FILL_GIVEN_FUNC_PTRS(u, 8, 16, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
331 FILL_GIVEN_FUNC_PTRS( , 16, 8, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
332 FILL_GIVEN_FUNC_PTRS(u, 16, 8, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
333 FILL_GIVEN_FUNC_PTRS( , 32, 4, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
334 FILL_GIVEN_FUNC_PTRS(u, 32, 4, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
335 FILL_GIVEN_FUNC_PTRS( , 64, 2, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
336 FILL_GIVEN_FUNC_PTRS(u, 64, 2, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
337 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
338 FILL_GIVEN_FUNC_PTRS( , 8, 8, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
339 FILL_GIVEN_FUNC_PTRS(u, 8, 8, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
340 FILL_GIVEN_FUNC_PTRS( , 16, 4, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
341 FILL_GIVEN_FUNC_PTRS(u, 16, 4, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
342 FILL_GIVEN_FUNC_PTRS( , 32, 2, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
343 FILL_GIVEN_FUNC_PTRS(u, 32, 2, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
344 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
345 FILL_GIVEN_FUNC_PTRS( , 8, 4, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
346 FILL_GIVEN_FUNC_PTRS(u, 8, 4, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
347 FILL_GIVEN_FUNC_PTRS( , 16, 2, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
348 FILL_GIVEN_FUNC_PTRS(u, 16, 2, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
349 |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
350 FILL_GIVEN_FUNC_PTRS( , 8, 2, generic); |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
351 FILL_GIVEN_FUNC_PTRS(u, 8, 2, generic); |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
352 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
353 vec_init_spinner++; |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
354 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
355 return 0; |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
356 } |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
357 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
358 /* ---------------------------------------------------------------- */ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
359 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
/* VEC_DEFINE_OPERATIONS_SIGN(sign, bits, size)
 *
 * Expands to `extern inline` declarations for every operation of the vector
 * type v<sign>int<bits>x<size>: splat, load_aligned, load, store_aligned,
 * store, add, sub, mul, div, avg, and, or, xor, cmplt, cmple, cmpeq, cmpge,
 * cmpgt, lshift, rshift, lrshift, min and max.
 *
 *   sign - empty for the signed type, `u` for the unsigned type
 *   bits - token pasted after `int` in the type name (element width)
 *   size - token pasted after `x` in the type name; also used as the array
 *          length in the load/store prototypes
 *
 * The three shift operations always take their shift counts as the unsigned
 * vector type vuint<bits>x<size>, regardless of `sign`.
 *
 * NOTE(review): presumably the header supplies the inline definitions and
 * these declarations make this translation unit emit the external
 * definitions (C99 inline semantics) -- confirm against the header. */
360 #define VEC_DEFINE_OPERATIONS_SIGN(sign, bits, size) \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
361 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_splat(vec_##sign##int##bits x); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
362 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_load_aligned(const vec_##sign##int##bits in[size]); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
363 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_load(const vec_##sign##int##bits in[size]); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
364 extern inline void v##sign##int##bits##x##size##_store_aligned(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
365 extern inline void v##sign##int##bits##x##size##_store(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
366 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_add(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
367 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_sub(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
368 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_mul(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
369 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_div(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
370 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
371 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_and(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
372 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_or(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
373 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_xor(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
374 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmplt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
375 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmple(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
376 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpeq(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
377 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpge(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
378 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_cmpgt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
379 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_lshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2); \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
380 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_rshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2); \ |
28
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
381 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_lrshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
382 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_min(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); \ |
c6c99ab1088a
*: add min/max functions and a big big refactor (again)
Paper <paper@tflc.us>
parents:
25
diff
changeset
|
383 extern inline v##sign##int##bits##x##size v##sign##int##bits##x##size##_max(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2); |
23
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
384 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
/* VEC_DEFINE_OPERATIONS(bits, size)
 *
 * Expands VEC_DEFINE_OPERATIONS_SIGN twice for the given bits/size pair:
 * once with an empty `sign` argument (the v int<bits>x<size> type) and once
 * with `u` (the vuint<bits>x<size> type). */
385 #define VEC_DEFINE_OPERATIONS(bits, size) \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
386 VEC_DEFINE_OPERATIONS_SIGN( , bits, size) \ |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
387 VEC_DEFINE_OPERATIONS_SIGN(u, bits, size) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
388 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
389 // 16-bit |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
390 VEC_DEFINE_OPERATIONS(8, 2) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
391 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
392 // 32-bit |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
393 VEC_DEFINE_OPERATIONS(8, 4) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
394 VEC_DEFINE_OPERATIONS(16, 2) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
395 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
396 // 64-bit vectors: 8 x 8-bit, 4 x 16-bit and 2 x 32-bit elements, signed and unsigned |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
397 VEC_DEFINE_OPERATIONS(8, 8) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
398 VEC_DEFINE_OPERATIONS(16, 4) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
399 VEC_DEFINE_OPERATIONS(32, 2) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
400 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
401 // 128-bit vectors: 16 x 8-bit, 8 x 16-bit, 4 x 32-bit and 2 x 64-bit elements, signed and unsigned |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
402 VEC_DEFINE_OPERATIONS(8, 16) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
403 VEC_DEFINE_OPERATIONS(16, 8) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
404 VEC_DEFINE_OPERATIONS(32, 4) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
405 VEC_DEFINE_OPERATIONS(64, 2) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
406 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
407 // 256-bit vectors: 32 x 8-bit, 16 x 16-bit, 8 x 32-bit and 4 x 64-bit elements, signed and unsigned |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
408 VEC_DEFINE_OPERATIONS(8, 32) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
409 VEC_DEFINE_OPERATIONS(16, 16) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
410 VEC_DEFINE_OPERATIONS(32, 8) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
411 VEC_DEFINE_OPERATIONS(64, 4) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
412 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
413 // 512-bit vectors: 64 x 8-bit, 32 x 16-bit, 16 x 32-bit and 8 x 64-bit elements, signed and unsigned |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
414 VEC_DEFINE_OPERATIONS(8, 64) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
415 VEC_DEFINE_OPERATIONS(16, 32) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
416 VEC_DEFINE_OPERATIONS(32, 16) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
417 VEC_DEFINE_OPERATIONS(64, 8) |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
418 |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
// Remove both instantiation helper macros now that the list above has been expanded.
419 #undef VEC_DEFINE_OPERATIONS |
e26874655738
*: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
15
diff
changeset
|
420 #undef VEC_DEFINE_OPERATIONS_SIGN |