Mercurial > vec
comparison src/impl/ppc/altivec.c @ 23:e26874655738
*: huge refactor, new major release (hahaha)
I keep finding things that are broken...
The problem this time was that vec would unintentionally build some
functions with extended instruction sets enabled, which is bad: it
meant that, for all intents and purposes, the CPU detection was
completely broken.
Now vec is no longer header only either. Boohoo. However this gives
a lot more flexibility to vec since we no longer want or need to
care about C++ crap.
The NEON and AltiVec implementations have not been updated yet, so
they won't compile; that is why they are commented out in the
CMake build file.
author | Paper <paper@tflc.us> |
---|---|
date | Sun, 24 Nov 2024 02:52:40 -0500 |
parents | |
children | 92156fe32755 |
comparison
equal
deleted
inserted
replaced
22:fbcd3fa6f8fc | 23:e26874655738 |
---|---|
1 /** | |
2 * vec - a tiny SIMD vector library in C99 | |
3 * | |
4 * Copyright (c) 2024 Paper | |
5 * | |
6 * Permission is hereby granted, free of charge, to any person obtaining a copy | |
7 * of this software and associated documentation files (the "Software"), to deal | |
8 * in the Software without restriction, including without limitation the rights | |
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
10 * copies of the Software, and to permit persons to whom the Software is | |
11 * furnished to do so, subject to the following conditions: | |
12 * | |
13 * The above copyright notice and this permission notice shall be included in all | |
14 * copies or substantial portions of the Software. | |
15 * | |
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
22 * SOFTWARE. | |
23 **/ | |
24 | |
25 #include "vec/impl/ppc/altivec.h" | |
26 | |
27 #include <altivec.h> | |
28 | |
/*
 * Optional multiply support.
 *
 * If vec_mul is visible as a preprocessor macro after including <altivec.h>:
 *   - VEC_ALTIVEC_DEFINE_MUL(sign, csign, bits, size) expands to a static
 *     function v<sign>int<bits>x<size>_altivec_mul(vec1, vec2) that stores
 *     vec_mul(vec1.altivec, vec2.altivec) in the .altivec member of the result.
 *   - VEC_ALTIVEC_STRUCT_MUL(...) expands to the name of that function, for
 *     use inside the implementation table initializer.
 * Otherwise DEFINE_MUL expands to nothing and STRUCT_MUL expands to NULL.
 *
 * NOTE(review): #ifdef only detects vec_mul when it is a macro; a header that
 * provides vec_mul as a real function would take the NULL branch. Confirm
 * this is intended for every supported compiler.
 * The csign parameter is accepted but not used by either macro.
 */
29 /* GCC 4.2.1 on Mac OS X doesn't have these for some reason */ | |
30 #ifdef vec_mul | |
31 # define VEC_ALTIVEC_DEFINE_MUL(sign, csign, bits, size) \ | |
32 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_mul(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
33 { \ | |
34 v##sign##int##bits##x##size vec; \ | |
35 vec.altivec = vec_mul(vec1.altivec, vec2.altivec); \ | |
36 return vec; \ | |
37 } | |
38 # define VEC_ALTIVEC_STRUCT_MUL(sign, csign, bits, size) \ | |
39 v##sign##int##bits##x##size##_altivec_mul | |
40 #else | |
41 # define VEC_ALTIVEC_DEFINE_MUL(sign, csign, bits, size) | |
42 # define VEC_ALTIVEC_STRUCT_MUL(sign, csign, bits, size) NULL | |
43 #endif | |
44 | |
/*
 * Optional splat (broadcast a scalar) support, following the same pattern
 * as the multiply macros.
 *
 * If vec_splats is visible as a preprocessor macro:
 *   - VEC_ALTIVEC_DEFINE_SPLAT(sign, csign, bits, size) expands to a static
 *     function v<sign>int<bits>x<size>_altivec_splat(x) taking a scalar of
 *     type vec_<sign>int<bits> and storing vec_splats(x) in the .altivec
 *     member of the result.
 *   - VEC_ALTIVEC_STRUCT_SPLAT(...) expands to the name of that function.
 * Otherwise DEFINE_SPLAT expands to nothing and STRUCT_SPLAT expands to NULL.
 *
 * The csign parameter is accepted but not used by either macro.
 */
45 #ifdef vec_splats | |
46 # define VEC_ALTIVEC_DEFINE_SPLAT(sign, csign, bits, size) \ | |
47 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_splat(vec_##sign##int##bits x) \ | |
48 { \ | |
49 v##sign##int##bits##x##size vec; \ | |
50 vec.altivec = vec_splats(x); \ | |
51 return vec; \ | |
52 } | |
53 # define VEC_ALTIVEC_STRUCT_SPLAT(sign, csign, bits, size) \ | |
54 v##sign##int##bits##x##size##_altivec_splat | |
55 #else | |
56 # define VEC_ALTIVEC_DEFINE_SPLAT(sign, csign, bits, size) | |
57 # define VEC_ALTIVEC_STRUCT_SPLAT(sign, csign, bits, size) NULL | |
58 #endif | |
59 | |
/*
 * Right-shift intrinsic selected by the sign prefix. The generated _rshift
 * function pastes VEC_ALTIVEC_<sign>RSHIFT, so the "u" prefix resolves to
 * vec_sr (shift right) and the empty prefix resolves to vec_sra
 * (shift right algebraic).
 */
60 #define VEC_ALTIVEC_uRSHIFT vec_sr | |
61 #define VEC_ALTIVEC_RSHIFT vec_sra | |
62 | |
/*
 * lrshift support, selected by pasting the sign prefix into the macro name
 * (VEC_ALTIVEC_DEFINE_<sign>LRSHIFT / VEC_ALTIVEC_STRUCT_<sign>LRSHIFT).
 *
 * "u" prefix: defines v<sign>int<bits>x<size>_altivec_lrshift(vec1, vec2),
 *   which stores vec_sr(vec1.altivec, vec2.altivec) in the result; the
 *   STRUCT macro expands to that function name. Both operands use the same
 *   v<sign>int<bits>x<size> type here.
 * Empty prefix: nothing is defined and the STRUCT macro expands to NULL, so
 *   this file provides no AltiVec lrshift for the signed types.
 */
63 #define VEC_ALTIVEC_DEFINE_uLRSHIFT(sign, csign, bits, size) \ | |
64 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_lrshift(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
65 { \ | |
66 v##sign##int##bits##x##size vec; \ | |
67 vec.altivec = vec_sr(vec1.altivec, vec2.altivec); \ | |
68 return vec; \ | |
69 } | |
70 #define VEC_ALTIVEC_STRUCT_uLRSHIFT(sign, csign, bits, size) \ | |
71 v##sign##int##bits##x##size##_altivec_lrshift | |
72 | |
73 #define VEC_ALTIVEC_DEFINE_LRSHIFT(sign, csign, bits, size) | |
74 #define VEC_ALTIVEC_STRUCT_LRSHIFT(sign, csign, bits, size) NULL | |
75 | |
/*
 * Cast prefixes applied to comparison results. The generated cmplt, cmple,
 * cmpeq, cmpge and cmpgt functions paste VEC_ALTIVEC_CAST_BOOL_<csign><bits>
 * directly in front of the vec_cmp* / vec_or expression, so each macro must
 * expand to a complete parenthesized vector type. csign is empty for the
 * signed types and U for the unsigned ones.
 * NOTE(review): the name suggests these convert the vector bool results of
 * the vec_cmp* intrinsics to the matching integer vector type - confirm.
 */
76 #define VEC_ALTIVEC_CAST_BOOL_8 (vector signed char) | |
77 #define VEC_ALTIVEC_CAST_BOOL_U8 (vector unsigned char) | |
78 #define VEC_ALTIVEC_CAST_BOOL_16 (vector signed short) | |
79 #define VEC_ALTIVEC_CAST_BOOL_U16 (vector unsigned short) | |
80 #define VEC_ALTIVEC_CAST_BOOL_32 (vector signed int) | |
81 #define VEC_ALTIVEC_CAST_BOOL_U32 (vector unsigned int) | |
82 | |
/*
 * VEC_DEFINE_OPERATIONS_SIGN(sign, csign, bits, size)
 *
 * Generates every AltiVec-backed operation for the vector type
 * v<sign>int<bits>x<size>, followed by a static implementation table named
 * v<sign>int<bits>x<size>_impl_altivec.
 *
 * Parameters:
 *   sign  - empty or u; pasted into type and function names and used to pick
 *           the VEC_ALTIVEC_<sign>RSHIFT / VEC_ALTIVEC_*_<sign>LRSHIFT macros.
 *   csign - empty or U; used only to pick VEC_ALTIVEC_CAST_BOOL_<csign><bits>.
 *   bits  - element width, pasted into type names.
 *   size  - element count, pasted into type names and used as the array
 *           bound of the load/store pointer parameters.
 *
 * Generated static functions (all named v<sign>int<bits>x<size>_altivec_*):
 *   load_aligned  - vec_ld(0, in).
 *   load          - combines vec_ld(0, in) and vec_ld(15, in) with vec_perm,
 *                   using vec_lvsl(0, in) as the permute control, so that in
 *                   does not have to be aligned.
 *   store_aligned - vec_st(vec.altivec, 0, out).
 *   add, sub, avg, and, or, xor - thin wrappers over the vec_* intrinsic of
 *                   the same name.
 *   mul, splat    - only emitted when the corresponding VEC_ALTIVEC_DEFINE_*
 *                   macro is non-empty.
 *   lshift        - vec_sl; the shift counts are always a vuint<bits>x<size>.
 *   rshift        - vec_sr or vec_sra depending on sign; counts are likewise
 *                   a vuint<bits>x<size>.
 *   lrshift       - only emitted for the u prefix.
 *   cmplt, cmpeq, cmpgt - the vec_cmp* intrinsic of the same name, cast with
 *                   VEC_ALTIVEC_CAST_BOOL_<csign><bits>.
 *   cmple, cmpge  - vec_or of the strict comparison and vec_cmpeq, cast the
 *                   same way.
 *
 * The implementation table is initialized positionally; the store, div and
 * not slots are NULL, and the splat, mul and lrshift slots may be NULL.
 * NOTE(review): because the initializer is positional, its order must match
 * the member order of the _impl struct, which is declared outside this file.
 */
83 /* Since altivec conveniently made their API super user friendly, we can just use | |
84 * one giant macro to define literally everything */ | |
85 #define VEC_DEFINE_OPERATIONS_SIGN(sign, csign, bits, size) \ | |
86 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_load_aligned(const vec_##sign##int##bits in[size]) \ | |
87 { \ | |
88 v##sign##int##bits##x##size vec; \ | |
89 vec.altivec = vec_ld(0, in); \ | |
90 return vec; \ | |
91 } \ | |
92 \ | |
93 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_load(const vec_##sign##int##bits in[size]) \ | |
94 { \ | |
95 v##sign##int##bits##x##size vec; \ | |
96 vec.altivec = vec_perm(vec_ld(0, in), vec_ld(15, in), vec_lvsl(0, in)); \ | |
97 return vec; \ | |
98 } \ | |
99 \ | |
100 static void v##sign##int##bits##x##size##_altivec_store_aligned(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \ | |
101 { \ | |
102 vec_st(vec.altivec, 0, out); \ | |
103 } \ | |
104 \ | |
105 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_add(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
106 { \ | |
107 v##sign##int##bits##x##size vec; \ | |
108 vec.altivec = vec_add(vec1.altivec, vec2.altivec); \ | |
109 return vec; \ | |
110 } \ | |
111 \ | |
112 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_sub(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
113 { \ | |
114 v##sign##int##bits##x##size vec; \ | |
115 vec.altivec = vec_sub(vec1.altivec, vec2.altivec); \ | |
116 return vec; \ | |
117 } \ | |
118 \ | |
119 VEC_ALTIVEC_DEFINE_MUL(sign, csign, bits, size) \ | |
120 \ | |
121 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_lshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \ | |
122 { \ | |
123 v##sign##int##bits##x##size vec; \ | |
124 vec.altivec = vec_sl(vec1.altivec, vec2.altivec); \ | |
125 return vec; \ | |
126 } \ | |
127 \ | |
128 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_rshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \ | |
129 { \ | |
130 v##sign##int##bits##x##size vec; \ | |
131 vec.altivec = VEC_ALTIVEC_##sign##RSHIFT(vec1.altivec, vec2.altivec); \ | |
132 return vec; \ | |
133 } \ | |
134 \ | |
135 VEC_ALTIVEC_DEFINE_##sign##LRSHIFT(sign, csign, bits, size) \ | |
136 \ | |
137 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_avg(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
138 { \ | |
139 v##sign##int##bits##x##size vec; \ | |
140 vec.altivec = vec_avg(vec1.altivec, vec2.altivec); \ | |
141 return vec; \ | |
142 } \ | |
143 \ | |
144 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_and(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
145 { \ | |
146 v##sign##int##bits##x##size vec; \ | |
147 vec.altivec = vec_and(vec1.altivec, vec2.altivec); \ | |
148 return vec; \ | |
149 } \ | |
150 \ | |
151 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_or(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
152 { \ | |
153 v##sign##int##bits##x##size vec; \ | |
154 vec.altivec = vec_or(vec1.altivec, vec2.altivec); \ | |
155 return vec; \ | |
156 } \ | |
157 \ | |
158 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_xor(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
159 { \ | |
160 v##sign##int##bits##x##size vec; \ | |
161 vec.altivec = vec_xor(vec1.altivec, vec2.altivec); \ | |
162 return vec; \ | |
163 } \ | |
164 \ | |
165 VEC_ALTIVEC_DEFINE_SPLAT(sign, csign, bits, size) \ | |
166 \ | |
167 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_cmplt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
168 { \ | |
169 v##sign##int##bits##x##size vec; \ | |
170 vec.altivec = VEC_ALTIVEC_CAST_BOOL_##csign##bits vec_cmplt(vec1.altivec, vec2.altivec); \ | |
171 return vec; \ | |
172 } \ | |
173 \ | |
174 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_cmple(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
175 { \ | |
176 v##sign##int##bits##x##size vec; \ | |
177 vec.altivec = VEC_ALTIVEC_CAST_BOOL_##csign##bits vec_or(vec_cmplt(vec1.altivec, vec2.altivec), vec_cmpeq(vec1.altivec, vec2.altivec)); \ | |
178 return vec; \ | |
179 } \ | |
180 \ | |
181 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_cmpeq(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
182 { \ | |
183 v##sign##int##bits##x##size vec; \ | |
184 vec.altivec = VEC_ALTIVEC_CAST_BOOL_##csign##bits vec_cmpeq(vec1.altivec, vec2.altivec); \ | |
185 return vec; \ | |
186 } \ | |
187 \ | |
188 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_cmpge(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
189 { \ | |
190 v##sign##int##bits##x##size vec; \ | |
191 vec.altivec = VEC_ALTIVEC_CAST_BOOL_##csign##bits vec_or(vec_cmpgt(vec1.altivec, vec2.altivec), vec_cmpeq(vec1.altivec, vec2.altivec)); \ | |
192 return vec; \ | |
193 } \ | |
194 \ | |
195 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_altivec_cmpgt(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \ | |
196 { \ | |
197 v##sign##int##bits##x##size vec; \ | |
198 vec.altivec = VEC_ALTIVEC_CAST_BOOL_##csign##bits vec_cmpgt(vec1.altivec, vec2.altivec); \ | |
199 return vec; \ | |
200 } \ | |
201 \ | |
202 static v##sign##int##bits##x##size##_impl v##sign##int##bits##x##size##_impl_altivec = { \ | |
203 VEC_ALTIVEC_STRUCT_SPLAT(sign, csign, bits, size), \ | |
204 v##sign##int##bits##x##size##_altivec_load_aligned, \ | |
205 v##sign##int##bits##x##size##_altivec_load, \ | |
206 v##sign##int##bits##x##size##_altivec_store_aligned, \ | |
207 /* .store = */ NULL, \ | |
208 v##sign##int##bits##x##size##_altivec_add, \ | |
209 v##sign##int##bits##x##size##_altivec_sub, \ | |
210 VEC_ALTIVEC_STRUCT_MUL(sign, csign, bits, size), \ | |
211 /* .div = */ NULL, \ | |
212 v##sign##int##bits##x##size##_altivec_avg, \ | |
213 v##sign##int##bits##x##size##_altivec_and, \ | |
214 v##sign##int##bits##x##size##_altivec_or, \ | |
215 v##sign##int##bits##x##size##_altivec_xor, \ | |
216 /* .not = */ NULL, \ | |
217 v##sign##int##bits##x##size##_altivec_lshift, \ | |
218 v##sign##int##bits##x##size##_altivec_rshift, \ | |
219 VEC_ALTIVEC_STRUCT_##sign##LRSHIFT(sign, csign, bits, size), \ | |
220 v##sign##int##bits##x##size##_altivec_cmplt, \ | |
221 v##sign##int##bits##x##size##_altivec_cmple, \ | |
222 v##sign##int##bits##x##size##_altivec_cmpeq, \ | |
223 v##sign##int##bits##x##size##_altivec_cmpge, \ | |
224 v##sign##int##bits##x##size##_altivec_cmpgt, \ | |
225 }; | |
226 | |
/*
 * VEC_DEFINE_OPERATIONS(bits, size) expands VEC_DEFINE_OPERATIONS_SIGN twice:
 * once with empty sign/csign arguments (the signed type) and once with u/U
 * (the unsigned type). It is instantiated below for 8-bit x 16, 16-bit x 8
 * and 32-bit x 4 vectors, producing six implementation tables in total.
 */
227 #define VEC_DEFINE_OPERATIONS(bits, size) \ | |
228 VEC_DEFINE_OPERATIONS_SIGN( , , bits, size) \ | |
229 VEC_DEFINE_OPERATIONS_SIGN(u, U, bits, size) | |
230 | |
231 VEC_DEFINE_OPERATIONS(8, 16) | |
232 VEC_DEFINE_OPERATIONS(16, 8) | |
233 VEC_DEFINE_OPERATIONS(32, 4) |