annotate src/impl/arm/neon.c @ 27:d00b95f95dd1 default tip

impl/arm/neon: it compiles again, but is untested
author Paper <paper@tflc.us>
date Mon, 25 Nov 2024 00:33:02 -0500
parents e26874655738
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
1 /**
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
2 * vec - a tiny SIMD vector library in C99
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
3 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
4 * Copyright (c) 2024 Paper
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
5 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
7 * of this software and associated documentation files (the "Software"), to deal
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
8 * in the Software without restriction, including without limitation the rights
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
10 * copies of the Software, and to permit persons to whom the Software is
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
11 * furnished to do so, subject to the following conditions:
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
12 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
13 * The above copyright notice and this permission notice shall be included in all
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
14 * copies or substantial portions of the Software.
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
15 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
22 * SOFTWARE.
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
23 **/
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
24
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
25 #include "vec/impl/arm/neon.h"
27
d00b95f95dd1 impl/arm/neon: it compiles again, but is untested
Paper <paper@tflc.us>
parents: 23
diff changeset
26 #include "vec/impl/generic.h"
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
27
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
28 #include <arm_neon.h>
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
29
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
30 // There is LOTS of preprocessor hacking here (as if the other files
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
31 // weren't bad enough... lol)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Generates the NEON-backed implementation of one integer vector type.
//
//   sign  - `u` for the unsigned type, empty for the signed type
//   csign - upper-case spelling of `sign`; not referenced by this macro
//   bits  - width of one lane, in bits
//   size  - number of lanes
//
// The expansion declares:
//   - union v{sign}int{bits}x{size}_impl_data, overlaying the vec type with
//     the matching <arm_neon.h> vector type;
//   - two static assertions that the vec type is at least as large and as
//     strictly aligned as the NEON type;
//   - static wrappers for load, store, add, sub, mul, lshift, and, or, xor;
//   - the v{sign}int{bits}x{size}_impl_neon table. Its *_fallback_* entries
//     are not defined here (presumably they come from vec/impl/generic.h).
//
// With an empty `sign`, intrinsic names paste to identifiers such as vadd_8,
// so the per-width alias macros (vadd_8, vstore_lane_8, vreinterpret_8_u8,
// ...) must be defined wherever this macro is instantiated. The unsigned
// variant of a bits/size pair has to be instantiated before the signed one:
// both lshift wrappers refer to union vuint{bits}x{size}_impl_data.
#define VEC_DEFINE_OPERATIONS_SIGN(sign, csign, bits, size) \
	union v##sign##int##bits##x##size##_impl_data { \
		v##sign##int##bits##x##size vec; \
		sign##int##bits##x##size##_t neon; \
	}; \
	\
	VEC_STATIC_ASSERT(VEC_ALIGNOF(sign##int##bits##x##size##_t) <= VEC_ALIGNOF(v##sign##int##bits##x##size), "vec: v" #sign "int" #bits "x" #size " alignment needs to be expanded to fit intrinsic type size"); \
	VEC_STATIC_ASSERT(sizeof(sign##int##bits##x##size##_t) <= sizeof(v##sign##int##bits##x##size), "vec: v" #sign "int" #bits "x" #size " needs to be expanded to fit intrinsic type size"); \
	\
	static v##sign##int##bits##x##size v##sign##int##bits##x##size##_neon_load_aligned(const vec_##sign##int##bits in[size]) \
	{ \
		union v##sign##int##bits##x##size##_impl_data vec; \
		vec.neon = vld1_##sign##bits(in); \
		return vec.vec; \
	} \
	\
	static void v##sign##int##bits##x##size##_neon_store_aligned(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \
	{ \
		vstore_lane_##bits(sign, ((union v##sign##int##bits##x##size##_impl_data *)&vec)->neon, out); \
	} \
	\
	static v##sign##int##bits##x##size v##sign##int##bits##x##size##_neon_add(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
	{ \
		union v##sign##int##bits##x##size##_impl_data *vec1d = (union v##sign##int##bits##x##size##_impl_data *)&vec1; \
		union v##sign##int##bits##x##size##_impl_data *vec2d = (union v##sign##int##bits##x##size##_impl_data *)&vec2; \
	\
		vec1d->neon = vadd_##sign##bits(vec1d->neon, vec2d->neon); \
		return vec1d->vec; \
	} \
	\
	static v##sign##int##bits##x##size v##sign##int##bits##x##size##_neon_sub(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
	{ \
		union v##sign##int##bits##x##size##_impl_data *vec1d = (union v##sign##int##bits##x##size##_impl_data *)&vec1; \
		union v##sign##int##bits##x##size##_impl_data *vec2d = (union v##sign##int##bits##x##size##_impl_data *)&vec2; \
	\
		vec1d->neon = vsub_##sign##bits(vec1d->neon, vec2d->neon); \
		return vec1d->vec; \
	} \
	\
	static v##sign##int##bits##x##size v##sign##int##bits##x##size##_neon_mul(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
	{ \
		union v##sign##int##bits##x##size##_impl_data *vec1d = (union v##sign##int##bits##x##size##_impl_data *)&vec1; \
		union v##sign##int##bits##x##size##_impl_data *vec2d = (union v##sign##int##bits##x##size##_impl_data *)&vec2; \
	\
		vec1d->neon = vmul_##sign##bits(vec1d->neon, vec2d->neon); \
		return vec1d->vec; \
	} \
	\
	static v##sign##int##bits##x##size v##sign##int##bits##x##size##_neon_lshift(v##sign##int##bits##x##size vec1, vuint##bits##x##size vec2) \
	{ \
		union v##sign##int##bits##x##size##_impl_data *vec1d = (union v##sign##int##bits##x##size##_impl_data *)&vec1; \
		union vuint##bits##x##size##_impl_data *vec2d = (union vuint##bits##x##size##_impl_data *)&vec2; \
	\
		/* vshl_* takes its per-lane shift counts as a signed vector, so the */ \
		/* unsigned counts are reinterpreted. This has to be a call: written */ \
		/* as a cast, the function-like alias macro is never expanded.       */ \
		vec1d->neon = vshl_##sign##bits(vec1d->neon, vreinterpret_##bits##_u##bits(vec2d->neon)); \
		return vec1d->vec; \
	} \
	\
	static v##sign##int##bits##x##size v##sign##int##bits##x##size##_neon_and(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
	{ \
		union v##sign##int##bits##x##size##_impl_data *vec1d = (union v##sign##int##bits##x##size##_impl_data *)&vec1; \
		union v##sign##int##bits##x##size##_impl_data *vec2d = (union v##sign##int##bits##x##size##_impl_data *)&vec2; \
	\
		vec1d->neon = vand_##sign##bits(vec1d->neon, vec2d->neon); \
		return vec1d->vec; \
	} \
	\
	static v##sign##int##bits##x##size v##sign##int##bits##x##size##_neon_or(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
	{ \
		union v##sign##int##bits##x##size##_impl_data *vec1d = (union v##sign##int##bits##x##size##_impl_data *)&vec1; \
		union v##sign##int##bits##x##size##_impl_data *vec2d = (union v##sign##int##bits##x##size##_impl_data *)&vec2; \
	\
		vec1d->neon = vorr_##sign##bits(vec1d->neon, vec2d->neon); \
		return vec1d->vec; \
	} \
	\
	static v##sign##int##bits##x##size v##sign##int##bits##x##size##_neon_xor(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
	{ \
		union v##sign##int##bits##x##size##_impl_data *vec1d = (union v##sign##int##bits##x##size##_impl_data *)&vec1; \
		union v##sign##int##bits##x##size##_impl_data *vec2d = (union v##sign##int##bits##x##size##_impl_data *)&vec2; \
	\
		vec1d->neon = veor_##sign##bits(vec1d->neon, vec2d->neon); \
		return vec1d->vec; \
	} \
	\
	static v##sign##int##bits##x##size##_impl v##sign##int##bits##x##size##_impl_neon = { \
		v##sign##int##bits##x##size##_fallback_splat, \
		v##sign##int##bits##x##size##_neon_load_aligned, \
		v##sign##int##bits##x##size##_neon_load_aligned, \
		v##sign##int##bits##x##size##_neon_store_aligned, \
		v##sign##int##bits##x##size##_neon_store_aligned, \
		v##sign##int##bits##x##size##_neon_add, \
		v##sign##int##bits##x##size##_neon_sub, \
		v##sign##int##bits##x##size##_neon_mul, \
		v##sign##int##bits##x##size##_fallback_div, \
		v##sign##int##bits##x##size##_fallback_avg, \
		v##sign##int##bits##x##size##_neon_and, \
		v##sign##int##bits##x##size##_neon_or, \
		v##sign##int##bits##x##size##_neon_xor, \
		v##sign##int##bits##x##size##_fallback_not, \
		v##sign##int##bits##x##size##_neon_lshift, \
		v##sign##int##bits##x##size##_fallback_rshift, \
		v##sign##int##bits##x##size##_fallback_lrshift, \
		v##sign##int##bits##x##size##_fallback_cmplt, \
		v##sign##int##bits##x##size##_fallback_cmple, \
		v##sign##int##bits##x##size##_fallback_cmpeq, \
		v##sign##int##bits##x##size##_fallback_cmpge, \
		v##sign##int##bits##x##size##_fallback_cmpgt, \
	};
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
141
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Instantiates the NEON implementation for both signednesses of one
// bits/size pair. The unsigned variant is expanded first because the lshift
// wrapper of the signed variant refers to the union that the unsigned
// expansion declares.
#define VEC_DEFINE_OPERATIONS(bits, size) \
	VEC_DEFINE_OPERATIONS_SIGN(u, U, bits, size) \
	VEC_DEFINE_OPERATIONS_SIGN( , , bits, size)
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
145
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
146 // Ok, we'll start out with the 64-bit types.
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
147
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// The signed instantiation passes an empty `sign`, so token pasting yields
// names such as vadd_8 rather than vadd_s8. These aliases map each pasted
// name onto the signed intrinsic of the same lane width.

// arithmetic
#define vadd_8       vadd_s8
#define vadd_16      vadd_s16
#define vadd_32      vadd_s32
#define vsub_8       vsub_s8
#define vsub_16      vsub_s16
#define vsub_32      vsub_s32
#define vmul_8       vmul_s8
#define vmul_16      vmul_s16
#define vmul_32      vmul_s32

// shifts
#define vshl_8       vshl_s8
#define vshl_16      vshl_s16
#define vshl_32      vshl_s32

// bitwise
#define veor_8       veor_s8
#define veor_16      veor_s16
#define veor_32      veor_s32
#define vorr_8       vorr_s8
#define vorr_16      vorr_s16
#define vorr_32      vorr_s32
#define vand_8       vand_s8
#define vand_16      vand_s16
#define vand_32      vand_s32

// load and lane extraction
#define vld1_8       vld1_s8
#define vld1_16      vld1_s16
#define vld1_32      vld1_s32
#define vget_lane_8  vget_lane_s8
#define vget_lane_16 vget_lane_s16
#define vget_lane_32 vget_lane_s32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Copies the eight lanes of `vec` into out[0..7], one vget_lane call per lane.
//
//   sign - `u` or empty; pasted into the vget_lane_{sign}8 name
//   vec  - vector expression; evaluated once per lane
//   out  - destination pointer expression; evaluated once per lane
//
// Both expression arguments are parenthesized so that an argument such as
// `p + 1` indexes as (p + 1)[i] rather than p + 1[i].
#define vstore_lane_8(sign, vec, out) \
	do { \
		(out)[0] = vget_lane_##sign##8((vec), 0); \
		(out)[1] = vget_lane_##sign##8((vec), 1); \
		(out)[2] = vget_lane_##sign##8((vec), 2); \
		(out)[3] = vget_lane_##sign##8((vec), 3); \
		(out)[4] = vget_lane_##sign##8((vec), 4); \
		(out)[5] = vget_lane_##sign##8((vec), 5); \
		(out)[6] = vget_lane_##sign##8((vec), 6); \
		(out)[7] = vget_lane_##sign##8((vec), 7); \
	} while (0)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Copies the four lanes of `vec` into out[0..3], one vget_lane call per lane.
//
//   sign - `u` or empty; pasted into the vget_lane_{sign}16 name
//   vec  - vector expression; evaluated once per lane
//   out  - destination pointer expression; evaluated once per lane
//
// Both expression arguments are parenthesized so that an argument such as
// `p + 1` indexes as (p + 1)[i] rather than p + 1[i].
#define vstore_lane_16(sign, vec, out) \
	do { \
		(out)[0] = vget_lane_##sign##16((vec), 0); \
		(out)[1] = vget_lane_##sign##16((vec), 1); \
		(out)[2] = vget_lane_##sign##16((vec), 2); \
		(out)[3] = vget_lane_##sign##16((vec), 3); \
	} while (0)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Copies the two lanes of `vec` into out[0..1], one vget_lane call per lane.
//
//   sign - `u` or empty; pasted into the vget_lane_{sign}32 name
//   vec  - vector expression; evaluated once per lane
//   out  - destination pointer expression; evaluated once per lane
//
// Both expression arguments are parenthesized so that an argument such as
// `p + 1` indexes as (p + 1)[i] rather than p + 1[i].
#define vstore_lane_32(sign, vec, out) \
	do { \
		(out)[0] = vget_lane_##sign##32((vec), 0); \
		(out)[1] = vget_lane_##sign##32((vec), 1); \
	} while (0)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Reinterpret an unsigned vector as the signed vector of the same lane
// width. The names follow the pattern vreinterpret_{bits}_u{bits}, which is
// the identifier the lshift wrapper pastes together for its shift-count
// operand.
#define vreinterpret_8_u8(x)   vreinterpret_s8_u8(x)
#define vreinterpret_16_u16(x) vreinterpret_s16_u16(x)
#define vreinterpret_32_u32(x) vreinterpret_s32_u32(x)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
201
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
202 VEC_DEFINE_OPERATIONS(8, 8)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
203 VEC_DEFINE_OPERATIONS(16, 4)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
204 VEC_DEFINE_OPERATIONS(32, 2)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
205
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
206 #undef vadd_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
207 #undef vadd_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
208 #undef vadd_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
209 #undef vsub_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
210 #undef vsub_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
211 #undef vsub_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
212 #undef vmul_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
213 #undef vmul_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
214 #undef vmul_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
215 #undef vshl_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
216 #undef vshl_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
217 #undef vshl_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
218 #undef veor_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
219 #undef veor_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
220 #undef veor_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
221 #undef vorr_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
222 #undef vorr_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
223 #undef vorr_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
224 #undef vand_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
225 #undef vand_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
226 #undef vand_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
227 #undef vld1_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
228 #undef vld1_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
229 #undef vld1_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
230 #undef vget_lane_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
231 #undef vget_lane_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
232 #undef vget_lane_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
233 #undef vstore_lane_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
234 #undef vstore_lane_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
235 #undef vstore_lane_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
236 #undef vreinterpret_8_u8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
237 #undef vreinterpret_16_u16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
238 #undef vreinterpret_32_u32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
239
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
240 ///////////////////////////////////////////////////////////////////////////////
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
241 // 128-bit
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
242
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
243 // Now we can go ahead and do the 128-bit ones.
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
244
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
245 // NEON doesn't have native 64-bit multiplication, so we have
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
246 // to do it ourselves
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Lane-wise multiply of two signed 2x64-bit vectors, keeping the low 64 bits
// of each product (two's-complement wraparound).
//
// Each lane is assembled from 32-bit pieces. With
// a = (a_hi << 32) + a_lo and b = (b_hi << 32) + b_lo:
//
//   (a * b) mod 2^64 = a_lo * b_lo + ((a_lo * b_hi + a_hi * b_lo) << 32)
//
// The low 64 bits of a product are the same for signed and unsigned operands,
// so the arithmetic is done on the unsigned reinterpretation of the inputs.
static inline int64x2_t vmulq_s64(const int64x2_t a, const int64x2_t b)
{
	const uint64x2_t ua = vreinterpretq_u64_s64(a);
	const uint64x2_t ub = vreinterpretq_u64_s64(b);

	// low 32 bits of every 64-bit lane
	const uint32x2_t a_lo = vmovn_u64(ua);
	const uint32x2_t b_lo = vmovn_u64(ub);

	// Swap the 32-bit halves of `a` inside each 64-bit lane so the 32-bit
	// multiply pairs a_hi with b_lo and a_lo with b_hi (the cross terms).
	// Without the swap this would compute a_lo*b_lo and a_hi*b_hi instead.
	const uint32x4_t cross = vmulq_u32(vreinterpretq_u32_u64(ub), vrev64q_u32(vreinterpretq_u32_u64(ua)));

	// Add the two cross terms of each lane, move the sum into the upper
	// 32 bits, then accumulate the widening product a_lo * b_lo.
	return vreinterpretq_s64_u64(vmlal_u32(vshlq_n_u64(vpaddlq_u32(cross), 32), a_lo, b_lo));
}
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
256
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Lane-wise multiply of two unsigned 2x64-bit vectors, keeping the low 64
// bits of each product (wraps modulo 2^64).
//
// Each lane is assembled from 32-bit pieces. With
// a = (a_hi << 32) + a_lo and b = (b_hi << 32) + b_lo:
//
//   (a * b) mod 2^64 = a_lo * b_lo + ((a_lo * b_hi + a_hi * b_lo) << 32)
static inline uint64x2_t vmulq_u64(const uint64x2_t a, const uint64x2_t b)
{
	// low 32 bits of every 64-bit lane
	const uint32x2_t a_lo = vmovn_u64(a);
	const uint32x2_t b_lo = vmovn_u64(b);

	// Swap the 32-bit halves of `a` inside each 64-bit lane so the 32-bit
	// multiply pairs a_hi with b_lo and a_lo with b_hi (the cross terms).
	// Without the swap this would compute a_lo*b_lo and a_hi*b_hi instead.
	const uint32x4_t cross = vmulq_u32(vreinterpretq_u32_u64(b), vrev64q_u32(vreinterpretq_u32_u64(a)));

	// Add the two cross terms of each lane, move the sum into the upper
	// 32 bits, then accumulate the widening product a_lo * b_lo.
	return vmlal_u32(vshlq_n_u64(vpaddlq_u32(cross), 32), a_lo, b_lo);
}
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
266
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
267 #define vadd_8 vaddq_s8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
268 #define vadd_16 vaddq_s16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
269 #define vadd_32 vaddq_s32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
270 #define vadd_64 vaddq_s64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
271 #define vadd_u8 vaddq_u8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
272 #define vadd_u16 vaddq_u16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
273 #define vadd_u32 vaddq_u32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
274 #define vadd_u64 vaddq_u64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
275 #define vsub_8 vsubq_s8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
276 #define vsub_16 vsubq_s16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
277 #define vsub_32 vsubq_s32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
278 #define vsub_64 vsubq_s64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
279 #define vsub_u8 vsubq_u8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
280 #define vsub_u16 vsubq_u16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
281 #define vsub_u32 vsubq_u32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
282 #define vsub_u64 vsubq_u64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
283 #define vmul_8 vmulq_s8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
284 #define vmul_16 vmulq_s16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
285 #define vmul_32 vmulq_s32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
286 #define vmul_64 vmulq_s64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
287 #define vmul_u8 vmulq_u8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
288 #define vmul_u16 vmulq_u16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
289 #define vmul_u32 vmulq_u32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
290 #define vmul_u64 vmulq_u64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
291 #define vshl_8 vshlq_s8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
292 #define vshl_16 vshlq_s16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
293 #define vshl_32 vshlq_s32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
294 #define vshl_64 vshlq_s64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
295 #define vshl_u8 vshlq_u8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
296 #define vshl_u16 vshlq_u16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
297 #define vshl_u32 vshlq_u32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
298 #define vshl_u64 vshlq_u64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
299 #define veor_8 veorq_s8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
300 #define veor_16 veorq_s16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
301 #define veor_32 veorq_s32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
302 #define veor_64 veorq_s64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
303 #define veor_u8 veorq_u8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
304 #define veor_u16 veorq_u16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
305 #define veor_u32 veorq_u32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
306 #define veor_u64 veorq_u64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
307 #define vorr_8 vorrq_s8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
308 #define vorr_16 vorrq_s16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
309 #define vorr_32 vorrq_s32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
310 #define vorr_64 vorrq_s64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
311 #define vorr_u8 vorrq_u8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
312 #define vorr_u16 vorrq_u16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
313 #define vorr_u32 vorrq_u32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
314 #define vorr_u64 vorrq_u64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
315 #define vand_8 vandq_s8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
316 #define vand_16 vandq_s16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
317 #define vand_32 vandq_s32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
318 #define vand_64 vandq_s64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
319 #define vand_u8 vandq_u8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
320 #define vand_u16 vandq_u16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
321 #define vand_u32 vandq_u32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
322 #define vand_u64 vandq_u64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
323 #define vld1_8 vld1q_s8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
324 #define vld1_16 vld1q_s16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
325 #define vld1_32 vld1q_s32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
326 #define vld1_64 vld1q_s64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
327 #define vld1_u8 vld1q_u8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
328 #define vld1_u16 vld1q_u16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
329 #define vld1_u32 vld1q_u32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
330 #define vld1_u64 vld1q_u64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
331 #define vget_lane_8 vgetq_lane_s8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
332 #define vget_lane_16 vgetq_lane_s16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
333 #define vget_lane_32 vgetq_lane_s32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
334 #define vget_lane_64 vgetq_lane_s64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
335 #define vget_lane_u8 vgetq_lane_u8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
336 #define vget_lane_u16 vgetq_lane_u16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
337 #define vget_lane_u32 vgetq_lane_u32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
338 #define vget_lane_u64 vgetq_lane_u64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Copies the 16 lanes of an 8-bit-lane vector into out[0..15], one lane at a
// time. `sign` is pasted into the getter name, so it selects vget_lane_8
// (empty) or vget_lane_u8 (`u`). `vec` and `out` are expanded once per lane,
// so pass expressions without side effects; both are parenthesized so that
// compound expressions expand correctly.
#define vstore_lane_8(sign, vec, out) \
	do { \
		(out)[0] = vget_lane_##sign##8((vec), 0); \
		(out)[1] = vget_lane_##sign##8((vec), 1); \
		(out)[2] = vget_lane_##sign##8((vec), 2); \
		(out)[3] = vget_lane_##sign##8((vec), 3); \
		(out)[4] = vget_lane_##sign##8((vec), 4); \
		(out)[5] = vget_lane_##sign##8((vec), 5); \
		(out)[6] = vget_lane_##sign##8((vec), 6); \
		(out)[7] = vget_lane_##sign##8((vec), 7); \
		(out)[8] = vget_lane_##sign##8((vec), 8); \
		(out)[9] = vget_lane_##sign##8((vec), 9); \
		(out)[10] = vget_lane_##sign##8((vec), 10); \
		(out)[11] = vget_lane_##sign##8((vec), 11); \
		(out)[12] = vget_lane_##sign##8((vec), 12); \
		(out)[13] = vget_lane_##sign##8((vec), 13); \
		(out)[14] = vget_lane_##sign##8((vec), 14); \
		(out)[15] = vget_lane_##sign##8((vec), 15); \
	} while (0)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Copies the 8 lanes of a 16-bit-lane vector into out[0..7], one lane at a
// time. `sign` is pasted into the getter name, so it selects vget_lane_16
// (empty) or vget_lane_u16 (`u`). `vec` and `out` are expanded once per lane,
// so pass expressions without side effects; both are parenthesized so that
// compound expressions expand correctly.
#define vstore_lane_16(sign, vec, out) \
	do { \
		(out)[0] = vget_lane_##sign##16((vec), 0); \
		(out)[1] = vget_lane_##sign##16((vec), 1); \
		(out)[2] = vget_lane_##sign##16((vec), 2); \
		(out)[3] = vget_lane_##sign##16((vec), 3); \
		(out)[4] = vget_lane_##sign##16((vec), 4); \
		(out)[5] = vget_lane_##sign##16((vec), 5); \
		(out)[6] = vget_lane_##sign##16((vec), 6); \
		(out)[7] = vget_lane_##sign##16((vec), 7); \
	} while (0)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Copies the 4 lanes of a 32-bit-lane vector into out[0..3], one lane at a
// time. `sign` is pasted into the getter name, so it selects vget_lane_32
// (empty) or vget_lane_u32 (`u`). `vec` and `out` are expanded once per lane,
// so pass expressions without side effects; both are parenthesized so that
// compound expressions expand correctly.
#define vstore_lane_32(sign, vec, out) \
	do { \
		(out)[0] = vget_lane_##sign##32((vec), 0); \
		(out)[1] = vget_lane_##sign##32((vec), 1); \
		(out)[2] = vget_lane_##sign##32((vec), 2); \
		(out)[3] = vget_lane_##sign##32((vec), 3); \
	} while (0)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Copies the 2 lanes of a 64-bit-lane vector into out[0..1], one lane at a
// time. `sign` is pasted into the getter name, so it selects vget_lane_64
// (empty) or vget_lane_u64 (`u`). `vec` and `out` are expanded once per lane,
// so pass expressions without side effects; both are parenthesized so that
// compound expressions expand correctly.
#define vstore_lane_64(sign, vec, out) \
	do { \
		(out)[0] = vget_lane_##sign##64((vec), 0); \
		(out)[1] = vget_lane_##sign##64((vec), 1); \
	} while (0)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Width-suffixed aliases for the 128-bit section:
// vreinterpret_<bits>_u<bits>(x) expands to the q-form intrinsic that
// reinterprets an unsigned vector as the signed vector with the same lane
// width.
381 #define vreinterpret_8_u8(x) vreinterpretq_s8_u8(x)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
382 #define vreinterpret_16_u16(x) vreinterpretq_s16_u16(x)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
383 #define vreinterpret_32_u32(x) vreinterpretq_s32_u32(x)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
384 #define vreinterpret_64_u64(x) vreinterpretq_s64_u64(x)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
385
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
// Instantiate VEC_DEFINE_OPERATIONS once per (lane bits, lane count) pair:
// 8x16, 16x8, 32x4 and 64x2, i.e. 128 bits per vector.
// NOTE(review): the macro is defined outside this excerpt; presumably it
// emits the operation implementations through the aliases defined above -
// confirm against its definition.
386 VEC_DEFINE_OPERATIONS(8, 16)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
387 VEC_DEFINE_OPERATIONS(16, 8)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
388 VEC_DEFINE_OPERATIONS(32, 4)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
389 VEC_DEFINE_OPERATIONS(64, 2)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
390
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
391 #undef vadd_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
392 #undef vadd_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
393 #undef vadd_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
394 #undef vadd_64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
395 #undef vsub_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
396 #undef vsub_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
397 #undef vsub_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
398 #undef vsub_64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
399 #undef vmul_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
400 #undef vmul_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
401 #undef vmul_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
402 #undef vmul_64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
403 #undef vshl_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
404 #undef vshl_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
405 #undef vshl_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
406 #undef vshl_64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
407 #undef veor_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
408 #undef veor_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
409 #undef veor_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
410 #undef veor_64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
411 #undef vorr_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
412 #undef vorr_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
413 #undef vorr_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
414 #undef vorr_64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
415 #undef vand_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
416 #undef vand_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
417 #undef vand_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
418 #undef vand_64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
419 #undef vld1_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
420 #undef vld1_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
421 #undef vld1_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
422 #undef vld1_64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
423 #undef vget_lane_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
424 #undef vget_lane_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
425 #undef vget_lane_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
426 #undef vget_lane_64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
427 #undef vstore_lane_8
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
428 #undef vstore_lane_16
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
429 #undef vstore_lane_32
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
430 #undef vstore_lane_64