annotate src/impl/x86/avx2.c @ 25:92156fe32755

impl/ppc/altivec: update to new implementation the signed average function is wrong; it needs to round up the number when only one of them is odd, but that doesn't necessarily seem to be true because altivec is weird, and that's what we need to emulate the quirks for. ugh. also the altivec backend uses the generic functions instead of fallbacks because it does indeed use the exact same memory structure as the generic implementation...
author Paper <paper@tflc.us>
date Sun, 24 Nov 2024 11:15:59 +0000
parents e49e70f7012f
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
1 /**
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
2 * vec - a tiny SIMD vector library in C99
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
3 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
4 * Copyright (c) 2024 Paper
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
5 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
7 * of this software and associated documentation files (the "Software"), to deal
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
8 * in the Software without restriction, including without limitation the rights
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
10 * copies of the Software, and to permit persons to whom the Software is
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
11 * furnished to do so, subject to the following conditions:
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
12 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
13 * The above copyright notice and this permission notice shall be included in all
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
14 * copies or substantial portions of the Software.
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
15 *
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
22 * SOFTWARE.
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
23 **/
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
24
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
25 #include "vec/impl/x86/avx2.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
26 #include "vec/impl/generic.h"
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
27
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
28 #include <immintrin.h>
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
29
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
30 // this does NOT handle sign bits properly, use with caution
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
31 #define VEC_AVX2_OPERATION_8x32_16x16(op, sign) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
32 do { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
33 union v##sign##int8x32_impl_data *vec1d = (union v##sign##int8x32_impl_data *)&vec1; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
34 union v##sign##int8x32_impl_data *vec2d = (union v##sign##int8x32_impl_data *)&vec2; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
35 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
36 /* unpack and multiply */ \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
37 __m256i dst_even = _mm256_##op##_epi16(vec1d->avx2, vec2d->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
38 __m256i dst_odd = _mm256_##op##_epi16(_mm256_srli_epi16(vec1d->avx2, 8), _mm256_srli_epi16(vec2d->avx2, 8)); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
39 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
40 /* repack */ \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
41 vec1d->avx2 = _mm256_or_si256( \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
42 _mm256_slli_epi16(dst_odd, 8), \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
43 _mm256_srli_epi16(_mm256_slli_epi16(dst_even, 8), 8) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
44 ); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
45 return vec1d->vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
46 } while (0)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
47
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
48 #define VEC_AVX2_OPERATION_16x16(op, sign) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
49 do { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
50 union v##sign##int16x16_impl_data *vec1d = (union v##sign##int16x16_impl_data *)&vec1; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
51 union v##sign##int16x16_impl_data *vec2d = (union v##sign##int16x16_impl_data *)&vec2; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
52 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
53 /* unpack and multiply */ \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
54 __m256i dst_even = _mm256_##op##_epi32(vec1d->avx2, vec2d->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
55 __m256i dst_odd = _mm256_##op##_epi32(_mm256_srli_epi32(vec1d->avx2, 16), _mm256_srli_epi32(vec2d->avx2, 16)); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
56 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
57 /* repack */ \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
58 vec1d->avx2 = _mm256_or_si256( \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
59 _mm256_slli_epi32(dst_odd, 16), \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
60 _mm256_srli_epi32(_mm256_slli_epi16(dst_even, 16), 16) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
61 ); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
62 return vec1d->vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
63 } while (0)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
64
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
65 // multiplication
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
66
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
67 #define VEC_AVX2_MUL_8x32(sign) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
68 VEC_AVX2_OPERATION_8x32_16x16(mullo, sign)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
69
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
70 #define VEC_AVX2_MUL_16x16(sign) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
71 do { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
72 union v##sign##int16x16_impl_data *vec1d = (union v##sign##int16x16_impl_data *)&vec1; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
73 union v##sign##int16x16_impl_data *vec2d = (union v##sign##int16x16_impl_data *)&vec2; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
74 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
75 vec1d->avx2 = _mm256_mullo_epi16(vec1d->avx2, vec2d->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
76 return vec1d->vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
77 } while (0)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
78
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
79 #define VEC_AVX2_MUL_32x8(sign) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
80 do { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
81 union v##sign##int32x8_impl_data *vec1d = (union v##sign##int32x8_impl_data *)&vec1; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
82 union v##sign##int32x8_impl_data *vec2d = (union v##sign##int32x8_impl_data *)&vec2; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
83 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
84 vec1d->avx2 = _mm256_mullo_epi32(vec1d->avx2, vec2d->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
85 return vec1d->vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
86 } while (0)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
87
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
88 #define VEC_AVX2_MUL_64x4(sign) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
89 do { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
90 union v##sign##int64x4_impl_data *vec1d = (union v##sign##int64x4_impl_data *)&vec1; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
91 union v##sign##int64x4_impl_data *vec2d = (union v##sign##int64x4_impl_data *)&vec2; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
92 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
93 __m256i ac = _mm256_mul_epu32(vec1d->avx2, vec2d->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
94 __m256i b = _mm256_srli_epi64(vec1d->avx2, 32); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
95 __m256i bc = _mm256_mul_epu32(b, vec2d->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
96 __m256i d = _mm256_srli_epi64(vec2d->avx2, 32); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
97 __m256i ad = _mm256_mul_epu32(vec1d->avx2, d); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
98 __m256i hi = _mm256_add_epi64(bc, ad); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
99 hi = _mm256_slli_epi64(hi, 32); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
100 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
101 vec1d->avx2 = _mm256_add_epi64(hi, ac); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
102 return vec1d->vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
103 } while (0)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
104
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
105 // operations
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
106
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
107 #define VEC_AVX2_DEFINE_OPERATIONS_SIGN(sign, bits, size) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
108 union v##sign##int##bits##x##size##_impl_data { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
109 v##sign##int##bits##x##size vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
110 __m256i avx2; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
111 }; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
112 \
24
e49e70f7012f impl/x86: add static assertions for alignment and size
Paper <paper@tflc.us>
parents: 23
diff changeset
113 VEC_STATIC_ASSERT(VEC_ALIGNOF(__m256i) <= VEC_ALIGNOF(v##sign##int##bits##x##size), "vec: v" #sign "int" #bits "x" #size " alignment needs to be expanded to fit intrinsic type size"); \
e49e70f7012f impl/x86: add static assertions for alignment and size
Paper <paper@tflc.us>
parents: 23
diff changeset
114 VEC_STATIC_ASSERT(sizeof(__m256i) <= sizeof(v##sign##int##bits##x##size), "vec: v" #sign "int" #bits "x" #size " needs to be expanded to fit intrinsic type size"); \
e49e70f7012f impl/x86: add static assertions for alignment and size
Paper <paper@tflc.us>
parents: 23
diff changeset
115 \
23
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
116 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_avx2_load_aligned(const vec_##sign##int##bits in[size]) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
117 { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
118 union v##sign##int##bits##x##size##_impl_data vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
119 vec.avx2 = _mm256_load_si256((const __m256i *)in); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
120 return vec.vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
121 } \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
122 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
123 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_avx2_load(const vec_##sign##int##bits in[size]) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
124 { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
125 union v##sign##int##bits##x##size##_impl_data vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
126 vec.avx2 = _mm256_loadu_si256((const __m256i *)in); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
127 return vec.vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
128 } \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
129 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
130 static void v##sign##int##bits##x##size##_avx2_store_aligned(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
131 { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
132 _mm256_store_si256((__m256i *)out, ((union v##sign##int##bits##x##size##_impl_data*)&vec)->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
133 } \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
134 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
135 static void v##sign##int##bits##x##size##_avx2_store(v##sign##int##bits##x##size vec, vec_##sign##int##bits out[size]) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
136 { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
137 _mm256_storeu_si256((__m256i *)out, ((union v##sign##int##bits##x##size##_impl_data*)&vec)->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
138 } \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
139 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
140 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_avx2_add(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
141 { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
142 union v##sign##int##bits##x##size##_impl_data *vec1d = (union v##sign##int##bits##x##size##_impl_data *)&vec1; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
143 union v##sign##int##bits##x##size##_impl_data *vec2d = (union v##sign##int##bits##x##size##_impl_data *)&vec2; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
144 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
145 vec1d->avx2 = _mm256_add_epi##bits(vec1d->avx2, vec2d->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
146 return vec1d->vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
147 } \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
148 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
149 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_avx2_sub(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
150 { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
151 union v##sign##int##bits##x##size##_impl_data *vec1d = (union v##sign##int##bits##x##size##_impl_data *)&vec1; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
152 union v##sign##int##bits##x##size##_impl_data *vec2d = (union v##sign##int##bits##x##size##_impl_data *)&vec2; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
153 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
154 vec1d->avx2 = _mm256_sub_epi##bits(vec1d->avx2, vec2d->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
155 return vec1d->vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
156 } \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
157 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
158 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_avx2_mul(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
159 { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
160 VEC_AVX2_MUL_##bits##x##size(sign); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
161 } \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
162 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
163 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_avx2_and(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
164 { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
165 union v##sign##int##bits##x##size##_impl_data *vec1d = (union v##sign##int##bits##x##size##_impl_data *)&vec1; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
166 union v##sign##int##bits##x##size##_impl_data *vec2d = (union v##sign##int##bits##x##size##_impl_data *)&vec2; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
167 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
168 vec1d->avx2 = _mm256_and_si256(vec1d->avx2, vec2d->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
169 return vec1d->vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
170 } \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
171 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
172 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_avx2_or(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
173 { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
174 union v##sign##int##bits##x##size##_impl_data *vec1d = (union v##sign##int##bits##x##size##_impl_data *)&vec1; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
175 union v##sign##int##bits##x##size##_impl_data *vec2d = (union v##sign##int##bits##x##size##_impl_data *)&vec2; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
176 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
177 vec1d->avx2 = _mm256_or_si256(vec1d->avx2, vec2d->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
178 return vec1d->vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
179 } \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
180 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
181 static v##sign##int##bits##x##size v##sign##int##bits##x##size##_avx2_xor(v##sign##int##bits##x##size vec1, v##sign##int##bits##x##size vec2) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
182 { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
183 union v##sign##int##bits##x##size##_impl_data *vec1d = (union v##sign##int##bits##x##size##_impl_data *)&vec1; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
184 union v##sign##int##bits##x##size##_impl_data *vec2d = (union v##sign##int##bits##x##size##_impl_data *)&vec2; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
185 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
186 vec1d->avx2 = _mm256_xor_si256(vec1d->avx2, vec2d->avx2); \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
187 return vec1d->vec; \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
188 } \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
189 \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
190 const v##sign##int##bits##x##size##_impl v##sign##int##bits##x##size##_impl_avx2 = { \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
191 v##sign##int##bits##x##size##_generic_splat, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
192 v##sign##int##bits##x##size##_avx2_load_aligned, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
193 v##sign##int##bits##x##size##_avx2_load, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
194 v##sign##int##bits##x##size##_avx2_store_aligned, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
195 v##sign##int##bits##x##size##_avx2_store, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
196 v##sign##int##bits##x##size##_avx2_add, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
197 v##sign##int##bits##x##size##_avx2_sub, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
198 v##sign##int##bits##x##size##_avx2_mul, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
199 v##sign##int##bits##x##size##_generic_div, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
200 v##sign##int##bits##x##size##_generic_avg, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
201 v##sign##int##bits##x##size##_avx2_and, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
202 v##sign##int##bits##x##size##_avx2_or, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
203 v##sign##int##bits##x##size##_avx2_xor, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
204 v##sign##int##bits##x##size##_generic_not, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
205 v##sign##int##bits##x##size##_generic_lshift, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
206 v##sign##int##bits##x##size##_generic_rshift, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
207 v##sign##int##bits##x##size##_generic_lrshift, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
208 v##sign##int##bits##x##size##_generic_cmplt, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
209 v##sign##int##bits##x##size##_generic_cmple, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
210 v##sign##int##bits##x##size##_generic_cmpeq, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
211 v##sign##int##bits##x##size##_generic_cmpge, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
212 v##sign##int##bits##x##size##_generic_cmpgt, \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
213 };
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
214
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
215 #define VEC_AVX2_DEFINE_OPERATIONS(bits, size) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
216 VEC_AVX2_DEFINE_OPERATIONS_SIGN( , bits, size) \
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
217 VEC_AVX2_DEFINE_OPERATIONS_SIGN(u, bits, size)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
218
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
219 VEC_AVX2_DEFINE_OPERATIONS(8, 32)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
220 VEC_AVX2_DEFINE_OPERATIONS(16, 16)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
221 VEC_AVX2_DEFINE_OPERATIONS(32, 8)
e26874655738 *: huge refactor, new major release (hahaha)
Paper <paper@tflc.us>
parents:
diff changeset
222 VEC_AVX2_DEFINE_OPERATIONS(64, 4)