Mercurial > vec
comparison src/cpu.c @ 23:e26874655738
*: huge refactor, new major release (hahaha)
I keep finding things that are broken...
The problem NOW was that vec would unintentionally build some
functions with extended instruction sets, which is Bad and would
mean that for all intents and purposes the CPU detection was
completely broken.
Now vec is no longer header only either. Boohoo. However this gives
a lot more flexibility to vec since we no longer want or need to
care about C++ crap.
The NEON and Altivec implementations have not been updated which
means they won't compile hence why they're commented out in the
cmake build file.
author | Paper <paper@tflc.us> |
---|---|
date | Sun, 24 Nov 2024 02:52:40 -0500 (2 months ago) |
parents | |
children | 92156fe32755 |
comparison
equal
deleted
inserted
replaced
22:fbcd3fa6f8fc | 23:e26874655738 |
---|---|
1 /** | |
2 * vec - a tiny SIMD vector library in C99 | |
3 * | |
4 * Copyright (c) 2024 Paper | |
5 * | |
6 * Permission is hereby granted, free of charge, to any person obtaining a copy | |
7 * of this software and associated documentation files (the "Software"), to deal | |
8 * in the Software without restriction, including without limitation the rights | |
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
10 * copies of the Software, and to permit persons to whom the Software is | |
11 * furnished to do so, subject to the following conditions: | |
12 * | |
13 * The above copyright notice and this permission notice shall be included in all | |
14 * copies or substantial portions of the Software. | |
15 * | |
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
22 * SOFTWARE. | |
23 **/ | |
24 | |
25 /* Detect CPU SIMD support. Much of this code was stolen from SDL. | |
26 * | |
27 * Simple DirectMedia Layer | |
28 * Copyright (C) 1997-2024 Sam Lantinga <slouken@libsdl.org> | |
29 * | |
30 * This software is provided 'as-is', without any express or implied | |
31 * warranty. In no event will the authors be held liable for any damages | |
32 * arising from the use of this software. | |
33 * | |
34 * Permission is granted to anyone to use this software for any purpose, | |
35 * including commercial applications, and to alter it and redistribute it | |
36 * freely, subject to the following restrictions: | |
37 * | |
38 * 1. The origin of this software must not be misrepresented; you must not | |
39 * claim that you wrote the original software. If you use this software | |
40 * in a product, an acknowledgment in the product documentation would be | |
41 * appreciated but is not required. | |
42 * 2. Altered source versions must be plainly marked as such, and must not be | |
43 * misrepresented as being the original software. | |
44 * 3. This notice may not be removed or altered from any source distribution. | |
45 */ | |
46 | |
47 #include "vec/cpu.h" | |
48 | |
49 #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__)) | |
50 # include <sys/sysctl.h> // For AltiVec check | |
51 #elif defined(__OpenBSD__) && defined(__powerpc__) | |
52 # include <sys/types.h> | |
53 # include <sys/sysctl.h> // For AltiVec check | |
54 # include <machine/cpu.h> | |
55 #elif defined(__FreeBSD__) && defined(__powerpc__) | |
56 # include <machine/cpu.h> | |
57 # include <sys/auxv.h> | |
58 #elif defined(__ALTIVEC__) | |
59 # include <signal.h> | |
60 # include <setjmp.h> | |
61 #endif | |
62 | |
63 #ifdef __FreeBSD__ | |
64 # include <sys/param.h> | |
65 #endif | |
66 | |
67 #if (defined(__linux__) || defined(__ANDROID__)) && defined(__arm__) | |
68 # include <unistd.h> | |
69 # include <sys/types.h> | |
70 # include <sys/stat.h> | |
71 # include <fcntl.h> | |
72 # include <elf.h> | |
73 | |
74 /*#include <asm/hwcap.h>*/ | |
75 # ifndef AT_HWCAP | |
76 # define AT_HWCAP 16 | |
77 # endif | |
78 # ifndef AT_PLATFORM | |
79 # define AT_PLATFORM 15 | |
80 # endif | |
81 # ifndef HWCAP_NEON | |
82 # define HWCAP_NEON (1 << 12) | |
83 # endif | |
84 #endif | |
85 | |
/**
 * Probe whether the CPUID instruction can be used.
 *
 * On 32-bit and 64-bit x86 the probe tries to toggle the ID bit (0x200000)
 * of EFLAGS: if the bit can be flipped, CPUID is available. MSVC on x64 has
 * no inline assembler, so the answer is hard-coded to 1 there. On every
 * compiler/target combination that matches none of the branches below the
 * function reports 0.
 *
 * @return 1 if CPUID is available, 0 otherwise.
 */
static inline int vec_CPU_have_CPUID(void)
{
	int has_CPUID = 0;

#if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
	/* The operand is "+m" (read-write), not "=m": the asm only stores to it
	 * on the success path, so the compiler must keep the `= 0` above alive
	 * for the path that jumps straight to label 1. */
	__asm__ (
	"        pushfl                      # Get original EFLAGS             \n"
	"        popl    %%eax                                                 \n"
	"        movl    %%eax,%%ecx                                           \n"
	"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
	"        pushl   %%eax               # Save new EFLAGS value on stack  \n"
	"        popfl                       # Replace current EFLAGS value    \n"
	"        pushfl                      # Get new EFLAGS                  \n"
	"        popl    %%eax               # Store new EFLAGS in EAX         \n"
	"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
	"        jz      1f                  # Processor=80486                 \n"
	"        movl    $1,%0               # We have CPUID support           \n"
	"1:                                                                    \n"
	: "+m" (has_CPUID)
	:
	: "%eax", "%ecx"
	);
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
	/* Technically, if this is being compiled under __x86_64__ then it has
	   CPUid by definition.  But it's nice to be able to prove it.  :) */
	/* NOTE(review): pushfq/pushq store below %rsp, which on the SysV ABI is
	 * the red zone -- confirm this cannot overlap a live local. */
	__asm__ (
	"        pushfq                      # Get original EFLAGS             \n"
	"        popq    %%rax                                                 \n"
	"        movq    %%rax,%%rcx                                           \n"
	"        xorl    $0x200000,%%eax     # Flip ID bit in EFLAGS           \n"
	"        pushq   %%rax               # Save new EFLAGS value on stack  \n"
	"        popfq                       # Replace current EFLAGS value    \n"
	"        pushfq                      # Get new EFLAGS                  \n"
	"        popq    %%rax               # Store new EFLAGS in EAX         \n"
	"        xorl    %%ecx,%%eax         # Can not toggle ID bit,          \n"
	"        jz      1f                  # Processor=80486                 \n"
	"        movl    $1,%0               # We have CPUID support           \n"
	"1:                                                                    \n"
	: "+m" (has_CPUID)
	:
	: "%rax", "%rcx"
	);
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
	__asm {
		pushfd                      ; Get original EFLAGS
		pop     eax
		mov     ecx, eax
		xor     eax, 200000h        ; Flip ID bit in EFLAGS
		push    eax                 ; Save new EFLAGS value on stack
		popfd                       ; Replace current EFLAGS value
		pushfd                      ; Get new EFLAGS
		pop     eax                 ; Store new EFLAGS in EAX
		xor     eax, ecx            ; Can not toggle ID bit,
		jz      done                ; Processor=80486
		mov     has_CPUID,1         ; We have CPUID support
done:
	}
#elif defined(_MSC_VER) && defined(_M_X64)
	has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
	/* NOTE(review): the result is stored through a hard-coded frame offset
	 * (-8(%ebp)); this presumably relies on has_CPUID living at that slot --
	 * confirm against the Sun compiler's frame layout. */
	__asm (
	"       pushfl                 \n"
	"       popl    %eax           \n"
	"       movl    %eax,%ecx      \n"
	"       xorl    $0x200000,%eax \n"
	"       pushl   %eax           \n"
	"       popfl                  \n"
	"       pushfl                 \n"
	"       popl    %eax           \n"
	"       xorl    %ecx,%eax      \n"
	"       jz      1f             \n"
	"       movl    $1,-8(%ebp)    \n"
	"1:                            \n"
	);
#elif defined(__sun) && defined(__amd64)
	/* NOTE(review): same hard-coded frame slot assumption as the 32-bit Sun
	 * branch (-8(%rbp)). */
	__asm (
	"       pushfq                 \n"
	"       popq    %rax           \n"
	"       movq    %rax,%rcx      \n"
	"       xorl    $0x200000,%eax \n"
	"       pushq   %rax           \n"
	"       popfq                  \n"
	"       pushfq                 \n"
	"       popq    %rax           \n"
	"       xorl    %ecx,%eax      \n"
	"       jz      1f             \n"
	"       movl    $1,-8(%rbp)    \n"
	"1:                            \n"
	);
#endif

	return has_CPUID;
}
179 | |
/*
 * VEC_CPU_CPUID(func, a, b, c, d)
 *
 * Execute CPUID for leaf `func` with ECX cleared to zero beforehand, and
 * store the resulting EAX/EBX/ECX/EDX values into the lvalues a, b, c and d.
 * On targets without a matching implementation all four outputs are set
 * to 0.
 */
#if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
/* EBX is saved and restored around the instruction; its value is handed
 * back through ESI instead. */
# define VEC_CPU_CPUID(func, a, b, c, d) \
	__asm__ __volatile__( \
		"pushl %%ebx\n\t" \
		"xorl %%ecx,%%ecx\n\t" \
		"cpuid\n\t" \
		"movl %%ebx, %%esi\n\t" \
		"popl %%ebx\n\t" \
		: "=a"(a), "=S"(b), "=c"(c), "=d"(d) \
		: "a"(func))
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
/* Same scheme as the 32-bit variant, using the 64-bit registers. */
# define VEC_CPU_CPUID(func, a, b, c, d) \
	__asm__ __volatile__( \
		"pushq %%rbx\n\t" \
		"xorq %%rcx,%%rcx\n\t" \
		"cpuid\n\t" \
		"movq %%rbx, %%rsi\n\t" \
		"popq %%rbx\n\t" \
		: "=a"(a), "=S"(b), "=c"(c), "=d"(d) \
		: "a"(func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
/* MSVC (32-bit) / Watcom inline assembler. */
# define VEC_CPU_CPUID(func, a, b, c, d) \
	__asm { \
		__asm mov eax, func \
		__asm xor ecx, ecx \
		__asm cpuid \
		__asm mov a, eax \
		__asm mov b, ebx \
		__asm mov c, ecx \
		__asm mov d, edx \
	}
#elif (defined(_MSC_VER) && defined(_M_X64))
// Use __cpuidex instead of __cpuid because ICL does not clear ecx register
# define VEC_CPU_CPUID(func, a, b, c, d) \
	do { \
		int CPUInfo[4]; \
		__cpuidex(CPUInfo, func, 0); \
		(a) = CPUInfo[0]; \
		(b) = CPUInfo[1]; \
		(c) = CPUInfo[2]; \
		(d) = CPUInfo[3]; \
	} while (0)
#else
/* No CPUID on this compiler/target: report all-zero registers. */
# define VEC_CPU_CPUID(func, a, b, c, d) \
	do { \
		(a) = (b) = (c) = (d) = 0; \
		(void)(a); \
		(void)(b); \
		(void)(c); \
		(void)(d); \
	} while (0)
#endif
232 | |
233 // --------------------------------------------------------------- | |
234 | |
/* Cached CPUID results; all of these stay zero until
 * vec_CPU_get_CPUID_features() has filled them in. */

/* EAX, EBX, ECX, EDX (in that order) as returned by CPUID leaf 1. */
static int vec_CPU_CPUIDFeatures[4] = { 0, 0, 0, 0 };
/* EAX as returned by CPUID leaf 0. */
static int vec_CPU_CPUIDMaxFunction = 0;
/* Non-zero when xgetbv reported bits 1 and 2 (mask 6) both set. */
static int vec_CPU_OSSavesYMM = 0;
/* Non-zero when, in addition, bits 5..7 (mask 0xe0) are all set. */
static int vec_CPU_OSSavesZMM = 0;
239 | |
240 static inline void vec_CPU_get_CPUID_features(void) | |
241 { | |
242 static int checked = 0; | |
243 if (!checked) { | |
244 checked = 1; | |
245 if (vec_CPU_have_CPUID()) { | |
246 int a, b, c, d; | |
247 VEC_CPU_CPUID(0, a, b, c, d); | |
248 vec_CPU_CPUIDMaxFunction = a; | |
249 if (vec_CPU_CPUIDMaxFunction >= 1) { | |
250 VEC_CPU_CPUID(1, a, b, c, d); | |
251 vec_CPU_CPUIDFeatures[0] = a; | |
252 vec_CPU_CPUIDFeatures[1] = b; | |
253 vec_CPU_CPUIDFeatures[2] = c; | |
254 vec_CPU_CPUIDFeatures[3] = d; | |
255 | |
256 // Check to make sure we can call xgetbv | |
257 if (c & 0x08000000) { | |
258 // Call xgetbv to see if YMM (etc) register state is saved | |
259 #if (defined(__GNUC__) || defined(__llvm__)) && (defined(__i386__) || defined(__x86_64__)) | |
260 __asm__(".byte 0x0f, 0x01, 0xd0" | |
261 : "=a"(a) | |
262 : "c"(0) | |
263 : "%edx"); | |
264 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) // VS2010 SP1 | |
265 a = (int)_xgetbv(0); | |
266 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__) | |
267 __asm { | |
268 xor ecx, ecx | |
269 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0 | |
270 mov a, eax | |
271 } | |
272 #endif | |
273 vec_CPU_OSSavesYMM = ((a & 6) == 6) ? 1 : 0; | |
274 vec_CPU_OSSavesZMM = (vec_CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? 1 : 0; | |
275 } | |
276 } | |
277 } | |
278 } | |
279 } | |
280 | |
#if !((defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))) && defined(VEC_COMPILER_HAS_ALTIVEC) && defined(__GNUC__)
/* Jump target used to get back out of the SIGILL handler below. */
static jmp_buf vec_jmpbuf;

/* Signal handler: unwinds to the setjmp(vec_jmpbuf) call site, making that
 * setjmp() return 1. The signal number is not used. */
static void vec_CPU_illegal_instruction(int sig)
{
	(void)sig;
	longjmp(vec_jmpbuf, 1);
}
#endif
288 | |
/**
 * Detect AltiVec support at run time.
 *
 *  - Mac OS X (PowerPC) and OpenBSD (PowerPC): ask the kernel via sysctl().
 *  - FreeBSD (PowerPC): read AT_HWCAP with elf_aux_info().
 *  - Other GCC-compatible builds with VEC_COMPILER_HAS_ALTIVEC: execute an
 *    AltiVec operation with a temporary SIGILL handler installed.
 *  - Anything else: no probe is compiled in and the result is 0.
 *
 * @return non-zero if AltiVec was detected, 0 otherwise.
 */
static int vec_CPU_have_ALTIVEC(void)
{
	/* presumably volatile because of the setjmp()/longjmp() probe below */
	volatile int altivec = 0;
#if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))
# ifdef __OpenBSD__
	int mib[2] = { CTL_MACHDEP, CPU_ALTIVEC };
# else
	int mib[2] = { CTL_HW, HW_VECTORUNIT };
# endif
	int has_vector_unit = 0;
	vec_uintsize len = sizeof(has_vector_unit);

	/* Leave the result at 0 when the sysctl query itself fails. */
	if (sysctl(mib, 2, &has_vector_unit, &len, NULL, 0) == 0)
		altivec = (has_vector_unit != 0);
#elif defined(__FreeBSD__) && defined(__powerpc__)
	unsigned long hwcap = 0;

	elf_aux_info(AT_HWCAP, &hwcap, sizeof(hwcap));
	altivec = hwcap & PPC_FEATURE_HAS_ALTIVEC;
#elif defined(VEC_COMPILER_HAS_ALTIVEC) && defined(__GNUC__)
	/* Remember the previous SIGILL disposition so it can be restored. */
	void (*previous)(int) = signal(SIGILL, vec_CPU_illegal_instruction);

	if (setjmp(vec_jmpbuf) == 0) {
		/* If the handler fires, longjmp() brings us back to setjmp() with
		 * a non-zero value and the assignment below is skipped. */
		vector unsigned char vec;
		vec_and(vec, vec);
		altivec = 1;
	}
	signal(SIGILL, previous);
#endif
	return altivec;
}
321 | |
/**
 * Detect AltiVec VSX support at run time.
 *
 * The actual probe instruction is disabled (see the comment inside), so
 * this currently always reports 0. On GCC-compatible builds with
 * VEC_COMPILER_HAS_ALTIVEC_VSX it still installs and then restores the
 * SIGILL handler around the (empty) probe.
 *
 * @return 0 (VSX is never reported as present).
 */
static int vec_CPU_have_ALTIVEC_VSX(void)
{
	volatile int vsx = 0;
#if defined(VEC_COMPILER_HAS_ALTIVEC_VSX) && defined(__GNUC__)
# warning Compiling UNTESTED code for VSX.
	void (*previous)(int) = signal(SIGILL, vec_CPU_illegal_instruction);

	if (setjmp(vec_jmpbuf) == 0) {
		/* this is completely untested, hence disabled:
		 *
		 *   __asm__ __volatile__("mtspr 256, %0\n\t"
		 *                        "xxland %%v0, %%v0, %%v0" ::"r"(-1));
		 *   vsx = 1;
		 */
	}
	signal(SIGILL, previous);
#endif
	return vsx;
}
339 | |
/*
 * x86 feature tests against the cached CPUID leaf-1 words.
 * vec_CPU_CPUIDFeatures[2] is the ECX word and [3] the EDX word, as stored
 * by vec_CPU_get_CPUID_features(). Each macro yields the masked bit, i.e.
 * non-zero when the feature is present -- not necessarily 1.
 */
#define vec_CPU_have_MMX()   (vec_CPU_CPUIDFeatures[3] & (1 << 23))
#define vec_CPU_have_SSE()   (vec_CPU_CPUIDFeatures[3] & (1 << 25))
#define vec_CPU_have_SSE2()  (vec_CPU_CPUIDFeatures[3] & (1 << 26))
#define vec_CPU_have_SSE3()  (vec_CPU_CPUIDFeatures[2] & (1 << 0))
#define vec_CPU_have_SSE41() (vec_CPU_CPUIDFeatures[2] & (1 << 19))
#define vec_CPU_have_SSE42() (vec_CPU_CPUIDFeatures[2] & (1 << 20))
/* AVX additionally requires the YMM-state flag cached in vec_CPU_OSSavesYMM. */
#define vec_CPU_have_AVX()   (vec_CPU_OSSavesYMM && (vec_CPU_CPUIDFeatures[2] & (1 << 28)))
347 | |
348 static inline int vec_CPU_have_AVX2(void) | |
349 { | |
350 if (vec_CPU_OSSavesYMM && (vec_CPU_CPUIDMaxFunction >= 7)) { | |
351 int a, b, c, d; | |
352 VEC_CPU_CPUID(7, a, b, c, d); | |
353 return b & 0x00000020; | |
354 (void)a, (void)c, (void)d; | |
355 } | |
356 return 0; | |
357 } | |
358 | |
359 static inline int vec_CPU_have_AVX512F(void) | |
360 { | |
361 if (vec_CPU_OSSavesYMM && (vec_CPU_CPUIDMaxFunction >= 7)) { | |
362 int a, b, c, d; | |
363 VEC_CPU_CPUID(7, a, b, c, d); | |
364 return b & 0x00000020; | |
365 (void)a, (void)c, (void)d; | |
366 } | |
367 return 0; | |
368 } | |
369 | |
#if defined(__linux__) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
/**
 * Scan /proc/self/auxv for the AT_HWCAP entry and test its HWCAP_NEON bit.
 *
 * @return 1 if the AT_HWCAP entry was found with HWCAP_NEON set; 0 if the
 *         bit is clear, the entry was not found, or the file could not be
 *         opened.
 */
static int readProcAuxvForNeon(void)
{
	Elf32_auxv_t entry;
	int has_neon = 0;
	int fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);

	if (fd < 0)
		return 0;

	/* Stop at the first short read or at the first AT_HWCAP record. */
	while (read(fd, &entry, sizeof(entry)) == sizeof(entry)) {
		if (entry.a_type != AT_HWCAP)
			continue;
		has_neon = (entry.a_un.a_val & HWCAP_NEON) == HWCAP_NEON;
		break;
	}

	close(fd);
	return has_neon;
}
#endif
390 | |
/**
 * Detect ARM NEON support.
 *
 * The answer comes from whichever platform-specific branch the preprocessor
 * selects: a fixed value where the platform guarantees (or rules out) NEON,
 * otherwise a query to the operating system. Non-ARM builds report 0.
 *
 * @return non-zero if NEON is available, 0 otherwise.
 */
static int vec_CPU_have_NEON(void)
{
/* The way you detect NEON is a privileged instruction on ARM, so you have
   to query the OS kernel in a platform-specific way. :/ */
#if defined(SDL_CPUINFO_DISABLED)
	return 0; /* disabled */
#elif (defined(__WINDOWS__) || defined(__WINRT__) || defined(__GDK__)) && (defined(_M_ARM) || defined(_M_ARM64))
/* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
/* Seems to have been removed */
#ifndef PF_ARM_NEON_INSTRUCTIONS_AVAILABLE
#define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
#endif
	/* All WinRT ARM devices are required to support NEON, but just in case. */
	return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
#elif (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || defined(__aarch64__)
	return 1; /* ARMv8 always has non-optional NEON support. */
#elif defined(__VITA__)
	return 1;
#elif defined(__3DS__)
	return 0;
#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
	/* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
	return 1; /* all Apple ARMv7 chips and later have NEON. */
#elif defined(__APPLE__)
	return 0; /* assume anything else from Apple doesn't have NEON. */
#elif !defined(__arm__)
	return 0; /* not an ARM CPU at all. */
#elif defined(__OpenBSD__)
	return 1; /* OpenBSD only supports ARMv7 CPUs that have NEON. */
#elif defined(HAVE_ELF_AUX_INFO)
	unsigned long hasneon = 0;
	if (elf_aux_info(AT_HWCAP, (void *)&hasneon, (int)sizeof(hasneon)) != 0)
		return 0;

	return ((hasneon & HWCAP_NEON) == HWCAP_NEON);
#elif defined(__QNXNTO__)
	return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
#elif (defined(__linux__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
	return (getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON;
#elif defined(__linux__)
	return readProcAuxvForNeon();
#elif defined(__ANDROID__)
	/* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
	{
		AndroidCpuFamily cpu_family = android_getCpuFamily();
		if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
			uint64_t cpu_features = android_getCpuFeatures();
			if (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) {
				return 1;
			}
		}
		return 0;
	}
#elif defined(__RISCOS__)
	/* Use the VFPSupport_Features SWI to access the MVFR registers */
	{
		_kernel_swi_regs regs;
		regs.r[0] = 0;
		/* The same register block is passed as both input and output. */
		if (_kernel_swi(VFPSupport_Features, &regs, &regs) == NULL) {
			if ((regs.r[2] & 0xFFF000) == 0x111000) {
				return 1;
			}
		}
		return 0;
	}
#else
#warning vec_CPU_have_NEON is not implemented for this ARM platform. Write me.
	return 0;
#endif
}
461 | |
462 #define VEC_CPU_FEATURES_RESET VEC_UINT32_C(0xFFFFFFFF) | |
463 | |
464 static vec_uint32 vec_CPU_features = VEC_CPU_FEATURES_RESET; | |
465 | |
466 vec_uint32 vec_get_CPU_features(void) | |
467 { | |
468 if (vec_CPU_features == VEC_CPU_FEATURES_RESET) { | |
469 vec_CPU_get_CPUID_features(); | |
470 vec_CPU_features = 0; | |
471 if (vec_CPU_have_ALTIVEC()) | |
472 vec_CPU_features |= VEC_CPU_HAS_ALTIVEC; | |
473 if (vec_CPU_have_ALTIVEC_VSX()) | |
474 vec_CPU_features |= VEC_CPU_HAS_ALTIVEC_VSX; | |
475 if (vec_CPU_have_MMX()) | |
476 vec_CPU_features |= VEC_CPU_HAS_MMX; | |
477 if (vec_CPU_have_SSE()) | |
478 vec_CPU_features |= VEC_CPU_HAS_SSE; | |
479 if (vec_CPU_have_SSE2()) | |
480 vec_CPU_features |= VEC_CPU_HAS_SSE2; | |
481 if (vec_CPU_have_SSE3()) | |
482 vec_CPU_features |= VEC_CPU_HAS_SSE3; | |
483 if (vec_CPU_have_SSE41()) | |
484 vec_CPU_features |= VEC_CPU_HAS_SSE41; | |
485 if (vec_CPU_have_SSE42()) | |
486 vec_CPU_features |= VEC_CPU_HAS_SSE42; | |
487 if (vec_CPU_have_AVX()) | |
488 vec_CPU_features |= VEC_CPU_HAS_AVX; | |
489 if (vec_CPU_have_AVX2()) | |
490 vec_CPU_features |= VEC_CPU_HAS_AVX2; | |
491 if (vec_CPU_have_AVX512F()) | |
492 vec_CPU_features |= VEC_CPU_HAS_AVX512F; | |
493 if (vec_CPU_have_NEON()) | |
494 vec_CPU_features |= VEC_CPU_HAS_NEON; | |
495 } | |
496 return vec_CPU_features; | |
497 } |