Mercurial > vec
view src/impl/x86/sse3.c @ 29:e59c91d050c0
*: add aligned malloc stuff :)
author | Paper <paper@tflc.us> |
---|---|
date | Thu, 24 Apr 2025 17:12:05 -0400 |
parents | c6c99ab1088a |
children |
line wrap: on
line source
/** * vec - a tiny SIMD vector library in C99 * * Copyright (c) 2024 Paper * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. **/ #include "vec/impl/x86/sse3.h" #include <pmmintrin.h> /* SSE3 has a slightly more optimized load function */ #define VEC_SSE2_DEFINE_OPERATIONS_SIGN(sign, bits, size) \ union v##sign##int##bits##x##size##_impl_data { \ v##sign##int##bits##x##size vec; \ __m128i sse; \ }; \ \ VEC_STATIC_ASSERT(VEC_ALIGNOF(__m128i) <= VEC_ALIGNOF(v##sign##int##bits##x##size), "vec: v" #sign "int" #bits "x" #size " alignment needs to be expanded to fit intrinsic type size"); \ VEC_STATIC_ASSERT(sizeof(__m128i) <= sizeof(v##sign##int##bits##x##size), "vec: v" #sign "int" #bits "x" #size " needs to be expanded to fit intrinsic type size"); \ \ VEC_FUNC_IMPL v##sign##int##bits##x##size v##sign##int##bits##x##size##_sse2_load(const vec_##sign##int##bits in[size]) \ { \ union v##sign##int##bits##x##size##_impl_data vec; \ vec.sse = _mm_lddqu_si128((const __m128i *)in); \ return vec.vec; \ } \ \ const v##sign##int##bits##x##size##_impl v##sign##int##bits##x##size##_impl_sse3 = { \ .load = v##sign##int##bits##x##size##_sse2_load, \ }; #define VEC_SSE2_DEFINE_OPERATIONS(bits, size) \ VEC_SSE2_DEFINE_OPERATIONS_SIGN(u, bits, size) \ VEC_SSE2_DEFINE_OPERATIONS_SIGN( , bits, size) VEC_SSE2_DEFINE_OPERATIONS(8, 16) VEC_SSE2_DEFINE_OPERATIONS(16, 8) VEC_SSE2_DEFINE_OPERATIONS(32, 4) VEC_SSE2_DEFINE_OPERATIONS(64, 2)