changeset 29:e59c91d050c0

*: add aligned malloc stuff :)
author Paper <paper@tflc.us>
date Thu, 24 Apr 2025 17:12:05 -0400
parents c6c99ab1088a
children 641d8c79b1da
files CMakeLists.txt include/vec/mem.h include/vec/vec.h src/mem.c src/vec.c test/test.c test/test_align.h
diffstat 7 files changed, 188 insertions(+), 73 deletions(-) [+]
line wrap: on
line diff
--- a/CMakeLists.txt	Thu Apr 24 00:54:02 2025 -0400
+++ b/CMakeLists.txt	Thu Apr 24 17:12:05 2025 -0400
@@ -7,6 +7,7 @@
 target_sources(vec PRIVATE
 	"src/cpu.c"
 	"src/impl/generic.c"
+	"src/mem.c"
 	# "src/impl/fallback.c" -- deadcode
 )
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/include/vec/mem.h	Thu Apr 24 17:12:05 2025 -0400
@@ -0,0 +1,36 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+#ifndef VEC_MEM_H_
+#define VEC_MEM_H_
+
+#include "vec/vec.h"
+
+/* These are portable aligned memory allocation functions, implemented in
+ * src/mem.c on top of the C library allocator. Returned pointers are aligned
+ * to VEC_MALLOC_ALIGNMENT bytes (defined in src/mem.c).
+ *
+ * The returned pointers are offset from the underlying malloc() block, so
+ * they must only be passed back to vec_realloc() and vec_free(), never to
+ * plain realloc()/free(). */
+/* Allocates `size` bytes; returns NULL on failure or if `size` is too large. */
+void *vec_malloc(size_t size);
+/* Allocates zero-filled storage of `count` * `nmemb` bytes; NULL on failure. */
+void *vec_calloc(size_t count, size_t nmemb);
+/* Reallocating counterpart of vec_malloc(); a NULL `ptr` behaves like
+ * vec_malloc(newsize). Returns NULL on failure. */
+void *vec_realloc(void *ptr, size_t newsize);
+/* Releases a block obtained from the functions above; NULL is ignored. */
+void vec_free(void *ptr);
+
+#endif
--- a/include/vec/vec.h	Thu Apr 24 00:54:02 2025 -0400
+++ b/include/vec/vec.h	Thu Apr 24 17:12:05 2025 -0400
@@ -165,85 +165,25 @@
 // portable bit shift
 
 // these functions aren't very necessary :/
-inline vec_uintmax vec_lrshift(vec_uintmax x, unsigned int y)
-{
-	return x >> y;
-}
+/* These macros apply the shift directly to `x` in its own (promoted) type;
+ * no conversion to vec_uintmax is performed. A right shift is therefore only
+ * guaranteed to be a logical (zero-filling) shift when `x` is unsigned.
+ * NOTE(review): confirm that every caller passes unsigned operands. */
+#define vec_lrshift(x, y) ((x) >> (y))
+#define vec_llshift(x, y) ((x) << (y))
+#define vec_urshift(x, y) ((x) >> (y))
+#define vec_ulshift(x ,y) ((x) << (y))
 
-inline vec_uintmax vec_llshift(vec_uintmax x, unsigned int y)
+/* Arithmetic (sign-preserving) right shift that never right-shifts a
+ * negative value: for negative x the bits are inverted so the operand is
+ * non-negative, shifted, and then inverted back; non-negative x is shifted
+ * directly. */
+inline vec_intmax vec_rshift(vec_intmax x, unsigned int y)
 {
-	return x << y;
-}
-
-inline vec_uintmax vec_urshift(vec_uintmax x, unsigned int y)
-{
-	return x >> y;
-}
-
-inline vec_uintmax vec_ulshift(vec_uintmax x, unsigned int y)
-{
-	return x << y;
+	return (x < 0) ? (~((~(x)) >> y)) : (x >> y);
 }
 
-/**
- * Arithmetic shifts; based off code from OpenMPT, which is under
- * the Boost Software License:
- *
- * Permission is hereby granted, free of charge, to any person or organization
- * obtaining a copy of the software and accompanying documentation covered by
- * this license (the "Software") to use, reproduce, display, distribute,
- * execute, and transmit the Software, and to prepare derivative works of the
- * Software, and to permit third-parties to whom the Software is furnished to
- * do so, all subject to the following:
- * 
- * The copyright notices in the Software and this entire statement, including
- * the above license grant, this restriction and the following disclaimer,
- * must be included in all copies of the Software, in whole or in part, and
- * all derivative works of the Software, unless such copies or derivative
- * works are solely in the form of machine-executable object code generated by
- * a source language processor.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
- * SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
- * FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
-**/
-inline vec_intmax vec_rshift(vec_intmax x, unsigned int y)
+/* Left shift of a signed value. The value is stored into the signed member
+ * of a union and the shift is applied to the unsigned member, so the shift
+ * itself is carried out on an unsigned type; the result is then read back
+ * through the signed member. */
+inline vec_intmax vec_lshift(vec_intmax x, unsigned int y)
 {
-	static const vec_uintmax roffset = ((vec_uintmax)1) << ((sizeof(vec_intmax) * 8) - 1);
-
 	union {
 		vec_intmax d;
 		vec_uintmax u;
 	} xx;
 
 	xx.d = x;
-
-	xx.u += roffset;
-	xx.u >>= y;
-	xx.u -= roffset >> y;
-
-	return xx.d;
-}
-
-inline vec_intmax vec_lshift(vec_intmax x, unsigned int y)
-{
-	static const vec_uintmax roffset = ((vec_uintmax)1) << ((sizeof(vec_intmax) * 8) - 1);
-
-	union {
-		vec_intmax d;
-		vec_uintmax u;
-	} xx;
-
-	xx.d = x;
-
-	xx.u += roffset;
 	xx.u <<= y;
-	xx.u -= roffset << y;
-
 	return xx.d;
 }
 
@@ -286,7 +226,7 @@
 # define VEC_ALIGNAS(x) _Alignas(x)
 #elif VEC_GNUC_HAS_ATTRIBUTE(aligned, 2, 7, 0)
 # define VEC_ALIGNAS(x) __attribute__((__aligned__(x)))
-#elif VEC_MSVC_ATLEAST(0, 0, 0) // FIXME which version?
+#elif VEC_MSVC_ATLEAST(12, 0, 0) /* Visual C++ 6 */
 # define VEC_ALIGNAS(x) __declspec(align(x))
 #else
 # error vec: vec requires compiler alignment support
@@ -509,6 +449,9 @@
 //////////////////////////////////////////////////////////////////////////////
 // Defines the structures for each vector type
 
+/* XXX: if we don't have alignas, we should be able
+ * to get around it by using macros :) */
+
 // 16-bit
 typedef struct {
 	VEC_ALIGNAS(VUINT8x2_ALIGNMENT) vec_uint8 bytes[2];
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/mem.c	Thu Apr 24 17:12:05 2025 -0400
@@ -0,0 +1,119 @@
+/**
+ * vec - a tiny SIMD vector library in C99
+ * 
+ * Copyright (c) 2024 Paper
+ * 
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * 
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+**/
+
+#include "vec/mem.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+/* alignment, in bytes, that vec_align_ptr rounds allocations up to */
+#define VEC_MALLOC_ALIGNMENT (64)
+
+/* vec_align_ptr rounds with a bit mask built from (VEC_MALLOC_ALIGNMENT - 1),
+ * which only works for powers of two */
+VEC_STATIC_ASSERT(!(VEC_MALLOC_ALIGNMENT & (VEC_MALLOC_ALIGNMENT - 1))
+	&& (VEC_MALLOC_ALIGNMENT > 0),
+	"VEC_MALLOC_ALIGNMENT must be a power of two");
+
+/* type of the offset that vec_align_ptr stores directly in front of each
+ * aligned pointer; it must be wide enough for the largest offset produced */
+typedef unsigned char vec_alignment_type;
+
+/* extra bytes requested from malloc on top of the caller's size: room for
+ * the stored offset plus the worst-case alignment padding */
+#define VEC_MALLOC_ADDITIONAL_SIZE (sizeof(vec_alignment_type) + (VEC_MALLOC_ALIGNMENT - 1))
+/* largest caller size to which VEC_MALLOC_ADDITIONAL_SIZE can be added
+ * without overflowing size_t */
+#define VEC_MALLOC_MAX_SIZE (SIZE_MAX - VEC_MALLOC_ADDITIONAL_SIZE)
+
+/* Turns the start of a raw malloc block into a VEC_MALLOC_ALIGNMENT-aligned
+ * pointer and records, in the vec_alignment_type slot immediately in front
+ * of the returned pointer, how many bytes were skipped (vec_unalign_ptr
+ * reads that slot to get the raw pointer back).
+ *
+ * The search for an aligned address starts at q + sizeof(diff) rather than
+ * at q itself, so the slot always lies inside the block, even when q is
+ * already aligned. The resulting offset is between sizeof(diff) and
+ * VEC_MALLOC_ADDITIONAL_SIZE inclusive, so the block behind q must be at
+ * least VEC_MALLOC_ADDITIONAL_SIZE bytes larger than the usable size. */
+VEC_FUNC_IMPL void *vec_align_ptr(void *q)
+{
+	const uintptr_t mask = (uintptr_t)VEC_MALLOC_ALIGNMENT - 1;
+	uintptr_t start, aligned;
+	vec_alignment_type diff;
+
+	start = (uintptr_t)q;
+
+	/* round (start + slot size) up to the next multiple of the alignment */
+	aligned = (start + sizeof(diff) + mask) & ~mask;
+	diff = (vec_alignment_type)(aligned - start);
+
+	q = (char *)q + diff;
+
+	/* stash the offset right in front of the aligned pointer */
+	memcpy((char *)q - sizeof(diff), &diff, sizeof(diff));
+
+	return q;
+}
+
+/* Inverse of vec_align_ptr: reads the offset stored just in front of the
+ * aligned pointer and steps back by that many bytes to recover the pointer
+ * that vec_align_ptr was originally given. */
+VEC_FUNC_IMPL void *vec_unalign_ptr(void *q)
+{
+	char *aligned = q;
+	vec_alignment_type offset;
+
+	memcpy(&offset, aligned - sizeof(offset), sizeof(offset));
+
+	return aligned - offset;
+}
+
+/* Allocates `size` usable bytes and returns a pointer aligned by
+ * vec_align_ptr. Returns NULL when `size` exceeds VEC_MALLOC_MAX_SIZE or
+ * when malloc fails. */
+void *vec_malloc(size_t size)
+{
+	void *raw = NULL;
+
+	/* over-allocate so there is room for the alignment padding and for the
+	 * stored offset; realloc and free need that offset to find the pointer
+	 * malloc really returned */
+	if (size <= VEC_MALLOC_MAX_SIZE)
+		raw = malloc(size + VEC_MALLOC_ADDITIONAL_SIZE);
+
+	return raw ? vec_align_ptr(raw) : NULL;
+}
+
+/* Allocates an aligned, zero-filled block of `count` * `nmemb` bytes.
+ * Returns NULL if that product is too large to allocate or if the
+ * allocation itself fails. */
+void *vec_calloc(size_t count, size_t nmemb)
+{
+	size_t size;
+	void *q;
+
+	/* reject oversized requests before multiplying: testing the product
+	 * afterwards misses the case where it wraps around to exactly zero */
+	if (nmemb && count > VEC_MALLOC_MAX_SIZE / nmemb)
+		return NULL;
+
+	size = count * nmemb;
+
+	q = vec_malloc(size);
+	if (q)
+		memset(q, 0, size);
+
+	return q;
+}
+
+/* Resizes a block previously returned by vec_malloc, vec_calloc or
+ * vec_realloc to `newsize` usable bytes and returns the (possibly moved)
+ * aligned pointer. A NULL `ptr` behaves like vec_malloc(newsize).
+ *
+ * Returns NULL if `newsize` exceeds VEC_MALLOC_MAX_SIZE or if realloc fails;
+ * in both cases the original block is untouched and still belongs to the
+ * caller. */
+void *vec_realloc(void *ptr, size_t newsize)
+{
+	const uintptr_t mask = (uintptr_t)VEC_MALLOC_ALIGNMENT - 1;
+	vec_alignment_type olddiff, newdiff;
+	uintptr_t start;
+	char *raw;
+	char *q;
+
+	if (!ptr)
+		return vec_malloc(newsize);
+
+	if (newsize > VEC_MALLOC_MAX_SIZE)
+		return NULL;
+
+	/* remember how far into the current raw block the payload sits */
+	raw = vec_unalign_ptr(ptr);
+	olddiff = (vec_alignment_type)((char *)ptr - raw);
+
+	q = realloc(raw, newsize + VEC_MALLOC_ADDITIONAL_SIZE);
+	if (!q)
+		return NULL;
+
+	/* realloc only promises malloc's own alignment, so the new block may
+	 * need a different offset than the old one. As in the rest of this file
+	 * the offset is stored in the slot right before the aligned pointer, so
+	 * leave room for that slot when rounding up. */
+	start = (uintptr_t)q;
+	newdiff = (vec_alignment_type)(((start + sizeof(newdiff) + mask) & ~mask) - start);
+
+	/* The payload still sits `olddiff` bytes into the block. Move it before
+	 * writing the slot, because the slot may land on bytes the payload
+	 * currently occupies. Neither offset exceeds VEC_MALLOC_ADDITIONAL_SIZE,
+	 * so both ranges stay inside the new block; when growing, the tail that
+	 * gets copied is simply not-yet-initialised space. */
+	if (newdiff != olddiff)
+		memmove(q + newdiff, q + olddiff, newsize);
+
+	memcpy(q + newdiff - sizeof(newdiff), &newdiff, sizeof(newdiff));
+
+	return q + newdiff;
+}
+
+/* Releases a block returned by vec_malloc, vec_calloc or vec_realloc.
+ * A NULL pointer is ignored. */
+void vec_free(void *ptr)
+{
+	/* the guard is required: there is no stored offset in front of NULL */
+	if (!ptr)
+		return;
+
+	free(vec_unalign_ptr(ptr));
+}
--- a/src/vec.c	Thu Apr 24 00:54:02 2025 -0400
+++ b/src/vec.c	Thu Apr 24 17:12:05 2025 -0400
@@ -60,10 +60,6 @@
 # include "vec/impl/arm/neon.h"
 #endif
 
-extern inline vec_uintmax vec_lrshift(vec_uintmax x, unsigned int y);
-extern inline vec_uintmax vec_llshift(vec_uintmax x, unsigned int y);
-extern inline vec_uintmax vec_urshift(vec_uintmax x, unsigned int y);
-extern inline vec_uintmax vec_ulshift(vec_uintmax x, unsigned int y);
 extern inline vec_intmax vec_rshift(vec_intmax x, unsigned int y);
 extern inline vec_intmax vec_lshift(vec_intmax x, unsigned int y);
 
--- a/test/test.c	Thu Apr 24 00:54:02 2025 -0400
+++ b/test/test.c	Thu Apr 24 17:12:05 2025 -0400
@@ -1,4 +1,5 @@
 #include "vec/vec.h"
+#include "vec/mem.h"
 
 #include <stdio.h>
 #include <string.h>
--- a/test/test_align.h	Thu Apr 24 00:54:02 2025 -0400
+++ b/test/test_align.h	Thu Apr 24 17:12:05 2025 -0400
@@ -1,6 +1,7 @@
 static int test_align(void)
 {
 	int ret = 0;
+	int i;
 
 #define RUN_TEST(sign, csign, bits, size) \
 	do { \
@@ -8,7 +9,7 @@
 		V##csign##INT##bits##x##size##_ALIGNED_ARRAY(vec_arr); \
 	\
 		/* fill the values */ \
-		for (int i = 0; i < size; i++) \
+		for (i = 0; i < size; i++) \
 			vec_arr[i] = i; \
 	\
 		/* try to load it */ \
@@ -58,5 +59,23 @@
 #undef RUN_TESTS
 #undef RUN_TEST
 
+	/* the aligned allocator must hand out 64-byte aligned pointers for every
+	 * size, and keep doing so when a block is grown or shrunk */
+	for (i = 0; i < 50; i++) {
+		void *q, *r;
+
+		q = vec_malloc(i);
+		if (!q) {
+			/* a failed allocation must not be counted as "aligned" */
+			ret |= 1;
+			continue;
+		}
+
+		ret |= !!((uintptr_t)q & 63);
+
+		/* try a bigger size; hold on to the old pointer until the call is
+		 * known to have succeeded, otherwise the block would leak */
+		r = vec_realloc(q, i + 10);
+		if (!r) {
+			ret |= 1;
+			vec_free(q);
+			continue;
+		}
+		q = r;
+
+		ret |= !!((uintptr_t)q & 63);
+
+		/* now a smaller one; clamp at zero, since i - 10 is negative for
+		 * small i and would be converted to an enormous size_t */
+		r = vec_realloc(q, (i > 10) ? (i - 10) : 0);
+		if (!r) {
+			ret |= 1;
+			vec_free(q);
+			continue;
+		}
+		q = r;
+
+		ret |= !!((uintptr_t)q & 63);
+
+		vec_free(q);
+	}
+
 	return ret;
 }