view test/test_benchmark_vec.c @ 41:c6e0df09b86f default tip

*: performance improvements with old GCC, reimplement altivec
author Paper <paper@tflc.us>
date Mon, 28 Apr 2025 16:31:59 -0400
parents 4b5a557aa64f
children
line wrap: on
line source

#include "vec/vec.h"

/*
 * Scans `smpl` for its smallest and largest value and folds them into the
 * running bounds supplied by the caller.
 *
 * smpl:   samples to scan. Full blocks of 8 are read with
 *         vint16x8_load_aligned(), so the pointer presumably has to meet
 *         that function's alignment requirement (confirm against the vec
 *         headers).
 * length: number of samples (not bytes) in `smpl`.
 * pmin:   in: current lower bound; out: min(*pmin, every sample).
 * pmax:   in: current upper bound; out: max(*pmax, every sample).
 *
 * The incoming bounds are 32-bit and are only ever compared as 32-bit
 * scalars. They are deliberately not splatted into the vector accumulators:
 * those hold 16-bit lanes, so a bound outside the int16 range (for example
 * an INT32_MAX / INT32_MIN "nothing seen yet" sentinel) would be narrowed
 * on the way in and would corrupt the result. The accumulators are seeded
 * from the first block of samples instead.
 */
extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl,
	uint32_t length, int32_t *pmin, int32_t *pmax)
{
	int32_t smin = *pmin, smax = *pmax;
	uint32_t len32;
	int i;
	vint16x8 min;
	vint16x8 max;
	VINT16x8_ALIGNED_ARRAY(mins);
	VINT16x8_ALIGNED_ARRAY(maxs);

	len32 = length / 8;
	if (len32 > 0) {
		/* seed both accumulators with real data from the first block */
		min = vint16x8_load_aligned(smpl);
		max = min;
		smpl += 8;
		len32--;

		while (len32--) {
			vint16x8 vec = vint16x8_load_aligned(smpl);

			min = vint16x8_min(vec, min);
			max = vint16x8_max(vec, max);

			smpl += 8;
		}

		vint16x8_store_aligned(min, mins);
		vint16x8_store_aligned(max, maxs);

		/* reduce the 8 per-lane results into the scalar bounds */
		for (i = 0; i < 8; i++) {
			if (mins[i] < smin) smin = mins[i];
			if (maxs[i] > smax) smax = maxs[i];
		}
	}

	/* samples left over after the last full block of 8 */
	len32 = length % 8;
	while (len32--) {
		if (*smpl < smin) smin = *smpl;
		if (*smpl > smax) smax = *smpl;

		smpl++;
	}

	*pmin = smin;
	*pmax = smax;
}