diff foosdk/sdk/foobar2000/shared/audio_math.cpp @ 1:20d02a178406 default tip

*: check in everything else yay
author Paper <paper@tflc.us>
date Mon, 05 Jan 2026 02:15:46 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/foosdk/sdk/foobar2000/shared/audio_math.cpp	Mon Jan 05 02:15:46 2026 -0500
@@ -0,0 +1,137 @@
+#include "shared.h"
+
+//#define AUDIO_MATH_NOASM
+
+// NOTE: the SSE4.1 int16 conversion code was found to be MUCH SLOWER than SSE2 on a Sandy Bridge era Xeon and is therefore disabled for now
+// #define SUPPORT_SSE41
+
+#ifdef SUPPORT_SSE41
+static const bool g_have_sse41 = pfc::query_cpu_feature_set(pfc::CPU_HAVE_SSE41); // runtime CPU check result; convert_from_int16 only takes the SSE4.1 path when this is true
+
+// Scalar int16 -> audio_sample conversion: output[i] = source[i] * p_scale. Used for the tail that the 8-sample SSE4.1 loop cannot cover.
+inline static void convert_from_int16_noopt(const t_int16 * p_source,t_size p_count,audio_sample * p_output,float p_scale) {
+	for(t_size idx = 0; idx < p_count; ++idx) {
+		p_output[idx] = (audio_sample)p_source[idx] * p_scale;
+	}
+}
+
+__declspec(naked) static void __fastcall convert_from_int16_sse41_8word(const t_int16 * p_source,t_size p_count,audio_sample * p_output,float p_scale) { // SSE4.1 kernel: each of the p_count loop iterations converts 8 int16 samples into scaled 32-bit floats
+	__asm { // naked + __fastcall: no prologue/epilogue is generated, so the arguments are read directly from registers and the stack
+		// ecx = source, edx = count (number of 8-sample groups), [esp + 4] = output, [esp + 8] = scale
+		movss xmm7, [esp + 8] // low lane of xmm7 = scale
+		test edx, edx // ZF = (count == 0); consumed by the jz below, the mov and pshufd in between leave the flags alone
+		mov eax, [esp + 4] // eax = output pointer
+		pshufd xmm7, xmm7, 0 // broadcast scale into all four lanes
+		jz loopend // zero groups requested: nothing to convert
+loopbegin:
+		PMOVSXWD xmm0, mmword ptr [ecx] // samples 0..3: sign-extend four int16 (8 bytes) to four int32
+		PMOVSXWD xmm1, mmword ptr [ecx+8] // samples 4..7
+		CVTDQ2PS xmm0, xmm0 // int32 -> single-precision float
+		CVTDQ2PS xmm1, xmm1
+		add ecx, 16 // advance source by 8 int16 samples (16 bytes)
+		mulps xmm0, xmm7 // apply scale
+		mulps xmm1, xmm7
+		dec edx // one group done; ZF from here feeds the jnz below (movups and lea do not modify flags)
+		movups [eax], xmm0 // store 4 floats, no alignment required; NOTE(review): assumes audio_sample is a 32-bit float - confirm
+		movups [eax + 16], xmm1
+		lea eax, [eax + 32] // advance output by 8 floats (32 bytes); lea rather than add so the flags from dec survive
+		jnz loopbegin // repeat while groups remain
+loopend:
+		ret 8 // callee pops the two stack arguments (output, scale = 8 bytes)
+	}
+}
+
+
+__declspec(naked) static void __fastcall convert_from_int16_sse41_8word_aligned(const t_int16 * p_source,t_size p_count,audio_sample * p_output,float p_scale) { // same loop as convert_from_int16_sse41_8word, but stores with movaps; no caller appears in the visible part of this file
+	__asm { // naked + __fastcall: no prologue/epilogue is generated, so the arguments are read directly from registers and the stack
+		// ecx = source, edx = count (number of 8-sample groups), [esp + 4] = output, [esp + 8] = scale
+		movss xmm7, [esp + 8] // low lane of xmm7 = scale
+		test edx, edx // ZF = (count == 0); consumed by the jz below, the mov and pshufd in between leave the flags alone
+		mov eax, [esp + 4] // eax = output pointer
+		pshufd xmm7, xmm7, 0 // broadcast scale into all four lanes
+		jz loopend // zero groups requested: nothing to convert
+loopbegin:
+		PMOVSXWD xmm0, mmword ptr [ecx] // samples 0..3: sign-extend four int16 (8 bytes) to four int32
+		PMOVSXWD xmm1, mmword ptr [ecx+8] // samples 4..7
+		CVTDQ2PS xmm0, xmm0 // int32 -> single-precision float
+		CVTDQ2PS xmm1, xmm1
+		add ecx, 16 // advance source by 8 int16 samples (16 bytes)
+		mulps xmm0, xmm7 // apply scale
+		mulps xmm1, xmm7
+		dec edx // one group done; ZF from here feeds the jnz below (movaps and lea do not modify flags)
+		movaps [eax], xmm0 // aligned store of 4 floats: the output address must be 16-byte aligned or movaps faults
+		movaps [eax + 16], xmm1
+		lea eax, [eax + 32] // advance output by 32 bytes (keeps 16-byte alignment); lea rather than add so the flags from dec survive
+		jnz loopbegin // repeat while groups remain
+loopend:
+		ret 8 // callee pops the two stack arguments (output, scale = 8 bytes)
+	}
+}
+#endif
+
+
+
+#ifdef audio_math // NOTE(review): presumably an included header may #define audio_math as a macro - confirm
+#undef audio_math // drop any such macro so the namespace below is literally named audio_math
+#endif
+namespace audio_math { // exported entry points; apart from the optional SSE4.1 path they forward to ::pfc::audio_math
+	// Forwards to ::pfc::audio_math::scale with the arguments unchanged.
+	void SHARED_EXPORT scale(const audio_sample * p_source,t_size p_count,audio_sample * p_output,audio_sample p_scale) {
+		::pfc::audio_math::scale(p_source, p_count, p_output, p_scale);
+	}
+
+	// Forwards to ::pfc::audio_math::convert_to_int16 with the arguments unchanged.
+	void SHARED_EXPORT convert_to_int16(const audio_sample * p_source,t_size p_count,t_int16 * p_output,audio_sample p_scale) {
+		::pfc::audio_math::convert_to_int16(p_source, p_count, p_output, p_scale);
+	}
+
+	// Converts via convert_to_int16, then returns p_scale times the peak of the source samples (the input, not the converted output).
+	audio_sample SHARED_EXPORT convert_to_int16_calculate_peak(const audio_sample * p_source,t_size p_count,t_int16 * p_output,audio_sample p_scale) {
+		convert_to_int16(p_source, p_count, p_output, p_scale);
+		return p_scale * calculate_peak(p_source, p_count);
+	}
+
+	// When SUPPORT_SSE41 is compiled in and the CPU reports SSE4.1: SIMD kernel plus scalar tail; otherwise forwards to ::pfc::audio_math::convert_from_int16.
+	void SHARED_EXPORT convert_from_int16(const t_int16 * p_source,t_size p_count,audio_sample * p_output,audio_sample p_scale) {
+#ifdef SUPPORT_SSE41
+		if (g_have_sse41) {
+			const audio_sample factor = (audio_sample)(p_scale / (double)0x8000); // divide by 2^15 so an int16 of -32768 maps to -p_scale
+			convert_from_int16_sse41_8word(p_source, p_count >> 3, p_output, factor); // whole groups of 8 samples
+			convert_from_int16_noopt(p_source + (p_count & ~7), p_count & 7, p_output + (p_count & ~7), factor); // remaining 0..7 samples
+			return;
+		}
+#endif
+		::pfc::audio_math::convert_from_int16(p_source, p_count, p_output, p_scale);
+	}
+
+	// Forwards to ::pfc::audio_math::convert_to_int32 with the arguments unchanged.
+	void SHARED_EXPORT convert_to_int32(const audio_sample * p_source,t_size p_count,t_int32 * p_output,audio_sample p_scale) {
+		::pfc::audio_math::convert_to_int32(p_source, p_count, p_output, p_scale);
+	}
+
+	// Converts via convert_to_int32, then returns p_scale times the peak of the source samples (the input, not the converted output).
+	audio_sample SHARED_EXPORT convert_to_int32_calculate_peak(const audio_sample * p_source,t_size p_count,t_int32 * p_output,audio_sample p_scale) {
+		convert_to_int32(p_source, p_count, p_output, p_scale);
+		return p_scale * calculate_peak(p_source, p_count);
+	}
+
+	// Forwards to ::pfc::audio_math::convert_from_int32 with the arguments unchanged.
+	void SHARED_EXPORT convert_from_int32(const t_int32 * p_source,t_size p_count,audio_sample * p_output,audio_sample p_scale) {
+		::pfc::audio_math::convert_from_int32(p_source, p_count, p_output, p_scale);
+	}
+
+	// Returns whatever ::pfc::audio_math::calculate_peak reports for the given samples.
+	audio_sample SHARED_EXPORT calculate_peak(const audio_sample * p_source,t_size p_count) {
+		return ::pfc::audio_math::calculate_peak(p_source, p_count);
+	}
+
+	// Forwards to ::pfc::audio_math::remove_denormals (note the different name on the pfc side).
+	void SHARED_EXPORT kill_denormal(audio_sample * p_buffer,t_size p_count) {
+		::pfc::audio_math::remove_denormals(p_buffer, p_count);
+	}
+
+	// Forwards to ::pfc::audio_math::add_offset with the arguments unchanged.
+	void SHARED_EXPORT add_offset(audio_sample * p_buffer,audio_sample p_delta,t_size p_count) {
+		::pfc::audio_math::add_offset(p_buffer, p_delta, p_count);
+	}
+} // namespace audio_math