comparison foosdk/sdk/foobar2000/shared/audio_math.cpp @ 1:20d02a178406 default tip

*: check in everything else yay
author Paper <paper@tflc.us>
date Mon, 05 Jan 2026 02:15:46 -0500
parents
children
comparison
equal deleted inserted replaced
0:e9bb126753e7 1:20d02a178406
1 #include "shared.h"
2
3 //#define AUDIO_MATH_NOASM
4
5 // NOTE: SSE4.1 int16 ops code was determined to be MUCH SLOWER than SSE2 on a Sandy Bridge era Xeon and is therefore disabled for now
6 // #define SUPPORT_SSE41
7
8 #ifdef SUPPORT_SSE41
// Cached result of the pfc CPU feature query for CPU_HAVE_SSE41, evaluated once when
// this translation unit's statics are initialized. audio_math::convert_from_int16 in
// this file tests it to decide whether to take the hand-written SSE4.1 path.
// NOTE(review): presumably true when the running CPU supports SSE4.1 - confirm against
// pfc::query_cpu_feature_set.
9 static const bool g_have_sse41 = pfc::query_cpu_feature_set(pfc::CPU_HAVE_SSE41);
10
11 inline static void convert_from_int16_noopt(const t_int16 * p_source,t_size p_count,audio_sample * p_output,float p_scale)
12 {
13 t_size num = p_count;
14 for(;num;num--)
15 *(p_output++) = (audio_sample)*(p_source++) * p_scale;
16 }
17
// SSE4.1 int16 -> float conversion, 8 samples per loop iteration, unaligned stores.
//
// Written as a naked __fastcall routine: the body below is the entire function, it reads
// its arguments straight from ecx/edx/the stack and returns with its own "ret 8".
//
// p_source  input int16 samples; 16 bytes (8 samples) are read per iteration.
// p_count   number of 8-sample GROUPS to convert, not the number of samples (the loop
//           counter is decremented once per 16 input bytes). Zero skips the loop.
// p_output  destination; 32 bytes (8 packed single-precision values) are written per
//           iteration with movups, so no particular alignment is required.
// p_scale   multiplier applied to every converted sample.
//
// NOTE(review): the stores are packed 32-bit floats, so this assumes audio_sample is a
// 32-bit float - confirm for every build configuration that enables SUPPORT_SSE41.
18 __declspec(naked) static void __fastcall convert_from_int16_sse41_8word(const t_int16 * p_source,t_size p_count,audio_sample * p_output,float p_scale) {
19 __asm {
20 // ecx = source, edx = count, [esp + 4] = output, [esp + 8] = scale
// Load the scale factor into lane 0 of xmm7.
21 movss xmm7, [esp + 8]
// Set ZF if count == 0; the flag is consumed by the jz below (mov and pshufd leave
// the flags untouched).
22 test edx, edx
// eax = output pointer.
23 mov eax, [esp + 4]
// Broadcast lane 0 so all four lanes of xmm7 hold the scale factor.
24 pshufd xmm7, xmm7, 0
25 jz loopend
26 loopbegin:
// Sign-extend 4 + 4 int16 samples (8 bytes each) to 32-bit integers.
27 PMOVSXWD xmm0, mmword ptr [ecx]
28 PMOVSXWD xmm1, mmword ptr [ecx+8]
// Convert the 32-bit integers to single-precision floats.
29 CVTDQ2PS xmm0, xmm0
30 CVTDQ2PS xmm1, xmm1
// Advance the source by 8 int16 samples.
31 add ecx, 16
// Apply the scale factor.
32 mulps xmm0, xmm7
33 mulps xmm1, xmm7
// Decrement the group counter; the resulting ZF is what the jnz at the bottom of the
// loop tests. The stores and the lea in between do not modify the flags.
34 dec edx
// Unaligned stores of the 8 converted samples.
35 movups [eax], xmm0
36 movups [eax + 16], xmm1
// Advance the destination by 32 bytes; lea is used so the flags from dec survive.
37 lea eax, [eax + 32]
38 jnz loopbegin
39 loopend:
// Return and pop the two stack arguments (output pointer + scale = 8 bytes).
40 ret 8
41 }
42 }
43
44
// SSE4.1 int16 -> float conversion, 8 samples per loop iteration, ALIGNED stores.
//
// Same instruction sequence as convert_from_int16_sse41_8word except that the results
// are stored with movaps, which requires every destination address to be 16-byte
// aligned. Written as a naked __fastcall routine: the body below is the entire function,
// it reads its arguments from ecx/edx/the stack and returns with its own "ret 8".
//
// p_source  input int16 samples; 16 bytes (8 samples) are read per iteration.
// p_count   number of 8-sample GROUPS to convert, not the number of samples (the loop
//           counter is decremented once per 16 input bytes). Zero skips the loop.
// p_output  destination; 32 bytes (8 packed single-precision values) are written per
//           iteration. Must be 16-byte aligned because of movaps.
// p_scale   multiplier applied to every converted sample.
//
// NOTE(review): no caller of this variant is visible in this part of the file.
// NOTE(review): the stores are packed 32-bit floats, so this assumes audio_sample is a
// 32-bit float - confirm for every build configuration that enables SUPPORT_SSE41.
45 __declspec(naked) static void __fastcall convert_from_int16_sse41_8word_aligned(const t_int16 * p_source,t_size p_count,audio_sample * p_output,float p_scale) {
46 __asm {
47 // ecx = source, edx = count, [esp + 4] = output, [esp + 8] = scale
// Load the scale factor into lane 0 of xmm7.
48 movss xmm7, [esp + 8]
// Set ZF if count == 0; the flag is consumed by the jz below (mov and pshufd leave
// the flags untouched).
49 test edx, edx
// eax = output pointer.
50 mov eax, [esp + 4]
// Broadcast lane 0 so all four lanes of xmm7 hold the scale factor.
51 pshufd xmm7, xmm7, 0
52 jz loopend
53 loopbegin:
// Sign-extend 4 + 4 int16 samples (8 bytes each) to 32-bit integers.
54 PMOVSXWD xmm0, mmword ptr [ecx]
55 PMOVSXWD xmm1, mmword ptr [ecx+8]
// Convert the 32-bit integers to single-precision floats.
56 CVTDQ2PS xmm0, xmm0
57 CVTDQ2PS xmm1, xmm1
// Advance the source by 8 int16 samples.
58 add ecx, 16
// Apply the scale factor.
59 mulps xmm0, xmm7
60 mulps xmm1, xmm7
// Decrement the group counter; the resulting ZF is what the jnz at the bottom of the
// loop tests. The stores and the lea in between do not modify the flags.
61 dec edx
// Aligned stores of the 8 converted samples.
62 movaps [eax], xmm0
63 movaps [eax + 16], xmm1
// Advance the destination by 32 bytes; lea is used so the flags from dec survive.
64 lea eax, [eax + 32]
65 jnz loopbegin
66 loopend:
// Return and pop the two stack arguments (output pointer + scale = 8 bytes).
67 ret 8
68 }
69 }
70 #endif
71
72
73
74 #ifdef audio_math
75 #undef audio_math
76 #endif
77 namespace audio_math {
78
79 void SHARED_EXPORT scale(const audio_sample * p_source,t_size p_count,audio_sample * p_output,audio_sample p_scale)
80 {
81 ::pfc::audio_math::scale(p_source, p_count, p_output, p_scale);
82 }
83
84 void SHARED_EXPORT convert_to_int16(const audio_sample * p_source,t_size p_count,t_int16 * p_output,audio_sample p_scale)
85 {
86 ::pfc::audio_math::convert_to_int16(p_source, p_count, p_output, p_scale);
87 }
88
89 audio_sample SHARED_EXPORT convert_to_int16_calculate_peak(const audio_sample * p_source,t_size p_count,t_int16 * p_output,audio_sample p_scale)
90 {
91 convert_to_int16(p_source,p_count,p_output,p_scale);
92 return p_scale * calculate_peak(p_source,p_count);
93 }
94
95 void SHARED_EXPORT convert_from_int16(const t_int16 * p_source,t_size p_count,audio_sample * p_output,audio_sample p_scale)
96 {
97 #ifdef SUPPORT_SSE41
98 if (g_have_sse41) {
99 audio_sample scale = (audio_sample)(p_scale / (double)0x8000);
100 convert_from_int16_sse41_8word(p_source, p_count >> 3, p_output, scale);
101 convert_from_int16_noopt(p_source + (p_count & ~7), p_count & 7, p_output + (p_count & ~7), scale);
102 return;
103 }
104 #endif
105 ::pfc::audio_math::convert_from_int16(p_source, p_count, p_output, p_scale);
106 }
107
108 void SHARED_EXPORT convert_to_int32(const audio_sample * p_source,t_size p_count,t_int32 * p_output,audio_sample p_scale)
109 {
110 return ::pfc::audio_math::convert_to_int32(p_source, p_count, p_output, p_scale);
111 }
112
113 audio_sample SHARED_EXPORT convert_to_int32_calculate_peak(const audio_sample * p_source,t_size p_count,t_int32 * p_output,audio_sample p_scale)
114 {
115 convert_to_int32(p_source,p_count,p_output,p_scale);
116 return p_scale * calculate_peak(p_source,p_count);
117 }
118
119 void SHARED_EXPORT convert_from_int32(const t_int32 * p_source,t_size p_count,audio_sample * p_output,audio_sample p_scale)
120 {
121 ::pfc::audio_math::convert_from_int32(p_source, p_count, p_output, p_scale);
122 }
123
124
125 audio_sample SHARED_EXPORT calculate_peak(const audio_sample * p_source,t_size p_count) {
126 return ::pfc::audio_math::calculate_peak(p_source, p_count);
127 }
128
129 void SHARED_EXPORT kill_denormal(audio_sample * p_buffer,t_size p_count) {
130 ::pfc::audio_math::remove_denormals(p_buffer, p_count);
131 }
132
133 void SHARED_EXPORT add_offset(audio_sample * p_buffer,audio_sample p_delta,t_size p_count) {
134 ::pfc::audio_math::add_offset(p_buffer, p_delta, p_count);
135 }
136
137 }