|
1
|
1 #include "shared.h"
|
|
|
2
|
|
|
3 //#define AUDIO_MATH_NOASM
|
|
|
4
|
|
|
5 // NOTE: SSE4.1 int16 ops code determined MUCH SLOWER than SSE2 on Sandy Bridge era Xeon and therefore disabled for now
|
|
|
6 // #define SUPPORT_SSE41
|
|
|
7
|
|
|
8 #ifdef SUPPORT_SSE41
|
|
|
9 static const bool g_have_sse41 = pfc::query_cpu_feature_set(pfc::CPU_HAVE_SSE41);
|
|
|
10
|
|
|
11 inline static void convert_from_int16_noopt(const t_int16 * p_source,t_size p_count,audio_sample * p_output,float p_scale)
|
|
|
12 {
|
|
|
13 t_size num = p_count;
|
|
|
14 for(;num;num--)
|
|
|
15 *(p_output++) = (audio_sample)*(p_source++) * p_scale;
|
|
|
16 }
|
|
|
17
|
|
|
18 __declspec(naked) static void __fastcall convert_from_int16_sse41_8word(const t_int16 * p_source,t_size p_count,audio_sample * p_output,float p_scale) {
|
|
|
19 __asm {
|
|
|
20 // ecx = source, edx = count, [esp + 4] = output, [esp + 8] = scale
|
|
|
21 movss xmm7, [esp + 8]
|
|
|
22 test edx, edx
|
|
|
23 mov eax, [esp + 4]
|
|
|
24 pshufd xmm7, xmm7, 0
|
|
|
25 jz loopend
|
|
|
26 loopbegin:
|
|
|
27 PMOVSXWD xmm0, mmword ptr [ecx]
|
|
|
28 PMOVSXWD xmm1, mmword ptr [ecx+8]
|
|
|
29 CVTDQ2PS xmm0, xmm0
|
|
|
30 CVTDQ2PS xmm1, xmm1
|
|
|
31 add ecx, 16
|
|
|
32 mulps xmm0, xmm7
|
|
|
33 mulps xmm1, xmm7
|
|
|
34 dec edx
|
|
|
35 movups [eax], xmm0
|
|
|
36 movups [eax + 16], xmm1
|
|
|
37 lea eax, [eax + 32]
|
|
|
38 jnz loopbegin
|
|
|
39 loopend:
|
|
|
40 ret 8
|
|
|
41 }
|
|
|
42 }
|
|
|
43
|
|
|
44
|
|
|
45 __declspec(naked) static void __fastcall convert_from_int16_sse41_8word_aligned(const t_int16 * p_source,t_size p_count,audio_sample * p_output,float p_scale) {
|
|
|
46 __asm {
|
|
|
47 // ecx = source, edx = count, [esp + 4] = output, [esp + 8] = scale
|
|
|
48 movss xmm7, [esp + 8]
|
|
|
49 test edx, edx
|
|
|
50 mov eax, [esp + 4]
|
|
|
51 pshufd xmm7, xmm7, 0
|
|
|
52 jz loopend
|
|
|
53 loopbegin:
|
|
|
54 PMOVSXWD xmm0, mmword ptr [ecx]
|
|
|
55 PMOVSXWD xmm1, mmword ptr [ecx+8]
|
|
|
56 CVTDQ2PS xmm0, xmm0
|
|
|
57 CVTDQ2PS xmm1, xmm1
|
|
|
58 add ecx, 16
|
|
|
59 mulps xmm0, xmm7
|
|
|
60 mulps xmm1, xmm7
|
|
|
61 dec edx
|
|
|
62 movaps [eax], xmm0
|
|
|
63 movaps [eax + 16], xmm1
|
|
|
64 lea eax, [eax + 32]
|
|
|
65 jnz loopbegin
|
|
|
66 loopend:
|
|
|
67 ret 8
|
|
|
68 }
|
|
|
69 }
|
|
|
70 #endif
|
|
|
71
|
|
|
72
|
|
|
73
|
|
|
74 #ifdef audio_math
|
|
|
75 #undef audio_math
|
|
|
76 #endif
|
|
|
77 namespace audio_math {
|
|
|
78
|
|
|
79 void SHARED_EXPORT scale(const audio_sample * p_source,t_size p_count,audio_sample * p_output,audio_sample p_scale)
|
|
|
80 {
|
|
|
81 ::pfc::audio_math::scale(p_source, p_count, p_output, p_scale);
|
|
|
82 }
|
|
|
83
|
|
|
84 void SHARED_EXPORT convert_to_int16(const audio_sample * p_source,t_size p_count,t_int16 * p_output,audio_sample p_scale)
|
|
|
85 {
|
|
|
86 ::pfc::audio_math::convert_to_int16(p_source, p_count, p_output, p_scale);
|
|
|
87 }
|
|
|
88
|
|
|
89 audio_sample SHARED_EXPORT convert_to_int16_calculate_peak(const audio_sample * p_source,t_size p_count,t_int16 * p_output,audio_sample p_scale)
|
|
|
90 {
|
|
|
91 convert_to_int16(p_source,p_count,p_output,p_scale);
|
|
|
92 return p_scale * calculate_peak(p_source,p_count);
|
|
|
93 }
|
|
|
94
|
|
|
95 void SHARED_EXPORT convert_from_int16(const t_int16 * p_source,t_size p_count,audio_sample * p_output,audio_sample p_scale)
|
|
|
96 {
|
|
|
97 #ifdef SUPPORT_SSE41
|
|
|
98 if (g_have_sse41) {
|
|
|
99 audio_sample scale = (audio_sample)(p_scale / (double)0x8000);
|
|
|
100 convert_from_int16_sse41_8word(p_source, p_count >> 3, p_output, scale);
|
|
|
101 convert_from_int16_noopt(p_source + (p_count & ~7), p_count & 7, p_output + (p_count & ~7), scale);
|
|
|
102 return;
|
|
|
103 }
|
|
|
104 #endif
|
|
|
105 ::pfc::audio_math::convert_from_int16(p_source, p_count, p_output, p_scale);
|
|
|
106 }
|
|
|
107
|
|
|
108 void SHARED_EXPORT convert_to_int32(const audio_sample * p_source,t_size p_count,t_int32 * p_output,audio_sample p_scale)
|
|
|
109 {
|
|
|
110 return ::pfc::audio_math::convert_to_int32(p_source, p_count, p_output, p_scale);
|
|
|
111 }
|
|
|
112
|
|
|
113 audio_sample SHARED_EXPORT convert_to_int32_calculate_peak(const audio_sample * p_source,t_size p_count,t_int32 * p_output,audio_sample p_scale)
|
|
|
114 {
|
|
|
115 convert_to_int32(p_source,p_count,p_output,p_scale);
|
|
|
116 return p_scale * calculate_peak(p_source,p_count);
|
|
|
117 }
|
|
|
118
|
|
|
119 void SHARED_EXPORT convert_from_int32(const t_int32 * p_source,t_size p_count,audio_sample * p_output,audio_sample p_scale)
|
|
|
120 {
|
|
|
121 ::pfc::audio_math::convert_from_int32(p_source, p_count, p_output, p_scale);
|
|
|
122 }
|
|
|
123
|
|
|
124
|
|
|
125 audio_sample SHARED_EXPORT calculate_peak(const audio_sample * p_source,t_size p_count) {
|
|
|
126 return ::pfc::audio_math::calculate_peak(p_source, p_count);
|
|
|
127 }
|
|
|
128
|
|
|
129 void SHARED_EXPORT kill_denormal(audio_sample * p_buffer,t_size p_count) {
|
|
|
130 ::pfc::audio_math::remove_denormals(p_buffer, p_count);
|
|
|
131 }
|
|
|
132
|
|
|
133 void SHARED_EXPORT add_offset(audio_sample * p_buffer,audio_sample p_delta,t_size p_count) {
|
|
|
134 ::pfc::audio_math::add_offset(p_buffer, p_delta, p_count);
|
|
|
135 }
|
|
|
136
|
|
|
137 }
|