annotate foosdk/sdk/pfc/audio_math.cpp @ 1:20d02a178406 default tip

*: check in everything else yay
author Paper <paper@tflc.us>
date Mon, 05 Jan 2026 02:15:46 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1 #include "pfc-lite.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
2 #include "audio_sample.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
3 #include "primitives.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
4 #include "cpuid.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
5
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
6
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Enable the SSE code paths when the target is known to provide SSE2: _M_IX86_FP >= 2,
// _M_X64 (with ARM64EC explicitly excluded), __x86_64__ or __SSE2__.
7 #if (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || (defined(_M_X64) && !defined(_M_ARM64EC)) || defined(__x86_64__) || defined(__SSE2__)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
8 #define AUDIO_MATH_SSE
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
9 #include <xmmintrin.h>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
10 #include <tmmintrin.h> // _mm_shuffle_epi8
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
11 #include <smmintrin.h> // _mm_blend_epi16
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
12
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Fallback for _mm_loadu_si32: reads 32 bits from p and places them in the low lane via _mm_cvtsi32_si128.
// NOTE(review): #ifndef only detects macros; if the toolchain supplies this name as a function,
// the macro below is still defined - confirm that is intended.
13 #ifndef _mm_loadu_si32
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
14 #define _mm_loadu_si32(p) _mm_cvtsi32_si128(*(unsigned int const*)(p))
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
15 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Fallback for _mm_storeu_si32: writes the low 32 bits of a to p via _mm_cvtsi128_si32.
16 #ifndef _mm_storeu_si32
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
17 #define _mm_storeu_si32(p, a) (void)(*(int*)(p) = _mm_cvtsi128_si32((a)))
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
18 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
19
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// AVX selection. allowAVX gates the <immintrin.h> include further down; haveAVX is the availability flag.
// Built with __AVX__: both are the constant 1. Otherwise, when PFC_HAVE_CPUID is nonzero, haveAVX is
// initialized from pfc::query_cpu_feature_set(pfc::CPU_HAVE_AVX). Otherwise both are 0.
20 #ifdef __AVX__
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
21 #define allowAVX 1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
22 #define haveAVX 1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
23 #elif PFC_HAVE_CPUID
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
24 #define allowAVX 1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
25 static const bool haveAVX = pfc::query_cpu_feature_set(pfc::CPU_HAVE_AVX);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
26 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
27 #define allowAVX 0
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
28 #define haveAVX 0
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
29 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
30
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// SSE4.1 selection, same scheme: constant true when built with __SSE4_1__, a CPU feature query when
// PFC_HAVE_CPUID is nonzero, constant false otherwise.
31 #ifdef __SSE4_1__
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
32 #define haveSSE41 true
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
33 #elif PFC_HAVE_CPUID
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
34 static const bool haveSSE41 = pfc::query_cpu_feature_set(pfc::CPU_HAVE_SSE41);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
35 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
36 #define haveSSE41 false
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
37 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
38
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Pull in the AVX intrinsics only when AVX code may be compiled at all.
39 #if allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
40 #include <immintrin.h> // _mm256_set1_pd
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
41 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
42
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
43 #endif // end SSE
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
44
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Flag 64-bit ARM builds: any of __aarch64__, _M_ARM64 or _M_ARM64EC being defined.
45 #if defined( __aarch64__ ) || defined( _M_ARM64) || defined( _M_ARM64EC )
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
46 #define AUDIO_MATH_ARM64
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
47 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
48
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// NEON code paths are enabled for every ARM64 build and for any other build that defines __ARM_NEON__.
49 #if defined( AUDIO_MATH_ARM64 ) || defined( __ARM_NEON__ )
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
50 #define AUDIO_MATH_NEON
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
51 #include <arm_neon.h>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
52
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
53 // vcvtnq_s32_f32 (round to nearest) is not available on ARM32; fall back to vcvtq_s32_f32, which rounds toward zero - close enough
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
54 #ifdef AUDIO_MATH_ARM64
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
55 #define vcvtnq_s32_f32_wrap vcvtnq_s32_f32
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
56 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
57 #define vcvtnq_s32_f32_wrap vcvtq_s32_f32
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
58 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
59
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
60 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
61
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
62
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Enable the double-precision NEON routines on ARM64 builds, except when targeting Android.
63 #if defined( AUDIO_MATH_ARM64 ) && !defined( __ANDROID__ )
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
64 // Don't do Neon float64 on Android, crashes clang from NDK 25
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
65 #define AUDIO_MATH_NEON_FLOAT64
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
66 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
67
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
68 template<typename float_t> inline static float_t noopt_calculate_peak(const float_t *p_src, t_size p_num)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
69 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
70 float_t peak = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
71 t_size num = p_num;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
72 for(;num;num--)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
73 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
74 float_t temp = (float_t)fabs(*(p_src++));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
75 peak = fmax(peak, temp);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
76 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
77 return peak;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
78 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
79
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
80
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
81 template<typename float_t>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
82 inline static void noopt_convert_to_32bit(const float_t* p_source,t_size p_count,t_int32 * p_output, float_t p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
83 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
84 t_size num = p_count;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
85 for(;num;--num)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
86 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
87 t_int64 val = pfc::audio_math::rint64( *(p_source++) * p_scale );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
88 if (val < INT32_MIN) val = INT32_MIN;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
89 else if (val > INT32_MAX) val = INT32_MAX;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
90 *(p_output++) = (t_int32) val;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
91 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
92 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
93
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
94 template<typename float_t>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
95 inline static void noopt_convert_to_16bit(const float_t* p_source,t_size p_count,t_int16 * p_output, float_t p_scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
96 for(t_size n=0;n<p_count;n++) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
97 *(p_output++) = (t_int16) pfc::clip_t<int32_t>(pfc::audio_math::rint32(*(p_source++)*p_scale),INT16_MIN,INT16_MAX);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
98 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
99 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
100
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
101 template<typename float_t>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
102 inline static void noopt_convert_from_int16(const t_int16 * __restrict p_source,t_size p_count, float_t* __restrict p_output, float_t p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
103 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
104 t_size num = p_count;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
105 for(;num;num--)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
106 *(p_output++) = (float_t)*(p_source++) * p_scale;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
107 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
108
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
109
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
110
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
111 template<typename float_t>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
112 inline static void noopt_convert_from_int32(const t_int32 * __restrict p_source,t_size p_count, float_t* __restrict p_output, float_t p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
113 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
114 t_size num = p_count;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
115 for(;num;num--)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
116 *(p_output++) = (float_t)( * (p_source++) * p_scale );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
117 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
118
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Scalar scaling: writes (out_t)(p_source[i] * p_scale) to p_output[i] for the first p_count elements,
// in ascending order.
template<typename in_t, typename out_t, typename scale_t>
inline static void noopt_scale(const in_t * p_source,size_t p_count,out_t * p_output,scale_t p_scale)
{
	const in_t * const stop = p_source + p_count;
	for (; p_source != stop; ++p_source, ++p_output) {
		*p_output = (out_t)(*p_source * p_scale);
	}
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Element-wise type conversion: copies count elements from in to out, casting each to out_t.
template<typename in_t, typename out_t>
inline static void noopt_convert(const in_t* in, out_t* out, size_t count) {
	const in_t* const stop = in + count;
	while (in != stop) {
		*out = (out_t)*in;
		++in;
		++out;
	}
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
129
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
130 #ifdef AUDIO_MATH_NEON
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
131
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
132 #ifdef AUDIO_MATH_ARM64
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
133 #define _vmaxvq_f32_wrap vmaxvq_f32
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
134 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
135 inline float _vmaxvq_f32_wrap( float32x4_t arg ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
136 return pfc::max_t<float>( pfc::max_t<float>(arg[0], arg[1]), pfc::max_t<float>(arg[2], arg[3]) );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
137 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
138 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
139
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
140 inline static float neon_calculate_peak( const float * p_source, size_t p_count ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
141 size_t num = p_count / 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
142 float32x4_t ret1 = {}, ret2 = {};
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
143 for(;num;--num) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
144 float32x4_t f32lo = vld1q_f32( p_source );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
145 float32x4_t f32hi = vld1q_f32( p_source + 4 );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
146 p_source += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
147 ret1 = vmaxq_f32(ret1, vabsq_f32(f32lo));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
148 ret2 = vmaxq_f32(ret2, vabsq_f32(f32hi));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
149 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
150
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
151 float ret = _vmaxvq_f32_wrap(vmaxq_f32( ret1, ret2 ));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
152
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
153 size_t rem = p_count % 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
154 if ( rem != 0 ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
155 float v = noopt_calculate_peak( p_source, p_count % 8);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
156 if (v > ret) ret = v;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
157 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
158
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
159 return ret;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
160 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
161
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
162 inline static void neon_scale(const float * p_source,size_t p_count, float * p_output,float p_scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
163 size_t num = p_count / 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
164 for(;num;--num) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
165 float32x4_t lo = vld1q_f32( p_source );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
166 float32x4_t hi = vld1q_f32( p_source + 4 );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
167
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
168 lo = vmulq_n_f32( lo, p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
169 hi = vmulq_n_f32( hi, p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
170
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
171 vst1q_f32( p_output, lo );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
172 vst1q_f32( p_output+4, hi );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
173
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
174 p_source += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
175 p_output += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
176 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
177
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
178 noopt_scale( p_source, p_count % 8, p_output, p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
179 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
180 inline static void neon_convert_to_int32(const float * __restrict p_source,t_size p_count, int32_t * __restrict p_output,float p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
181 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
182 size_t num = p_count / 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
183 for(;num;--num) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
184 float32x4_t f32lo = vld1q_f32( p_source );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
185 float32x4_t f32hi = vld1q_f32( p_source + 4 );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
186
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
187 int32x4_t lo = vcvtnq_s32_f32_wrap( vmulq_n_f32(f32lo, p_scale) );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
188 int32x4_t hi = vcvtnq_s32_f32_wrap( vmulq_n_f32(f32hi, p_scale) );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
189
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
190 vst1q_s32(p_output, lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
191 vst1q_s32(p_output+4, hi);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
192
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
193 p_source += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
194 p_output += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
195
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
196 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
197
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
198 noopt_convert_to_32bit(p_source, p_count % 8, p_output, p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
199 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
200
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
201 inline static void neon_convert_from_int32(const int32_t * __restrict p_source,t_size p_count, float * __restrict p_output,float p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
202 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
203 size_t num = p_count / 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
204 size_t rem = p_count % 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
205 for(;num;num--) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
206 int32x4_t i32lo = vld1q_s32( p_source );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
207 int32x4_t i32hi = vld1q_s32( p_source + 4 );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
208 float32x4_t f32vl = vcvtq_f32_s32(i32lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
209 float32x4_t f32vh = vcvtq_f32_s32(i32hi);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
210
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
211 vst1q_f32(&p_output[0], vmulq_n_f32(f32vl, p_scale));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
212 vst1q_f32(&p_output[4], vmulq_n_f32(f32vh, p_scale));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
213
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
214 p_source += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
215 p_output += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
216
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
217 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
218
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
219 noopt_convert_from_int32( p_source, rem, p_output, p_scale );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
220 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
221
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
222 inline static void neon_convert_to_int16(const float* __restrict p_source,t_size p_count, int16_t * __restrict p_output,float p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
223 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
224 size_t num = p_count / 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
225 size_t rem = p_count % 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
226 for(;num;--num) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
227 float32x4_t f32lo = vld1q_f32( p_source );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
228 float32x4_t f32hi = vld1q_f32( p_source + 4);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
229
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
230 int32x4_t lo = vcvtnq_s32_f32_wrap( vmulq_n_f32(f32lo, p_scale) );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
231 int32x4_t hi = vcvtnq_s32_f32_wrap( vmulq_n_f32(f32hi, p_scale) );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
232
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
233 vst1q_s16(&p_output[0], vcombine_s16( vqmovn_s32( lo ), vqmovn_s32( hi ) ) );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
234
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
235 p_source += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
236 p_output += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
237
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
238 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
239
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
240 noopt_convert_to_16bit(p_source, rem, p_output, p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
241
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
242 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
243 inline static void neon_convert_from_int16(const t_int16 * __restrict p_source,t_size p_count, float * __restrict p_output,float p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
244 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
245 size_t num = p_count / 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
246 size_t rem = p_count % 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
247 for(;num;num--) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
248 auto i16lo = vld1_s16(p_source);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
249 auto i16hi = vld1_s16(p_source + 4);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
250
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
251 float32x4_t f32vl = vcvtq_f32_s32(vmovl_s16 (i16lo));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
252 float32x4_t f32vh = vcvtq_f32_s32(vmovl_s16 (i16hi));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
253
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
254 vst1q_f32(&p_output[0], vmulq_n_f32(f32vl, p_scale));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
255 vst1q_f32(&p_output[4], vmulq_n_f32(f32vh, p_scale));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
256
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
257 p_source += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
258 p_output += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
259
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
260 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
261
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
262 noopt_convert_from_int16( p_source, rem, p_output, p_scale );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
263 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
264 #ifdef AUDIO_MATH_NEON_FLOAT64
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Scales p_count doubles by p_scale and writes them out as signed 16-bit samples (NEON, float64 capable targets).
// Four samples are handled per iteration:
//  - vcvtnq_s64_f64 rounds to nearest (ties to even) while converting to int64,
//  - vqmovn_s64 / vqmovn_s32 narrow with saturation, so out-of-range values clip instead of wrapping.
// Whatever is left over (p_count % 4 samples) is passed on to noopt_convert_to_16bit().
inline static void neon_convert_to_int16(const double* __restrict p_source, t_size p_count, int16_t* __restrict p_output, double p_scale)
{
	size_t tail = p_count % 4;

	for (size_t blocks = p_count / 4; blocks != 0; --blocks) {
		// Load two pairs of doubles and apply the scale factor.
		const float64x2_t scaledLo = vmulq_n_f64(vld1q_f64(p_source), p_scale);
		const float64x2_t scaledHi = vmulq_n_f64(vld1q_f64(p_source + 2), p_scale);

		// double -> int64 (round to nearest) -> int32 (saturating).
		const int32x2_t narrowLo = vqmovn_s64(vcvtnq_s64_f64(scaledLo));
		const int32x2_t narrowHi = vqmovn_s64(vcvtnq_s64_f64(scaledHi));

		// int32 -> int16 (saturating), then store all four samples at once.
		vst1_s16(p_output, vqmovn_s32(vcombine_s32(narrowLo, narrowHi)));

		p_source += 4;
		p_output += 4;
	}

	noopt_convert_to_16bit(p_source, tail, p_output, p_scale);
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
291
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Converts p_count signed 16-bit samples to doubles multiplied by p_scale (NEON, float64 capable targets).
// Each iteration widens four samples int16 -> int32 -> int64, converts them to double and scales them.
// The remaining p_count % 4 samples are passed on to noopt_convert_from_int16().
inline static void neon_convert_from_int16(const t_int16* __restrict p_source, t_size p_count, double* __restrict p_output, double p_scale)
{
	size_t tail = p_count % 4;

	for (size_t blocks = p_count / 4; blocks != 0; --blocks) {
		// Sign-extend four int16 samples to int32.
		const int32x4_t wide32 = vmovl_s16(vld1_s16(p_source));

		// Sign-extend each half to int64 and convert to double.
		const float64x2_t lowPair = vcvtq_f64_s64(vmovl_s32(vget_low_s32(wide32)));
		const float64x2_t highPair = vcvtq_f64_s64(vmovl_s32(vget_high_s32(wide32)));

		vst1q_f64(p_output, vmulq_n_f64(lowPair, p_scale));
		vst1q_f64(p_output + 2, vmulq_n_f64(highPair, p_scale));

		p_source += 4;
		p_output += 4;
	}

	noopt_convert_from_int16(p_source, tail, p_output, p_scale);
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
315 #endif // AUDIO_MATH_NEON_FLOAT64
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
316
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
317 #endif // AUDIO_MATH_NEON
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
318
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
319 #if defined(AUDIO_MATH_SSE)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
320
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
321 inline void convert_to_32bit_sse2(const float* p_src, size_t numTotal, t_int32* p_dst, float p_mul)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
322 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
323
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
324 // Implementation notes
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
325 // There doesn't seem to be a nice and tidy way to convert float to int32 with graceful clipping to INT32_MIN .. INT32_MAX range.
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
326 // While low clipping at INT32_MIN can be accomplished with _mm_max_ps(), high clipping needs float compare THEN substitute bad int with INT32_MAX.
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
327 // The best we could do with _mm_min_ps() would result with high clipping at 0x7FFFFF80 instead of 0x7FFFFFFF (INT32_MAX).
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
328 // We store masks from float compare and fix ints according to the mask later.
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
329
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
330 __m128 mul = _mm_set1_ps(p_mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
331 __m128 loF = _mm_set1_ps((float)INT32_MIN);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
332 __m128 hiF = _mm_set1_ps((float)INT32_MAX);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
333 // __m128i loI = _mm_set1_epi32(INT32_MIN);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
334 __m128i hiI = _mm_set1_epi32(INT32_MAX);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
335
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
336 size_t num = numTotal / 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
337 size_t rem = numTotal % 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
338 for (; num; --num) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
339 __m128 s = _mm_mul_ps(mul, _mm_loadu_ps(p_src)); p_src += 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
340
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
341 s = _mm_max_ps(s, loF);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
342
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
343 // __m128i maskLo = _mm_castps_si128(_mm_cmple_ps(s, loF));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
344 __m128i maskHi = _mm_castps_si128(_mm_cmpge_ps(s, hiF));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
345
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
346 __m128i i = _mm_cvtps_epi32(s);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
347
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
348 // i = _mm_or_si128(_mm_andnot_si128(maskLo, i), _mm_and_si128(loI, maskLo));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
349 i = _mm_or_si128(_mm_andnot_si128(maskHi, i), _mm_and_si128(hiI, maskHi));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
350
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
351 _mm_storeu_si128((__m128i*) p_dst, i); p_dst += 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
352 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
353
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
354 for (; rem; --rem) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
355 __m128 s = _mm_mul_ss(_mm_load_ss(p_src++), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
356 s = _mm_max_ss(s, loF);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
357
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
358 // __m128i maskLo = _mm_castps_si128( _mm_cmple_ss(s, loF) );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
359 __m128i maskHi = _mm_castps_si128(_mm_cmpge_ss(s, hiF));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
360
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
361 __m128i i = _mm_cvtps_epi32(s); // not ss
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
362
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
363 // i = _mm_or_si128(_mm_andnot_si128(maskLo, i), _mm_and_si128(loI, maskLo));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
364 i = _mm_or_si128(_mm_andnot_si128(maskHi, i), _mm_and_si128(hiI, maskHi));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
365
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
366 _mm_storeu_si32(p_dst++, i);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
367 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
368 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
369
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
370 inline void convert_to_32bit_sse2(const double* p_src, size_t numTotal, t_int32* p_dst, double p_mul)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
371 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
372 auto mul = _mm_set1_pd(p_mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
373 auto loF = _mm_set1_pd(INT32_MIN);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
374 auto hiF = _mm_set1_pd(INT32_MAX);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
375
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
376 size_t num = numTotal / 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
377 size_t rem = numTotal % 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
378 for (; num; --num) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
379 auto v1 = _mm_loadu_pd(p_src);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
380 auto v2 = _mm_loadu_pd(p_src + 2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
381 p_src += 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
382
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
383 v1 = _mm_mul_pd(v1, mul); v2 = _mm_mul_pd(v2, mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
384
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
385 v1 = _mm_max_pd(v1, loF); v2 = _mm_max_pd(v2, loF);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
386 v1 = _mm_min_pd(v1, hiF); v2 = _mm_min_pd(v2, hiF);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
387
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
388 auto i1 = _mm_cvtpd_epi32(v1), i2 = _mm_cvtpd_epi32(v2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
389
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
390
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
391 _mm_storeu_si128((__m128i*) p_dst, _mm_unpacklo_epi64(i1, i2)); p_dst += 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
392 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
393
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
394 for (; rem; --rem) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
395 auto s = _mm_mul_sd(_mm_load_sd(p_src++), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
396 s = _mm_max_sd(s, loF); s = _mm_min_sd(s, hiF);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
397 * p_dst++ = _mm_cvtsd_si32(s);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
398 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
399 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
400
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
401 inline void convert_from_int16_sse2(const t_int16 * p_source,t_size p_count,float * p_output,float p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
402 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
403 while(!pfc::is_ptr_aligned_t<16>(p_output) && p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
404 *(p_output++) = (float)*(p_source++) * p_scale;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
405 p_count--;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
406 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
407
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
408 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
409 __m128 mul = _mm_set1_ps(p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
410 __m128i nulls = _mm_setzero_si128();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
411 __m128i delta1 = _mm_set1_epi16((int16_t)0x8000);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
412 __m128i delta2 = _mm_set1_epi32((int32_t)0x8000);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
413
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
414 for(t_size loop = p_count >> 3;loop;--loop) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
415 __m128i source, temp1, temp2; __m128 float1, float2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
416 source = _mm_loadu_si128((__m128i*)p_source);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
417 source = _mm_xor_si128(source,delta1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
418 temp1 = _mm_unpacklo_epi16(source,nulls);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
419 temp2 = _mm_unpackhi_epi16(source,nulls);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
420 temp1 = _mm_sub_epi32(temp1,delta2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
421 temp2 = _mm_sub_epi32(temp2,delta2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
422 p_source += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
423 float1 = _mm_cvtepi32_ps(temp1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
424 float2 = _mm_cvtepi32_ps(temp2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
425 float1 = _mm_mul_ps(float1,mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
426 float2 = _mm_mul_ps(float2,mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
427 _mm_store_ps(p_output,float1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
428 _mm_store_ps(p_output+4,float2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
429 p_output += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
430 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
431
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
432 p_count &= 7;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
433 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
434
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
435 while(p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
436 *(p_output++) = (float)*(p_source++) * p_scale;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
437 p_count--;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
438 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
439 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
440
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
441 inline static void convert_to_16bit_sse2(const float * p_source,t_size p_count,t_int16 * p_output,float p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
442 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
443 size_t num = p_count/8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
444 size_t rem = p_count%8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
445 __m128 mul = _mm_set1_ps(p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
446 for(;num;--num)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
447 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
448 __m128 temp1,temp2; __m128i itemp1, itemp2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
449 temp1 = _mm_loadu_ps(p_source);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
450 temp2 = _mm_loadu_ps(p_source+4);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
451 temp1 = _mm_mul_ps(temp1,mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
452 temp2 = _mm_mul_ps(temp2,mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
453 p_source += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
454 itemp1 = _mm_cvtps_epi32(temp1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
455 itemp2 = _mm_cvtps_epi32(temp2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
456 _mm_storeu_si128( (__m128i*)p_output, _mm_packs_epi32(itemp1, itemp2) );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
457 p_output += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
458 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
459
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
460 noopt_convert_to_16bit(p_source, rem, p_output, p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
461 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
462
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
463 inline static void convert_to_16bit_sse2(const double* p_source, t_size p_count, t_int16* p_output, double p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
464 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
465 size_t num = p_count / 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
466 size_t rem = p_count % 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
467 __m128d mul = _mm_set1_pd(p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
468 for (; num; --num)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
469 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
470 __m128d temp1, temp2, temp3, temp4; __m128i itemp1, itemp2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
471 temp1 = _mm_loadu_pd(p_source);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
472 temp2 = _mm_loadu_pd(p_source + 2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
473 temp3 = _mm_loadu_pd(p_source + 4);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
474 temp4 = _mm_loadu_pd(p_source + 6);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
475
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
476 p_source += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
477
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
478 temp1 = _mm_mul_pd(temp1, mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
479 temp2 = _mm_mul_pd(temp2, mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
480 temp3 = _mm_mul_pd(temp3, mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
481 temp4 = _mm_mul_pd(temp4, mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
482
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
483
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
484 itemp1 = _mm_unpacklo_epi64(_mm_cvtpd_epi32(temp1), _mm_cvtpd_epi32(temp2));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
485 itemp2 = _mm_unpacklo_epi64(_mm_cvtpd_epi32(temp3), _mm_cvtpd_epi32(temp4));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
486
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
487 _mm_storeu_si128((__m128i*)p_output, _mm_packs_epi32(itemp1, itemp2));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
488 p_output += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
489 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
490
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
491 noopt_convert_to_16bit(p_source, rem, p_output, p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
492 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
493 #if allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
494 inline static void avx_convert_to_16bit(const double* p_source, size_t p_count, int16_t* p_output, double p_scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
495 size_t num = p_count / 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
496 size_t rem = p_count % 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
497 auto mul = _mm256_set1_pd(p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
498 for (; num; --num)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
499 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
500 auto temp1 = _mm256_loadu_pd(p_source);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
501 auto temp2 = _mm256_loadu_pd(p_source + 4);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
502
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
503 p_source += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
504
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
505 temp1 = _mm256_mul_pd(temp1, mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
506 temp2 = _mm256_mul_pd(temp2, mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
507
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
508 auto itemp1 = _mm256_cvtpd_epi32(temp1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
509 auto itemp2 = _mm256_cvtpd_epi32(temp2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
510
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
511 _mm_storeu_si128((__m128i*)p_output, _mm_packs_epi32(itemp1, itemp2));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
512 p_output += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
513 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
514
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
515 noopt_convert_to_16bit(p_source, rem, p_output, p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
516 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
517 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
518
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
519 inline float sse_calculate_peak( const float * src, size_t count ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
520 size_t num = count/8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
521 size_t rem = count%8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
522
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
523 __m128 mask = _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
524 __m128 acc1 = _mm_setzero_ps(), acc2 = _mm_setzero_ps();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
525
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
526 for(;num;--num) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
527 __m128 v1 = _mm_loadu_ps( src );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
528 __m128 v2 = _mm_loadu_ps( src + 4 );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
529 v1 = _mm_and_ps( v1, mask );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
530 v2 = _mm_and_ps( v2, mask );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
531 // Two acc channels so one _mm_max_ps doesn't block the other
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
532 acc1 = _mm_max_ps( acc1, v1 );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
533 acc2 = _mm_max_ps( acc2, v2 );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
534 src += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
535 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
536
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
537 float ret;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
538 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
539 float blah[4];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
540 _mm_storeu_ps(blah, _mm_max_ps( acc1, acc2 ));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
541 __m128 acc = _mm_load_ss( &blah[0] );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
542 acc = _mm_max_ss( acc, _mm_load_ss( &blah[1] ) );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
543 acc = _mm_max_ss( acc, _mm_load_ss( &blah[2] ) );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
544 acc = _mm_max_ss( acc, _mm_load_ss( &blah[3] ) );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
545 ret = _mm_cvtss_f32(acc);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
546 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
547 if ( rem > 0 ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
548 __m128 acc = _mm_set_ss( ret );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
549 for( ;rem; --rem) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
550 __m128 v = _mm_load_ss( src++ );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
551 v = _mm_and_ps( v, mask );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
552 acc = _mm_max_ss( acc, v );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
553 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
554 ret = _mm_cvtss_f32(acc);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
555 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
556 return ret;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
557 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
558
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
559 #if allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
560 inline double avx_calculate_peak(const double* src, size_t count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
561 size_t num = count / 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
562 size_t rem = count % 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
563
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
564 auto mask = _mm256_castsi256_pd(_mm256_set1_epi64x(0x7FFFFFFFFFFFFFFF));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
565 auto acc1 = _mm256_setzero_pd(), acc2 = _mm256_setzero_pd();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
566
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
567 for (; num; --num) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
568 auto v1 = _mm256_loadu_pd(src);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
569 auto v2 = _mm256_loadu_pd(src + 4);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
570
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
571 v1 = _mm256_and_pd(v1, mask);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
572 v2 = _mm256_and_pd(v2, mask);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
573
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
574 acc1 = _mm256_max_pd(acc1, v1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
575 acc2 = _mm256_max_pd(acc2, v2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
576
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
577 src += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
578 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
579
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
580 __m128d acc;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
581 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
582 acc1 = _mm256_max_pd(acc1, acc2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
583
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
584 acc = _mm_max_pd(_mm256_extractf128_pd(acc1, 0), _mm256_extractf128_pd(acc1, 1));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
585
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
586 acc = _mm_max_sd(acc, _mm_shuffle_pd(acc, acc, _MM_SHUFFLE2(0, 1)));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
587 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
588
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
589 if (rem > 0) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
590 __m128d mask128 = _mm_castsi128_pd(_mm_set1_epi64x(0x7FFFFFFFFFFFFFFF));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
591 for (; rem; --rem) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
592 __m128d v = _mm_load_sd(src++);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
593 v = _mm_and_pd(v, mask128);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
594 acc = _mm_max_sd(acc, v);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
595 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
596 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
597 return _mm_cvtsd_f64(acc);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
598 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
599 #endif // allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
600
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Returns the largest absolute value found in the first `count` doubles at
// `src`; yields 0.0 when `count` is zero. Implemented with SSE2 intrinsics.
inline double sse_calculate_peak(const double* src, size_t count) {
	// Clearing the top (sign) bit of each 64-bit lane leaves the magnitude.
	const __m128d absMask = _mm_castsi128_pd(_mm_set1_epi64x(0x7FFFFFFFFFFFFFFF));

	// Two separate running maxima (two lanes each) so that one _mm_max_pd
	// does not have to wait for the other.
	__m128d peakA = _mm_setzero_pd();
	__m128d peakB = _mm_setzero_pd();

	const size_t tail = count % 4;
	for (size_t blocks = count / 4; blocks != 0; --blocks, src += 4) {
		const __m128d lower = _mm_and_pd(_mm_loadu_pd(src), absMask);
		const __m128d upper = _mm_and_pd(_mm_loadu_pd(src + 2), absMask);
		peakA = _mm_max_pd(peakA, lower);
		peakB = _mm_max_pd(peakB, upper);
	}

	// Horizontal reduction: 4 candidates -> 2 -> 1 (kept in the low lane).
	__m128d peak = _mm_max_pd(peakA, peakB);
	peak = _mm_max_sd(peak, _mm_shuffle_pd(peak, peak, _MM_SHUFFLE2(0, 1)));

	// Fold in the samples that did not fill a whole group of four.
	for (size_t i = 0; i < tail; ++i) {
		const __m128d sample = _mm_and_pd(_mm_load_sd(src + i), absMask);
		peak = _mm_max_sd(peak, sample);
	}
	return _mm_cvtsd_f64(peak);
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
632
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Converts `count` 32-bit integers at `source` to floats, multiplies each by
// `scale`, and writes the results to `output`. Unaligned loads and stores are
// used throughout.
inline void sse_convert_from_int32(const int32_t* source, size_t count, float* output, float scale) {
	const __m128 factor = _mm_set1_ps(scale);

	// Main loop: eight samples (two 128-bit vectors) per pass. Both vectors
	// are loaded before anything is stored.
	const size_t tail = count % 8;
	for (size_t blocks = count / 8; blocks != 0; --blocks) {
		const __m128i* in = (const __m128i*)source;
		const __m128i rawLo = _mm_loadu_si128(in);
		const __m128i rawHi = _mm_loadu_si128(in + 1);
		const __m128 scaledLo = _mm_mul_ps(_mm_cvtepi32_ps(rawLo), factor);
		const __m128 scaledHi = _mm_mul_ps(_mm_cvtepi32_ps(rawHi), factor);
		_mm_storeu_ps(output, scaledLo);
		_mm_storeu_ps(output + 4, scaledHi);
		source += 8;
		output += 8;
	}

	// Leftover samples, one at a time: the integer is placed in the low lane
	// (upper lanes zero) and only the low lane is scaled and stored.
	for (size_t i = 0; i < tail; ++i) {
		const __m128 sample = _mm_cvtepi32_ps(_mm_cvtsi32_si128(source[i]));
		_mm_store_ss(output + i, _mm_mul_ss(sample, factor));
	}
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
656
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Converts `count` 32-bit integers at `source` to doubles, multiplies each by
// `scale`, and writes the results to `output`. Unaligned loads and stores are
// used throughout.
inline void sse_convert_from_int32(const int32_t* source, size_t count, double* output, double scale) {
	const __m128d factor = _mm_set1_pd(scale);

	// Main loop: eight samples per pass, loaded as two 4 x int32 vectors.
	const size_t tail = count % 8;
	for (size_t blocks = count / 8; blocks != 0; --blocks) {
		const __m128i* in = (const __m128i*)source;
		const __m128i quadA = _mm_loadu_si128(in);
		const __m128i quadB = _mm_loadu_si128(in + 1);

		// _mm_cvtepi32_pd converts only the two low lanes, so the 64-bit
		// halves are swapped to bring the upper pair down.
		const __m128i quadAHigh = _mm_shuffle_epi32(quadA, _MM_SHUFFLE(1, 0, 3, 2));
		const __m128i quadBHigh = _mm_shuffle_epi32(quadB, _MM_SHUFFLE(1, 0, 3, 2));

		const __m128d pair0 = _mm_mul_pd(_mm_cvtepi32_pd(quadA), factor);
		const __m128d pair1 = _mm_mul_pd(_mm_cvtepi32_pd(quadAHigh), factor);
		const __m128d pair2 = _mm_mul_pd(_mm_cvtepi32_pd(quadB), factor);
		const __m128d pair3 = _mm_mul_pd(_mm_cvtepi32_pd(quadBHigh), factor);

		_mm_storeu_pd(output, pair0);
		_mm_storeu_pd(output + 2, pair1);
		_mm_storeu_pd(output + 4, pair2);
		_mm_storeu_pd(output + 6, pair3);

		source += 8;
		output += 8;
	}

	// Leftover samples, one at a time: the integer is placed in the low lane
	// (upper lanes zero) and only the low lane is scaled and stored.
	for (size_t i = 0; i < tail; ++i) {
		const __m128d sample = _mm_cvtepi32_pd(_mm_cvtsi32_si128(source[i]));
		_mm_store_sd(output + i, _mm_mul_sd(sample, factor));
	}
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
685 #if allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
686 inline void convert_from_int16_avx(const t_int16* p_source, t_size p_count, double* p_output, double p_scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
687 while (!pfc::is_ptr_aligned_t<32>(p_output) && p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
688 *(p_output++) = (double)*(p_source++) * p_scale;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
689 p_count--;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
690 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
691
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
692 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
693 __m256d muld = _mm256_set1_pd(p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
694
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
695 for (t_size loop = p_count >> 3; loop; --loop) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
696 auto source = _mm_loadu_si128((__m128i*)p_source);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
697 auto temp1 = _mm_cvtepi16_epi32(source);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
698 auto temp2 = _mm_cvtepi16_epi32(_mm_shuffle_epi32(source, _MM_SHUFFLE(0, 0, 3, 2)));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
699 p_source += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
700
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
701 auto double1 = _mm256_cvtepi32_pd(temp1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
702 auto double2 = _mm256_cvtepi32_pd(temp2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
703
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
704 double1 = _mm256_mul_pd(double1, muld);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
705 double2 = _mm256_mul_pd(double2, muld);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
706
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
707 _mm256_store_pd(p_output, double1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
708 _mm256_store_pd(p_output+4, double2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
709
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
710 p_output += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
711 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
712
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
713 p_count &= 7;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
714 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
715
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
716 while (p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
717 *(p_output++) = (double)*(p_source++) * p_scale;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
718 p_count--;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
719 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
720
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
721 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
722 #endif // allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
723
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
724 inline void convert_from_int16_sse2(const t_int16* p_source, t_size p_count, double * p_output, double p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
725 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
726 while (!pfc::is_ptr_aligned_t<16>(p_output) && p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
727 *(p_output++) = (double) * (p_source++) * p_scale;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
728 p_count--;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
729 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
730
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
731 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
732 __m128d muld = _mm_set1_pd(p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
733 __m128i nulls = _mm_setzero_si128();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
734 __m128i delta1 = _mm_set1_epi16((int16_t)0x8000);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
735 __m128i delta2 = _mm_set1_epi32((int32_t)0x8000);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
736
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
737 for (t_size loop = p_count >> 3; loop; --loop) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
738 __m128i source, temp1, temp2; __m128d double1, double2, double3, double4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
739 source = _mm_loadu_si128((__m128i*)p_source);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
740 source = _mm_xor_si128(source, delta1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
741 temp1 = _mm_unpacklo_epi16(source, nulls);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
742 temp2 = _mm_unpackhi_epi16(source, nulls);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
743 temp1 = _mm_sub_epi32(temp1, delta2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
744 temp2 = _mm_sub_epi32(temp2, delta2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
745 p_source += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
746
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
747 double1 = _mm_cvtepi32_pd(temp1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
748 double2 = _mm_cvtepi32_pd(_mm_shuffle_epi32(temp1, _MM_SHUFFLE(3, 2, 3, 2)));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
749 double3 = _mm_cvtepi32_pd(temp2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
750 double4 = _mm_cvtepi32_pd(_mm_shuffle_epi32(temp2, _MM_SHUFFLE(3, 2, 3, 2)));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
751
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
752 double1 = _mm_mul_pd(double1, muld);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
753 double2 = _mm_mul_pd(double2, muld);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
754 double3 = _mm_mul_pd(double3, muld);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
755 double4 = _mm_mul_pd(double4, muld);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
756 _mm_store_pd(p_output, double1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
757 _mm_store_pd(p_output + 2, double2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
758 _mm_store_pd(p_output + 4, double3);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
759 _mm_store_pd(p_output + 6, double4);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
760
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
761 p_output += 8;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
762 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
763
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
764 p_count &= 7;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
765 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
766
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
767 while (p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
768 *(p_output++) = (double) * (p_source++) * p_scale;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
769 p_count--;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
770 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
771 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
772
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
773 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
774
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
775 namespace pfc {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
776 void audio_math::scale(const float* p_source, size_t p_count, float* p_output, float p_scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
777 #if defined( AUDIO_MATH_NEON )
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
778 neon_scale(p_source, p_count, p_output, p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
779 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
780 noopt_scale(p_source, p_count, p_output, p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
781 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
782 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
783 void audio_math::scale(const double* p_source, size_t p_count, double* p_output, double p_scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
784 noopt_scale(p_source, p_count, p_output, p_scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
785 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
786
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
787 void audio_math::convert_to_int16(const float* p_source, t_size p_count, t_int16* p_output, float p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
788 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
789 float scale = (float)(p_scale * 0x8000);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
790 #if defined(AUDIO_MATH_SSE)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
791 convert_to_16bit_sse2(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
792 #elif defined( AUDIO_MATH_NEON )
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
793 neon_convert_to_int16(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
794 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
795 noopt_convert_to_16bit(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
796 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
797 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
798 void audio_math::convert_to_int16(const double* p_source, t_size p_count, t_int16* p_output, double p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
799 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
800 double scale = (double)(p_scale * 0x8000);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
801 #if defined(AUDIO_MATH_SSE)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
802 #if allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
803 if (haveAVX) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
804 avx_convert_to_16bit(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
805 } else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
806 #endif // allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
807 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
808 convert_to_16bit_sse2(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
809 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
810 #elif defined( AUDIO_MATH_NEON_FLOAT64 )
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
811 neon_convert_to_int16(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
812 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
813 noopt_convert_to_16bit(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
814 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
815 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
816
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
817 void audio_math::convert_from_int16(const t_int16* p_source, t_size p_count, float* p_output, float p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
818 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
819 float scale = (float)(p_scale / (double)0x8000);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
820 #if defined(AUDIO_MATH_SSE)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
821 convert_from_int16_sse2(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
822 #elif defined( AUDIO_MATH_NEON )
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
823 neon_convert_from_int16(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
824 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
825 noopt_convert_from_int16(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
826 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
827 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
828
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
829 void audio_math::convert_from_int16(const t_int16* p_source, t_size p_count, double* p_output, double p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
830 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
831 double scale = (double)(p_scale / (double)0x8000);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
832 #if defined(AUDIO_MATH_SSE)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
833 #if allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
834 if (haveAVX) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
835 convert_from_int16_avx(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
836 } else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
837 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
838 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
839 convert_from_int16_sse2(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
840 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
841 #elif defined( AUDIO_MATH_NEON_FLOAT64 )
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
842 neon_convert_from_int16(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
843 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
844 noopt_convert_from_int16(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
845 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
846 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
847
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
848 void audio_math::convert_to_int32(const float* p_source, t_size p_count, t_int32* p_output, float p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
849 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
850 float scale = (float)(p_scale * 0x80000000ul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
851 #if defined(AUDIO_MATH_NEON)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
852 neon_convert_to_int32(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
853 #elif defined(AUDIO_MATH_SSE)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
854 convert_to_32bit_sse2(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
855 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
856 noopt_convert_to_32bit(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
857 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
858 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
859
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
860 void audio_math::convert_to_int32(const double* p_source, t_size p_count, t_int32* p_output, double p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
861 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
862 double scale = (double)(p_scale * 0x80000000ul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
863 #if defined(AUDIO_MATH_SSE)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
864 convert_to_32bit_sse2(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
865 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
866 noopt_convert_to_32bit(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
867 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
868 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
869
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
870 void audio_math::convert_from_int32(const t_int32* p_source, t_size p_count, float* p_output, float p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
871 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
872 float scale = (float)(p_scale / (double)0x80000000ul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
873 // Note: speed difference here is marginal over compiler output as of Xcode 12
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
874 #if defined(AUDIO_MATH_NEON)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
875 neon_convert_from_int32(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
876 #elif defined(AUDIO_MATH_SSE)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
877 sse_convert_from_int32(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
878 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
879 noopt_convert_from_int32(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
880 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
881 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
882
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
883 void audio_math::convert_from_int32(const t_int32* p_source, t_size p_count, double* p_output, double p_scale)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
884 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
885 double scale = (double)(p_scale / (double)0x80000000ul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
886 #if defined(AUDIO_MATH_SSE)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
887 sse_convert_from_int32(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
888 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
889 noopt_convert_from_int32(p_source, p_count, p_output, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
890 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
891 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
892
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
893 float audio_math::calculate_peak(const float * p_source, t_size p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
894 #if defined(AUDIO_MATH_SSE)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
895 return sse_calculate_peak(p_source, p_count);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
896 #elif defined(AUDIO_MATH_NEON)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
897 return neon_calculate_peak(p_source, p_count);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
898 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
899 return noopt_calculate_peak(p_source, p_count);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
900 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
901 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
902 double audio_math::calculate_peak(const double * p_source, t_size p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
903 #if defined(AUDIO_MATH_SSE)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
904 // Note that avx_calculate_peak failed to score better than sse_calculate_peak
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
905 return sse_calculate_peak(p_source, p_count);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
906 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
907 return noopt_calculate_peak(p_source, p_count);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
908 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
909 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
910
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
911 void audio_math::remove_denormals(float* p_buffer, t_size p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
912 t_uint32* ptr = reinterpret_cast<t_uint32*>(p_buffer);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
913 for (; p_count; p_count--)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
914 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
915 t_uint32 t = *ptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
916 if ((t & 0x007FFFFF) && !(t & 0x7F800000)) *ptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
917 ptr++;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
918 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
919 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
920 void audio_math::remove_denormals(double* p_buffer, t_size p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
921 t_uint64* ptr = reinterpret_cast<t_uint64*>(p_buffer);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
922 for (; p_count; p_count--)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
923 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
924 t_uint64 t = *ptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
925 if ((t & 0x000FFFFFFFFFFFFF) && !(t & 0x7FF0000000000000)) *ptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
926 ptr++;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
927 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
928 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
929
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
930 void audio_math::add_offset(float* p_buffer, float p_delta, size_t p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
931 for (size_t n = 0; n < p_count; ++n) p_buffer[n] += p_delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
932 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
933 void audio_math::add_offset(double* p_buffer, double p_delta, size_t p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
934 for (size_t n = 0; n < p_count; ++n) p_buffer[n] += p_delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
935 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
936
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
937 void audio_math::convert(const float* in, float* out, size_t count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
938 memcpy(out, in, count * sizeof(float));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
939 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
940 void audio_math::convert(const float* in, float* out, size_t count, float scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
941 audio_math::scale(in, count, out, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
942 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
943 void audio_math::convert(const double* in, double* out, size_t count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
944 memcpy(out, in, count * sizeof(double));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
945 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
946 void audio_math::convert(const double* in, double* out, size_t count, double scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
947 audio_math::scale(in, count, out, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
948 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
949
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
950 void audio_math::convert(const float* in, double* out, size_t count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
951 // optimize me
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
952 noopt_convert(in, out, count);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
953 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
954 void audio_math::convert(const float* in, double* out, size_t count, double scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
955 // optimize me
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
956 noopt_scale(in, count, out, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
957 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
958 void audio_math::convert(const double* in, float* out, size_t count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
959 // optimize me
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
960 noopt_convert(in, out, count);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
961 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
962 void audio_math::convert(const double* in, float* out, size_t count, double scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
963 // optimize me
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
964 noopt_scale(in, count, out, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
965 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
966
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
967
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Byte type used to address packed 24-bit sample storage.
typedef char store24_t;

// Writes the first three bytes of `in`'s in-memory representation to `out`
// and returns the position just past them. Exactly three bytes are touched,
// so this is safe for the final sample of a buffer.
// On a little-endian target these are the low 24 bits of `in`.
static store24_t* store24(store24_t* out, int32_t in) {
	memcpy(out, &in, 3);
	return out + 3;
}

// Faster variant of store24: writes all four bytes of `in` to `out` but only
// advances by three, so the spilled fourth byte is overwritten by whatever is
// stored next. The caller must guarantee out[3] is writable, which is why
// convert_to_int24_noopt switches to store24 for its last sample.
// memcpy is used instead of a cast-and-assign because `out` is generally not
// 4-byte aligned and is not an int32_t object; compilers still lower this to
// a single unaligned store.
static store24_t* store24p(store24_t* out, int32_t in) {
	memcpy(out, &in, sizeof(in));
	return out + 3;
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
979
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
980 static constexpr int32_t INT24_MAX = 0x7FFFFF, INT24_MIN = -0x800000;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
981
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
982 template<typename float_t> void convert_to_int24_noopt(float_t const* in, size_t count, void* out, float_t scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
983 if (count == 0) return;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
984 --count;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
985 auto ptr = reinterpret_cast<store24_t*>(out);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
986 constexpr float_t lo = INT24_MIN, hi = INT24_MAX;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
987 while (count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
988 auto vf = *in++ * scale;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
989 if (vf < lo) vf = lo;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
990 else if (vf > hi) vf = hi;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
991 ptr = store24p(ptr, audio_math::rint32(vf));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
992 --count;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
993 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
994
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
995 auto vf = *in * scale;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
996 if (vf < lo) vf = lo;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
997 else if (vf > hi) vf = hi;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
998 store24(ptr, audio_math::rint32(vf));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
999 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1000 #ifdef AUDIO_MATH_SSE
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1001 #if allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1002 static void f64_to_i24_avx(double const* in, size_t n, uint8_t* out, double scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1003 const __m128i pi0 = _mm_set_epi8(-128, -128, -128, -128, 14, 13, 12, 10, 9, 8, 6, 5, 4, 2, 1, 0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1004 const __m128i pi1 = _mm_set_epi8(4, 2, 1, 0, -128, -128, -128, -128, 14, 13, 12, 10, 9, 8, 6, 5);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1005 const __m128i pi2 = _mm_set_epi8(9, 8, 6, 5, 4, 2, 1, 0, -128, -128, -128, -128, 14, 13, 12, 10);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1006 const __m128i pi3 = _mm_set_epi8(14, 13, 12, 10, 9, 8, 6, 5, 4, 2, 1, 0, -128, -128, -128, -128);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1007 const auto mul = _mm256_set1_pd(scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1008
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1009 // PROBLEM: if we want to handle wildly out-of-bounds values, we can't do int clipping!
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1010 // float clipping is sadly considerably slower than int clipping
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1011 const auto lo = _mm256_set1_pd(INT24_MIN);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1012 const auto hi = _mm256_set1_pd(INT24_MAX);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1013
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1014 while (n >= 4 * 4) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1015 auto f0 = _mm256_mul_pd(_mm256_loadu_pd(in + 0), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1016 auto f1 = _mm256_mul_pd(_mm256_loadu_pd(in + 4), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1017 auto f2 = _mm256_mul_pd(_mm256_loadu_pd(in + 8), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1018 auto f3 = _mm256_mul_pd(_mm256_loadu_pd(in + 12), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1019 f0 = _mm256_max_pd(_mm256_min_pd(f0, hi), lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1020 f1 = _mm256_max_pd(_mm256_min_pd(f1, hi), lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1021 f2 = _mm256_max_pd(_mm256_min_pd(f2, hi), lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1022 f3 = _mm256_max_pd(_mm256_min_pd(f3, hi), lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1023 __m128i w0 = _mm256_cvtpd_epi32(f0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1024 __m128i w1 = _mm256_cvtpd_epi32(f1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1025 __m128i w2 = _mm256_cvtpd_epi32(f2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1026 __m128i w3 = _mm256_cvtpd_epi32(f3);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1027
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1028 // _mm_shuffle_epi8 : SSSE3
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1029 w0 = _mm_shuffle_epi8(w0, pi0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1030 w1 = _mm_shuffle_epi8(w1, pi1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1031 w2 = _mm_shuffle_epi8(w2, pi2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1032 w3 = _mm_shuffle_epi8(w3, pi3);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1033
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1034 // _mm_blend_epi16 : SSE4.1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1035 __m128i u0 = _mm_blend_epi16(w0, w1, 0xC0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1036 __m128i u1 = _mm_blend_epi16(w1, w2, 0xF0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1037 __m128i u2 = _mm_blend_epi16(w2, w3, 0xFC);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1038
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1039 _mm_storeu_si128((__m128i*)(out + 0), u0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1040 _mm_storeu_si128((__m128i*)(out + 16), u1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1041 _mm_storeu_si128((__m128i*)(out + 32), u2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1042
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1043 in += 4 * 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1044 out += 16 * 3;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1045 n -= 4 * 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1046 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1047
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1048 convert_to_int24_noopt(in, n, out, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1049 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1050 #endif // allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1051 static void f64_to_i24_sse41(double const* in, size_t n, uint8_t* out, double scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1052 const __m128i pi0 = _mm_set_epi8(-128, -128, -128, -128, 14, 13, 12, 10, 9, 8, 6, 5, 4, 2, 1, 0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1053 const __m128i pi1 = _mm_set_epi8(4, 2, 1, 0, -128, -128, -128, -128, 14, 13, 12, 10, 9, 8, 6, 5);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1054 const __m128i pi2 = _mm_set_epi8(9, 8, 6, 5, 4, 2, 1, 0, -128, -128, -128, -128, 14, 13, 12, 10);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1055 const __m128i pi3 = _mm_set_epi8(14, 13, 12, 10, 9, 8, 6, 5, 4, 2, 1, 0, -128, -128, -128, -128);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1056 const auto mul = _mm_set1_pd(scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1057
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1058 // PROBLEM: if we want to handle wildly out-of-bounds values, we can't do int clipping!
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1059 // float clipping is sadly considerably slower than int clipping
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1060 const auto lo = _mm_set1_pd(INT24_MIN);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1061 const auto hi = _mm_set1_pd(INT24_MAX);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1062
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1063 while (n >= 4 * 4) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1064 auto f0 = _mm_mul_pd(_mm_loadu_pd(in + 0), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1065 auto f1 = _mm_mul_pd(_mm_loadu_pd(in + 2), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1066 auto f2 = _mm_mul_pd(_mm_loadu_pd(in + 4), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1067 auto f3 = _mm_mul_pd(_mm_loadu_pd(in + 6), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1068 auto f4 = _mm_mul_pd(_mm_loadu_pd(in + 8), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1069 auto f5 = _mm_mul_pd(_mm_loadu_pd(in + 10), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1070 auto f6 = _mm_mul_pd(_mm_loadu_pd(in + 12), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1071 auto f7 = _mm_mul_pd(_mm_loadu_pd(in + 14), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1072 f0 = _mm_max_pd(_mm_min_pd(f0, hi), lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1073 f1 = _mm_max_pd(_mm_min_pd(f1, hi), lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1074 f2 = _mm_max_pd(_mm_min_pd(f2, hi), lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1075 f3 = _mm_max_pd(_mm_min_pd(f3, hi), lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1076 f4 = _mm_max_pd(_mm_min_pd(f4, hi), lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1077 f5 = _mm_max_pd(_mm_min_pd(f5, hi), lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1078 f6 = _mm_max_pd(_mm_min_pd(f6, hi), lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1079 f7 = _mm_max_pd(_mm_min_pd(f7, hi), lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1080
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1081
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1082
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1083 __m128i w0 = _mm_unpacklo_epi64(_mm_cvtpd_epi32(f0), _mm_cvtpd_epi32(f1));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1084 __m128i w1 = _mm_unpacklo_epi64(_mm_cvtpd_epi32(f2), _mm_cvtpd_epi32(f3));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1085 __m128i w2 = _mm_unpacklo_epi64(_mm_cvtpd_epi32(f4), _mm_cvtpd_epi32(f5));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1086 __m128i w3 = _mm_unpacklo_epi64(_mm_cvtpd_epi32(f6), _mm_cvtpd_epi32(f7));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1087
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1088 // _mm_shuffle_epi8 : SSSE3
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1089 w0 = _mm_shuffle_epi8(w0, pi0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1090 w1 = _mm_shuffle_epi8(w1, pi1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1091 w2 = _mm_shuffle_epi8(w2, pi2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1092 w3 = _mm_shuffle_epi8(w3, pi3);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1093
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1094 // _mm_blend_epi16 : SSE4.1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1095 __m128i u0 = _mm_blend_epi16(w0, w1, 0xC0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1096 __m128i u1 = _mm_blend_epi16(w1, w2, 0xF0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1097 __m128i u2 = _mm_blend_epi16(w2, w3, 0xFC);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1098
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1099 _mm_storeu_si128((__m128i*)(out + 0), u0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1100 _mm_storeu_si128((__m128i*)(out + 16), u1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1101 _mm_storeu_si128((__m128i*)(out + 32), u2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1102
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1103 in += 4 * 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1104 out += 16 * 3;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1105 n -= 4 * 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1106 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1107
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1108 convert_to_int24_noopt(in, n, out, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1109 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1110 static void f32_to_i24_sse41(float const* in, size_t n, uint8_t* out, float scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1111 const __m128i pi0 = _mm_set_epi8(-128, -128, -128, -128, 14, 13, 12, 10, 9, 8, 6, 5, 4, 2, 1, 0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1112 const __m128i pi1 = _mm_set_epi8(4, 2, 1, 0, -128, -128, -128, -128, 14, 13, 12, 10, 9, 8, 6, 5);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1113 const __m128i pi2 = _mm_set_epi8(9, 8, 6, 5, 4, 2, 1, 0, -128, -128, -128, -128, 14, 13, 12, 10);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1114 const __m128i pi3 = _mm_set_epi8(14, 13, 12, 10, 9, 8, 6, 5, 4, 2, 1, 0, -128, -128, -128, -128);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1115 const __m128 mul = _mm_set1_ps(scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1116
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1117 // PROBLEM: if we want to handle wildly out-of-bounds values, we can't do int clipping!
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1118 // float clipping is sadly considerably slower than int clipping
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1119 const auto lo = _mm_set1_ps(INT24_MIN);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1120 const auto hi = _mm_set1_ps(INT24_MAX);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1121
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1122 while (n >= 4 * 4) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1123 auto f0 = _mm_mul_ps(_mm_loadu_ps(in + 0), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1124 auto f1 = _mm_mul_ps(_mm_loadu_ps(in + 4), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1125 auto f2 = _mm_mul_ps(_mm_loadu_ps(in + 8), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1126 auto f3 = _mm_mul_ps(_mm_loadu_ps(in + 12), mul);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1127 f0 = _mm_min_ps(_mm_max_ps(f0, lo), hi);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1128 f1 = _mm_min_ps(_mm_max_ps(f1, lo), hi);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1129 f2 = _mm_min_ps(_mm_max_ps(f2, lo), hi);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1130 f3 = _mm_min_ps(_mm_max_ps(f3, lo), hi);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1131 __m128i w0 = _mm_cvtps_epi32(f0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1132 __m128i w1 = _mm_cvtps_epi32(f1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1133 __m128i w2 = _mm_cvtps_epi32(f2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1134 __m128i w3 = _mm_cvtps_epi32(f3);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1135
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1136 // _mm_shuffle_epi8 : SSSE3
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1137 w0 = _mm_shuffle_epi8(w0, pi0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1138 w1 = _mm_shuffle_epi8(w1, pi1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1139 w2 = _mm_shuffle_epi8(w2, pi2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1140 w3 = _mm_shuffle_epi8(w3, pi3);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1141
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1142 // _mm_blend_epi16 : SSE4.1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1143 __m128i u0 = _mm_blend_epi16(w0, w1, 0xC0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1144 __m128i u1 = _mm_blend_epi16(w1, w2, 0xF0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1145 __m128i u2 = _mm_blend_epi16(w2, w3, 0xFC);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1146
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1147 _mm_storeu_si128((__m128i*)(out + 0), u0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1148 _mm_storeu_si128((__m128i*)(out + 16), u1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1149 _mm_storeu_si128((__m128i*)(out + 32), u2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1150
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1151 in += 4 * 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1152 out += 16 * 3;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1153 n -= 4 * 4;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1154 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1155
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1156 convert_to_int24_noopt(in, n, out, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1157 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1158
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1159 #endif // AUDIO_MATH_SSE
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1160
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1161 void audio_math::convert_to_int24(const float* in, size_t count, void* out, float scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1162 scale *= 0x800000;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1163
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1164 #ifdef AUDIO_MATH_SSE
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1165 if (haveSSE41) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1166 f32_to_i24_sse41(in, count, (uint8_t*)out, scale); return;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1167 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1168 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1169 convert_to_int24_noopt(in, count, out, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1170 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1171 void audio_math::convert_to_int24(const double* in, size_t count, void* out, double scale) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1172 scale *= 0x800000;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1173 #ifdef AUDIO_MATH_SSE
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1174 #if allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1175 if (haveAVX) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1176 f64_to_i24_avx(in, count, (uint8_t*)out, scale); return;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1177 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1178 #endif // allowAVX
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1179 if (haveSSE41) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1180 f64_to_i24_sse41(in, count, (uint8_t*)out, scale); return;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1181 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1182 #endif // AUDIO_MATH_SSE
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1183 convert_to_int24_noopt(in, count, out, scale);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1184 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1185
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1186 }