Mercurial > vec
comparison test/test_benchmark.h @ 45:7955bed1d169
*: add preliminary floating-point support
no x86 intrinsics just yet, but I did add AltiVec since it's
(arguably) the simplest :)
| author | Paper <paper@tflc.us> |
|---|---|
| date | Wed, 30 Apr 2025 18:36:38 -0400 |
| parents | c6e0df09b86f |
| children |
comparison
equal
deleted
inserted
replaced
| 44:b0a3f0248ecc | 45:7955bed1d169 |
|---|---|
| 1 | 1 |
| 2 /* ------------------------------------------------------------------------ */ | 2 /* ------------------------------------------------------------------------ */ |
| 3 /* simple benchmark for getting the min/max range of an audio sample. */ | 3 /* simple benchmark for getting the min/max range of an audio sample. */ |
| 4 | 4 |
| 5 extern void test_benchmark_sample_minmax_simple_impl(int16_t *smpl, uint32_t length, int32_t *pmin, int32_t *pmax); | 5 extern void test_benchmark_sample_minmax_int8x2_impl(vec_int8 *smpl, uint32_t length, vec_int8 *pmin, vec_int8 *pmax); |
| 6 extern void test_benchmark_sample_minmax_vec_impl(int16_t *smpl, uint32_t length, int32_t *pmin, int32_t *pmax); | |
| 7 | 6 |
| 8 VEC_FUNC_IMPL void test_benchmark_sample_minmax(void) | 7 VEC_FUNC_IMPL void test_benchmark_sample_minmax(void) |
| 9 { | 8 { |
| 10 int32_t min, max; | |
| 11 clock_t start, end; | |
| 12 int i; | 9 int i; |
| 13 int16_t *q = vec_malloc(16000001u * 2u); | |
| 14 | 10 |
| 15 printf("\nsigned 16-bit audio sample min/max - 1 thousand passes - 16000001 samples\n\n"); | 11 union { |
| 12 vec_int8 int8[16000001]; | |
| 13 vec_uint8 uint8[16000001]; | |
| 14 vec_int16 int16[16000001]; | |
| 15 vec_uint16 uint16[16000001]; | |
| 16 vec_int32 int32[16000001]; | |
| 17 vec_uint32 uint32[16000001]; | |
| 18 vec_int64 int64[16000001]; | |
| 19 vec_uint64 uint64[16000001]; | |
| 20 vec_f32 f32[16000001]; | |
| 21 vec_f64 f64[16000001]; | |
| 22 } *q; | |
| 16 | 23 |
| 17 start = clock(); | 24 q = vec_malloc(sizeof(*q)); |
| 18 for (i = 0; i < 100; i++) { | 25 |
| 19 min = INT32_MAX; | 26 for (i = 0; i < 16000001; i++) |
| 20 max = INT32_MIN; | 27 q->f64[i] = (double)rand() / RAND_MAX; |
| 21 test_benchmark_sample_minmax_vec_impl(q, 16000001u, &min, &max); | 28 |
| 29 printf("\naudio sample min/max - 1 thousand passes - 16000001 samples\n\n"); | |
| 30 | |
| 31 #define DO_TIMER(TYPE,NAME,MIN,MAX) \ | |
| 32 { \ | |
| 33 vec_##TYPE min, max; \ | |
| 34 clock_t start, end; \ | |
| 35 \ | |
| 36 start = clock(); \ | |
| 37 \ | |
| 38 for (i = 0; i < 1000; i++) { \ | |
| 39 extern void test_benchmark_sample_minmax_##NAME##_impl(vec_##TYPE *smpl, uint32_t length, vec_##TYPE *pmin, vec_##TYPE *pmax); \ | |
| 40 \ | |
| 41 min = MAX; \ | |
| 42 max = MIN; \ | |
| 43 \ | |
| 44 test_benchmark_sample_minmax_##NAME##_impl(q->TYPE, 16000001u, &min, &max); \ | |
| 45 } \ | |
| 46 \ | |
| 47 end = clock(); \ | |
| 48 \ | |
| 49 printf("- %s: took %f secs\n", #NAME, (double)(end - start) / CLOCKS_PER_SEC); \ | |
| 22 } | 50 } |
| 23 end = clock(); | |
| 24 | 51 |
| 25 printf("- vec: took %f secs\n", (double)(end - start) / CLOCKS_PER_SEC); | 52 DO_TIMER(int8, int8x2, INT8_MIN, INT8_MAX) |
| 53 DO_TIMER(int8, int8x4, INT8_MIN, INT8_MAX) | |
| 54 DO_TIMER(int8, int8x8, INT8_MIN, INT8_MAX) | |
| 55 DO_TIMER(int8, int8x16, INT8_MIN, INT8_MAX) | |
| 56 DO_TIMER(int8, int8x32, INT8_MIN, INT8_MAX) | |
| 57 DO_TIMER(int8, int8x64, INT8_MIN, INT8_MAX) | |
| 58 DO_TIMER(int8, int8, INT8_MIN, INT8_MAX) | |
| 26 | 59 |
| 27 start = clock(); | 60 DO_TIMER(int16, int16x2, INT16_MIN, INT16_MAX) |
| 28 for (i = 0; i < 100; i++) { | 61 DO_TIMER(int16, int16x4, INT16_MIN, INT16_MAX) |
| 29 min = INT32_MAX; | 62 DO_TIMER(int16, int16x8, INT16_MIN, INT16_MAX) |
| 30 max = INT32_MIN; | 63 DO_TIMER(int16, int16x16, INT16_MIN, INT16_MAX) |
| 31 test_benchmark_sample_minmax_simple_impl(q, 16000001u, &min, &max); | 64 DO_TIMER(int16, int16x32, INT16_MIN, INT16_MAX) |
| 32 } | 65 DO_TIMER(int16, int16, INT16_MIN, INT16_MAX) |
| 33 end = clock(); | |
| 34 | 66 |
| 35 printf("- simple: took %f secs\n", (double)(end - start) / CLOCKS_PER_SEC); | 67 DO_TIMER(int32, int32x2, INT32_MIN, INT32_MAX) |
| 68 DO_TIMER(int32, int32x4, INT32_MIN, INT32_MAX) | |
| 69 DO_TIMER(int32, int32x8, INT32_MIN, INT32_MAX) | |
| 70 DO_TIMER(int32, int32x16, INT32_MIN, INT32_MAX) | |
| 71 DO_TIMER(int32, int32, INT32_MIN, INT32_MAX) | |
| 72 | |
| 73 DO_TIMER(int64, int64x2, INT64_MIN, INT64_MAX) | |
| 74 DO_TIMER(int64, int64x4, INT64_MIN, INT64_MAX) | |
| 75 DO_TIMER(int64, int64x8, INT64_MIN, INT64_MAX) | |
| 76 DO_TIMER(int64, int64, INT64_MIN, INT64_MAX) | |
| 77 | |
| 78 DO_TIMER(uint8, uint8x2, 0, UINT8_MAX) | |
| 79 DO_TIMER(uint8, uint8x4, 0, UINT8_MAX) | |
| 80 DO_TIMER(uint8, uint8x8, 0, UINT8_MAX) | |
| 81 DO_TIMER(uint8, uint8x16, 0, UINT8_MAX) | |
| 82 DO_TIMER(uint8, uint8x32, 0, UINT8_MAX) | |
| 83 DO_TIMER(uint8, uint8x64, 0, UINT8_MAX) | |
| 84 DO_TIMER(uint8, uint8, 0, UINT8_MAX) | |
| 85 | |
| 86 DO_TIMER(uint16, uint16x2, 0, UINT16_MAX) | |
| 87 DO_TIMER(uint16, uint16x4, 0, UINT16_MAX) | |
| 88 DO_TIMER(uint16, uint16x8, 0, UINT16_MAX) | |
| 89 DO_TIMER(uint16, uint16x16, 0, UINT16_MAX) | |
| 90 DO_TIMER(uint16, uint16x32, 0, UINT16_MAX) | |
| 91 DO_TIMER(uint16, uint16, 0, UINT16_MAX) | |
| 92 | |
| 93 DO_TIMER(uint32, uint32x2, 0, UINT32_MAX) | |
| 94 DO_TIMER(uint32, uint32x4, 0, UINT32_MAX) | |
| 95 DO_TIMER(uint32, uint32x8, 0, UINT32_MAX) | |
| 96 DO_TIMER(uint32, uint32x16, 0, UINT32_MAX) | |
| 97 DO_TIMER(uint32, uint32, 0, UINT32_MAX) | |
| 98 | |
| 99 DO_TIMER(uint64, uint64x2, 0, UINT64_MAX) | |
| 100 DO_TIMER(uint64, uint64x4, 0, UINT64_MAX) | |
| 101 DO_TIMER(uint64, uint64x8, 0, UINT64_MAX) | |
| 102 DO_TIMER(uint64, uint64, 0, UINT64_MAX) | |
| 103 | |
| 104 DO_TIMER(f32, f32x2, -1.0f, 1.0f) | |
| 105 DO_TIMER(f32, f32x4, -1.0f, 1.0f) | |
| 106 DO_TIMER(f32, f32x8, -1.0f, 1.0f) | |
| 107 DO_TIMER(f32, f32x16, -1.0f, 1.0f) | |
| 108 DO_TIMER(f32, f32, -1.0f, 1.0f) | |
| 109 | |
| 110 DO_TIMER(f64, f64x2, -1.0, 1.0) | |
| 111 DO_TIMER(f64, f64x4, -1.0, 1.0) | |
| 112 DO_TIMER(f64, f64x8, -1.0, 1.0) | |
| 113 DO_TIMER(f64, f64, -1.0, 1.0) | |
| 36 | 114 |
| 37 printf("\n"); | 115 printf("\n"); |
| 38 | 116 |
| 39 vec_free(q); | 117 vec_free(q); |
| 40 } | 118 } |
