Mercurial > minori
comparison dep/utf8proc/bench/icu.c @ 343:1faa72660932
*: transfer back to cmake from autotools
autotools just made lots of things more complicated than
they should have been, and many things broke (i.e. translations)
| author | Paper <paper@paper.us.eu.org> |
|---|---|
| date | Thu, 20 Jun 2024 05:56:06 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 342:adb79bdde329 | 343:1faa72660932 |
|---|---|
| 1 #include <stdio.h> | |
| 2 #include <stdlib.h> | |
| 3 | |
| 4 /* ICU4C */ | |
| 5 #include <unicode/utypes.h> | |
| 6 #include <unicode/ustring.h> | |
| 7 #include <unicode/ucnv.h> | |
| 8 #include <unicode/unorm2.h> | |
| 9 | |
| 10 #include "util.h" | |
| 11 | |
| 12 int main(int argc, char **argv) | |
| 13 { | |
| 14 int i; | |
| 15 | |
| 16 UErrorCode err; | |
| 17 UConverter *uc = ucnv_open("UTF8", &err); | |
| 18 if (U_FAILURE(err)) return EXIT_FAILURE; | |
| 19 | |
| 20 const UNormalizer2 *NFKC = unorm2_getNFKCInstance(&err); | |
| 21 if (U_FAILURE(err)) return EXIT_FAILURE; | |
| 22 | |
| 23 for (i = 1; i < argc; ++i) { | |
| 24 if (argv[i][0] == '-') { | |
| 25 fprintf(stderr, "unrecognized option: %s\n", argv[i]); | |
| 26 return EXIT_FAILURE; | |
| 27 } | |
| 28 | |
| 29 size_t len; | |
| 30 uint8_t *src = readfile(argv[i], &len); | |
| 31 if (!src) { | |
| 32 fprintf(stderr, "error reading %s\n", argv[i]); | |
| 33 return EXIT_FAILURE; | |
| 34 } | |
| 35 | |
| 36 /* convert UTF8 data to ICU's UTF16 */ | |
| 37 UChar *usrc = (UChar*) malloc(2*len * sizeof(UChar)); | |
| 38 ucnv_toUChars(uc, usrc, 2*len, (char*) src, len, &err); | |
| 39 if (U_FAILURE(err)) return EXIT_FAILURE; | |
| 40 size_t ulen = u_strlen(usrc); | |
| 41 | |
| 42 /* ICU's insane normalization API requires you to | |
| 43 know the size of the destination buffer in advance, | |
| 44 or alternatively to repeatedly try normalizing and | |
| 45 double the buffer size until it succeeds. Here, I just | |
| 46 allocate a huge destination buffer to avoid the issue. */ | |
| 47 UChar *udest = (UChar*) malloc(10*ulen * sizeof(UChar)); | |
| 48 | |
| 49 mytime start = gettime(); | |
| 50 for (int i = 0; i < 100; ++i) { | |
| 51 unorm2_normalize(NFKC, usrc, ulen, udest, 10*ulen, &err); | |
| 52 if (U_FAILURE(err)) return EXIT_FAILURE; | |
| 53 } | |
| 54 printf("%s: %g\n", argv[i], elapsed(gettime(), start) / 100); | |
| 55 free(udest); | |
| 56 free(usrc); | |
| 57 free(src); | |
| 58 } | |
| 59 | |
| 60 return EXIT_SUCCESS; | |
| 61 } |
