Mercurial > minori
comparison dep/utf8proc/bench/icu.c @ 343:1faa72660932
*: transfer back to cmake from autotools
autotools just made lots of things more complicated than
they should have and many things broke (i.e. translations)
author | Paper <paper@paper.us.eu.org> |
---|---|
date | Thu, 20 Jun 2024 05:56:06 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
342:adb79bdde329 | 343:1faa72660932 |
---|---|
1 #include <stdio.h> | |
2 #include <stdlib.h> | |
3 | |
4 /* ICU4C */ | |
5 #include <unicode/utypes.h> | |
6 #include <unicode/ustring.h> | |
7 #include <unicode/ucnv.h> | |
8 #include <unicode/unorm2.h> | |
9 | |
10 #include "util.h" | |
11 | |
12 /* Time ICU's NFKC normalization of the UTF-8 file at `path`. | |
13    The file is read with readfile(), converted to UTF-16 with `uc`, | |
14    normalized REPS times with `nfkc`, and the mean elapsed() time per | |
15    pass is printed to stdout as "<path>: <time>". | |
16    Returns EXIT_SUCCESS, or EXIT_FAILURE after printing a diagnostic | |
17    to stderr. All buffers are released on every path. */ | |
18 static int bench_file(UConverter *uc, const UNormalizer2 *nfkc, const char *path) | |
19 { | |
20     enum { REPS = 100 }; /* normalization passes averaged per file */ | |
21     int rc = EXIT_FAILURE; | |
22     /* ICU functions return immediately when the status they are handed | |
23        already holds a failure code, so it must start as U_ZERO_ERROR. */ | |
24     UErrorCode err = U_ZERO_ERROR; | |
25     UChar *usrc = NULL; | |
26     UChar *udest = NULL; | |
27     int32_t ucap = 0, ulen = 0, dcap = 0; | |
28     size_t len = 0; | |
29 | |
30     uint8_t *src = readfile(path, &len); | |
31     if (!src) { | |
32         fprintf(stderr, "error reading %s\n", path); | |
33         return EXIT_FAILURE; | |
34     } | |
35 | |
36     /* ICU takes lengths and capacities as int32_t; the largest value | |
37        computed below is 10*len + 1, so reject anything bigger. */ | |
38     if (len > (size_t) (INT32_MAX - 1) / 10) { | |
39         fprintf(stderr, "%s: file too large\n", path); | |
40         goto cleanup; | |
41     } | |
42 | |
43     /* convert UTF8 data to ICU's UTF16; the +1 leaves room for the | |
44        terminator and avoids a zero-size allocation for empty files */ | |
45     ucap = (int32_t) (2*len + 1); | |
46     usrc = malloc((size_t) ucap * sizeof *usrc); | |
47     if (!usrc) { | |
48         fprintf(stderr, "%s: out of memory\n", path); | |
49         goto cleanup; | |
50     } | |
51     /* Keep the length ucnv_toUChars reports instead of calling | |
52        u_strlen(), which would stop at the first embedded U+0000. */ | |
53     ulen = ucnv_toUChars(uc, usrc, ucap, (const char *) src, (int32_t) len, &err); | |
54     if (U_FAILURE(err)) { | |
55         fprintf(stderr, "%s: ucnv_toUChars: %s\n", path, u_errorName(err)); | |
56         goto cleanup; | |
57     } | |
58 | |
59     /* unorm2_normalize needs the destination capacity up front. | |
60        Rather than retrying with a growing buffer, allocate a | |
61        generous 10x guess; if that is still too small the call | |
62        fails and the error is reported below. */ | |
63     dcap = 10*ulen + 1; | |
64     udest = malloc((size_t) dcap * sizeof *udest); | |
65     if (!udest) { | |
66         fprintf(stderr, "%s: out of memory\n", path); | |
67         goto cleanup; | |
68     } | |
69 | |
70     { | |
71         mytime start = gettime(); | |
72         for (int pass = 0; pass < REPS; ++pass) { | |
73             unorm2_normalize(nfkc, usrc, ulen, udest, dcap, &err); | |
74             if (U_FAILURE(err)) { | |
75                 fprintf(stderr, "%s: unorm2_normalize: %s\n", path, u_errorName(err)); | |
76                 goto cleanup; | |
77             } | |
78         } | |
79         printf("%s: %g\n", path, elapsed(gettime(), start) / REPS); | |
80     } | |
81     rc = EXIT_SUCCESS; | |
82 | |
83 cleanup: | |
84     free(udest); | |
85     free(usrc); | |
86     free(src); | |
87     return rc; | |
88 } | |
89 | |
90 /* Benchmark driver: every command-line argument is a UTF-8 file to | |
91    normalize with ICU. Arguments starting with '-' are rejected. | |
92    Stops at the first failure and returns EXIT_FAILURE; returns | |
93    EXIT_SUCCESS when every file was processed. */ | |
94 int main(int argc, char **argv) | |
95 { | |
96     int rc = EXIT_SUCCESS; | |
97 | |
98     /* must be U_ZERO_ERROR before the first ICU call */ | |
99     UErrorCode err = U_ZERO_ERROR; | |
100     UConverter *uc = ucnv_open("UTF8", &err); | |
101     if (U_FAILURE(err)) { | |
102         fprintf(stderr, "ucnv_open: %s\n", u_errorName(err)); | |
103         return EXIT_FAILURE; | |
104     } | |
105 | |
106     const UNormalizer2 *NFKC = unorm2_getNFKCInstance(&err); | |
107     if (U_FAILURE(err)) { | |
108         fprintf(stderr, "unorm2_getNFKCInstance: %s\n", u_errorName(err)); | |
109         ucnv_close(uc); | |
110         return EXIT_FAILURE; | |
111     } | |
112 | |
113     for (int i = 1; i < argc; ++i) { | |
114         if (argv[i][0] == '-') { | |
115             fprintf(stderr, "unrecognized option: %s\n", argv[i]); | |
116             rc = EXIT_FAILURE; | |
117             break; | |
118         } | |
119 | |
120         rc = bench_file(uc, NFKC, argv[i]); | |
121         if (rc != EXIT_SUCCESS) break; | |
122     } | |
123 | |
124     /* the converter was opened here, so it is closed here on all paths */ | |
125     ucnv_close(uc); | |
126     return rc; | |
127 } | |