Mercurial > minori
annotate dep/utf8proc/test/charwidth.c @ 347:a0aa8c8c4307
dep/anitomy: port to use UCS-4 rather than wide strings
rationale: wide strings are not the same on every platform, and
might not even be Unicode. (while they usually are, its possible
that they are not)
I was *going* to change StringToInt to use a string stream, but
outputting to an integer doesn't seem to work at all with UCS-4,
even though it ought to, so I just rolled my own that uses the
arabic digits only.
author | Paper <paper@paper.us.eu.org> |
---|---|
date | Sun, 23 Jun 2024 10:32:09 -0400 |
parents | 1faa72660932 |
children |
rev | line source |
---|---|
343
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
1 #include "tests.h" |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
2 #include <ctype.h> |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
3 #include <wchar.h> |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
4 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
5 static int my_unassigned(int c) { |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
6 int cat = utf8proc_get_property(c)->category; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
7 return (cat == UTF8PROC_CATEGORY_CN) || (cat == UTF8PROC_CATEGORY_CO); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
8 } |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
9 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
10 static int my_isprint(int c) { |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
11 int cat = utf8proc_get_property(c)->category; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
12 return (UTF8PROC_CATEGORY_LU <= cat && cat <= UTF8PROC_CATEGORY_ZS) || |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
13 (c == 0x0601 || c == 0x0602 || c == 0x0603 || c == 0x06dd || c == 0x00ad) || |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
14 (cat == UTF8PROC_CATEGORY_CN) || (cat == UTF8PROC_CATEGORY_CO); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
15 } |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
16 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
17 int main(int argc, char **argv) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
18 { |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
19 int c, error = 0, updates = 0; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
20 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
21 (void) argc; /* unused */ |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
22 (void) argv; /* unused */ |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
23 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
24 /* some simple sanity tests of the character widths */ |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
25 for (c = 0; c <= 0x110000; ++c) { |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
26 int cat = utf8proc_get_property(c)->category; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
27 int w = utf8proc_charwidth(c); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
28 if ((cat == UTF8PROC_CATEGORY_MN || cat == UTF8PROC_CATEGORY_ME) && w > 0) { |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
29 fprintf(stderr, "nonzero width %d for combining char %x\n", w, c); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
30 error += 1; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
31 } |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
32 if (w == 0 && |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
33 ((cat >= UTF8PROC_CATEGORY_LU && cat <= UTF8PROC_CATEGORY_LO) || |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
34 (cat >= UTF8PROC_CATEGORY_ND && cat <= UTF8PROC_CATEGORY_SC) || |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
35 (cat >= UTF8PROC_CATEGORY_SO && cat <= UTF8PROC_CATEGORY_ZS))) { |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
36 fprintf(stderr, "zero width for symbol-like char %x\n", c); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
37 error += 1; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
38 } |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
39 if (c <= 127 && ((!isprint(c) && w > 0) || (isprint(c) && wcwidth(c) != w))) { |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
40 fprintf(stderr, "wcwidth %d mismatch %d for %s ASCII %x\n", |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
41 wcwidth(c), w, |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
42 isprint(c) ? "printable" : "non-printable", c); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
43 error += 1; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
44 } |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
45 if (!my_isprint(c) && w > 0) { |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
46 fprintf(stderr, "non-printing %x had width %d\n", c, w); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
47 error += 1; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
48 } |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
49 if (my_unassigned(c) && w != 1) { |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
50 fprintf(stderr, "unexpected width %d for unassigned char %x\n", w, c); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
51 error += 1; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
52 } |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
53 } |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
54 check(!error, "utf8proc_charwidth FAILED %d tests.", error); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
55 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
56 check(utf8proc_charwidth(0x00ad) == 1, "incorrect width for U+00AD (soft hyphen)"); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
57 check(utf8proc_charwidth(0xe000) == 1, "incorrect width for U+e000 (PUA)"); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
58 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
59 /* print some other information by compariing with system wcwidth */ |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
60 printf("Mismatches with system wcwidth (not necessarily errors):\n"); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
61 for (c = 0; c <= 0x110000; ++c) { |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
62 int w = utf8proc_charwidth(c); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
63 int wc = wcwidth(c); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
64 if (sizeof(wchar_t) == 2 && c >= (1<<16)) continue; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
65 /* lots of these errors for out-of-date system unicode tables */ |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
66 if (wc == -1 && my_isprint(c) && !my_unassigned(c) && w > 0) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
67 updates += 1; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
68 if (wc == -1 && !my_isprint(c) && w > 0) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
69 printf(" wcwidth(%x) = -1 for non-printable width-%d char\n", c, w); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
70 if (wc >= 0 && wc != w) |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
71 printf(" wcwidth(%x) = %d != charwidth %d\n", c, wc, w); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
72 } |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
73 printf(" ... (positive widths for %d chars unknown to wcwidth) ...\n", updates); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
74 printf("Character-width tests SUCCEEDED.\n"); |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
75 |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
76 return 0; |
1faa72660932
*: transfer back to cmake from autotools
Paper <paper@paper.us.eu.org>
parents:
diff
changeset
|
77 } |