annotate foosdk/sdk/pfc/unicode-normalize.cpp @ 1:20d02a178406 default tip

*: check in everything else yay
author Paper <paper@tflc.us>
date Mon, 05 Jan 2026 02:15:46 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1 #include "pfc-lite.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
2 #include "unicode-normalize.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
3 #include "string_base.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
4 #include <map>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
5
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
6
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
7
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Table of 16-bit code points that is_modifier() reports as modifiers.
// Entries are in strictly ascending order: is_modifier() binary-searches this
// table and also uses its first entry as an early-out bound, so any addition
// must keep the table sorted.
// NOTE(review): the values look like Unicode combining marks (0x0300..) and
// Hangul jamo (0x1161.., 0x11A8..) - confirm against whatever generated them.
static constexpr uint16_t modifiers[] = {
	0x0300, 0x0301, 0x0302, 0x0303, 0x0304, 0x0306, 0x0307, 0x0308,
	0x0309, 0x030A, 0x030B, 0x030C, 0x030F, 0x0311, 0x0313, 0x0314,
	0x031B, 0x0323, 0x0324, 0x0325, 0x0326, 0x0327, 0x0328, 0x032D,
	0x032E, 0x0330, 0x0331, 0x0338, 0x0342, 0x0345,

	0x05B4, 0x05B7, 0x05B8, 0x05B9, 0x05BC, 0x05BF, 0x05C1, 0x05C2,

	0x0653, 0x0654, 0x0655,

	0x093C, 0x09BC, 0x09BE, 0x09D7, 0x0A3C, 0x0B3C, 0x0B3E, 0x0B56,
	0x0B57, 0x0BBE, 0x0BD7, 0x0C56, 0x0CC2, 0x0CD5, 0x0CD6, 0x0D3E,
	0x0D57, 0x0DCA, 0x0DCF, 0x0DDF, 0x0F72, 0x0F74, 0x0F80, 0x0FB5,
	0x0FB7, 0x102E,

	0x1161, 0x1162, 0x1163, 0x1164, 0x1165, 0x1166, 0x1167, 0x1168,
	0x1169, 0x116A, 0x116B, 0x116C, 0x116D, 0x116E, 0x116F, 0x1170,
	0x1171, 0x1172, 0x1173, 0x1174, 0x1175,

	0x11A8, 0x11A9, 0x11AA, 0x11AB, 0x11AC, 0x11AD, 0x11AE, 0x11AF,
	0x11B0, 0x11B1, 0x11B2, 0x11B3, 0x11B4, 0x11B5, 0x11B6, 0x11B7,
	0x11B8, 0x11B9, 0x11BA, 0x11BB, 0x11BC, 0x11BD, 0x11BE, 0x11BF,
	0x11C0, 0x11C1, 0x11C2,

	0x1B35, 0x3099, 0x309A
};
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
10
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
11 static bool is_modifier( unsigned c ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
12 if (c < modifiers[0]) return false; // common case, bail early
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
13 unsigned lo = 0, hi = (unsigned) std::size(modifiers);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
14 do {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
15 unsigned mid = (lo+hi)/2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
16 unsigned hit = modifiers[mid];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
17 if ( c < hit ) hi = mid;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
18 else if ( c > hit ) lo = mid + 1;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
19 else return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
20 } while(lo < hi);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
21 return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
22 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
23
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
24 static constexpr struct {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
25 uint16_t form1;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
26 uint16_t form2[2];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
27 } normtable[] = {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
28 {0x00C0, {0x0041,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
29 {0x00C1, {0x0041,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
30 {0x00C2, {0x0041,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
31 {0x00C3, {0x0041,0x0303}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
32 {0x00C4, {0x0041,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
33 {0x00C5, {0x0041,0x030A}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
34 {0x00C7, {0x0043,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
35 {0x00C8, {0x0045,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
36 {0x00C9, {0x0045,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
37 {0x00CA, {0x0045,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
38 {0x00CB, {0x0045,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
39 {0x00CC, {0x0049,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
40 {0x00CD, {0x0049,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
41 {0x00CE, {0x0049,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
42 {0x00CF, {0x0049,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
43 {0x00D1, {0x004E,0x0303}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
44 {0x00D2, {0x004F,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
45 {0x00D3, {0x004F,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
46 {0x00D4, {0x004F,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
47 {0x00D5, {0x004F,0x0303}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
48 {0x00D6, {0x004F,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
49 {0x00D9, {0x0055,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
50 {0x00DA, {0x0055,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
51 {0x00DB, {0x0055,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
52 {0x00DC, {0x0055,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
53 {0x00DD, {0x0059,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
54 {0x00E0, {0x0061,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
55 {0x00E1, {0x0061,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
56 {0x00E2, {0x0061,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
57 {0x00E3, {0x0061,0x0303}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
58 {0x00E4, {0x0061,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
59 {0x00E5, {0x0061,0x030A}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
60 {0x00E7, {0x0063,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
61 {0x00E8, {0x0065,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
62 {0x00E9, {0x0065,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
63 {0x00EA, {0x0065,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
64 {0x00EB, {0x0065,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
65 {0x00EC, {0x0069,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
66 {0x00ED, {0x0069,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
67 {0x00EE, {0x0069,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
68 {0x00EF, {0x0069,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
69 {0x00F1, {0x006E,0x0303}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
70 {0x00F2, {0x006F,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
71 {0x00F3, {0x006F,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
72 {0x00F4, {0x006F,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
73 {0x00F5, {0x006F,0x0303}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
74 {0x00F6, {0x006F,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
75 {0x00F9, {0x0075,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
76 {0x00FA, {0x0075,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
77 {0x00FB, {0x0075,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
78 {0x00FC, {0x0075,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
79 {0x00FD, {0x0079,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
80 {0x00FF, {0x0079,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
81 {0x0100, {0x0041,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
82 {0x0101, {0x0061,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
83 {0x0102, {0x0041,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
84 {0x0103, {0x0061,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
85 {0x0104, {0x0041,0x0328}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
86 {0x0105, {0x0061,0x0328}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
87 {0x0106, {0x0043,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
88 {0x0107, {0x0063,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
89 {0x0108, {0x0043,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
90 {0x0109, {0x0063,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
91 {0x010A, {0x0043,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
92 {0x010B, {0x0063,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
93 {0x010C, {0x0043,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
94 {0x010D, {0x0063,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
95 {0x010E, {0x0044,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
96 {0x010F, {0x0064,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
97 {0x0112, {0x0045,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
98 {0x0113, {0x0065,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
99 {0x0114, {0x0045,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
100 {0x0115, {0x0065,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
101 {0x0116, {0x0045,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
102 {0x0117, {0x0065,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
103 {0x0118, {0x0045,0x0328}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
104 {0x0119, {0x0065,0x0328}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
105 {0x011A, {0x0045,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
106 {0x011B, {0x0065,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
107 {0x011C, {0x0047,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
108 {0x011D, {0x0067,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
109 {0x011E, {0x0047,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
110 {0x011F, {0x0067,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
111 {0x0120, {0x0047,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
112 {0x0121, {0x0067,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
113 {0x0122, {0x0047,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
114 {0x0123, {0x0067,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
115 {0x0124, {0x0048,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
116 {0x0125, {0x0068,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
117 {0x0128, {0x0049,0x0303}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
118 {0x0129, {0x0069,0x0303}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
119 {0x012A, {0x0049,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
120 {0x012B, {0x0069,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
121 {0x012C, {0x0049,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
122 {0x012D, {0x0069,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
123 {0x012E, {0x0049,0x0328}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
124 {0x012F, {0x0069,0x0328}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
125 {0x0130, {0x0049,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
126 {0x0134, {0x004A,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
127 {0x0135, {0x006A,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
128 {0x0136, {0x004B,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
129 {0x0137, {0x006B,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
130 {0x0139, {0x004C,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
131 {0x013A, {0x006C,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
132 {0x013B, {0x004C,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
133 {0x013C, {0x006C,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
134 {0x013D, {0x004C,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
135 {0x013E, {0x006C,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
136 {0x0143, {0x004E,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
137 {0x0144, {0x006E,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
138 {0x0145, {0x004E,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
139 {0x0146, {0x006E,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
140 {0x0147, {0x004E,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
141 {0x0148, {0x006E,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
142 {0x014C, {0x004F,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
143 {0x014D, {0x006F,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
144 {0x014E, {0x004F,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
145 {0x014F, {0x006F,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
146 {0x0150, {0x004F,0x030B}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
147 {0x0151, {0x006F,0x030B}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
148 {0x0154, {0x0052,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
149 {0x0155, {0x0072,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
150 {0x0156, {0x0052,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
151 {0x0157, {0x0072,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
152 {0x0158, {0x0052,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
153 {0x0159, {0x0072,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
154 {0x015A, {0x0053,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
155 {0x015B, {0x0073,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
156 {0x015C, {0x0053,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
157 {0x015D, {0x0073,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
158 {0x015E, {0x0053,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
159 {0x015F, {0x0073,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
160 {0x0160, {0x0053,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
161 {0x0161, {0x0073,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
162 {0x0162, {0x0054,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
163 {0x0163, {0x0074,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
164 {0x0164, {0x0054,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
165 {0x0165, {0x0074,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
166 {0x0168, {0x0055,0x0303}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
167 {0x0169, {0x0075,0x0303}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
168 {0x016A, {0x0055,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
169 {0x016B, {0x0075,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
170 {0x016C, {0x0055,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
171 {0x016D, {0x0075,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
172 {0x016E, {0x0055,0x030A}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
173 {0x016F, {0x0075,0x030A}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
174 {0x0170, {0x0055,0x030B}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
175 {0x0171, {0x0075,0x030B}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
176 {0x0172, {0x0055,0x0328}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
177 {0x0173, {0x0075,0x0328}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
178 {0x0174, {0x0057,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
179 {0x0175, {0x0077,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
180 {0x0176, {0x0059,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
181 {0x0177, {0x0079,0x0302}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
182 {0x0178, {0x0059,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
183 {0x0179, {0x005A,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
184 {0x017A, {0x007A,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
185 {0x017B, {0x005A,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
186 {0x017C, {0x007A,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
187 {0x017D, {0x005A,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
188 {0x017E, {0x007A,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
189 {0x01A0, {0x004F,0x031B}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
190 {0x01A1, {0x006F,0x031B}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
191 {0x01AF, {0x0055,0x031B}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
192 {0x01B0, {0x0075,0x031B}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
193 {0x01CD, {0x0041,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
194 {0x01CE, {0x0061,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
195 {0x01CF, {0x0049,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
196 {0x01D0, {0x0069,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
197 {0x01D1, {0x004F,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
198 {0x01D2, {0x006F,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
199 {0x01D3, {0x0055,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
200 {0x01D4, {0x0075,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
201 {0x01E2, {0x00C6,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
202 {0x01E3, {0x00E6,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
203 {0x01E6, {0x0047,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
204 {0x01E7, {0x0067,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
205 {0x01E8, {0x004B,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
206 {0x01E9, {0x006B,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
207 {0x01EA, {0x004F,0x0328}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
208 {0x01EB, {0x006F,0x0328}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
209 {0x01EE, {0x01B7,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
210 {0x01EF, {0x0292,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
211 {0x01F0, {0x006A,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
212 {0x01F4, {0x0047,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
213 {0x01F5, {0x0067,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
214 {0x01F8, {0x004E,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
215 {0x01F9, {0x006E,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
216 {0x01FC, {0x00C6,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
217 {0x01FD, {0x00E6,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
218 {0x01FE, {0x00D8,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
219 {0x01FF, {0x00F8,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
220 {0x0200, {0x0041,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
221 {0x0201, {0x0061,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
222 {0x0202, {0x0041,0x0311}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
223 {0x0203, {0x0061,0x0311}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
224 {0x0204, {0x0045,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
225 {0x0205, {0x0065,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
226 {0x0206, {0x0045,0x0311}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
227 {0x0207, {0x0065,0x0311}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
228 {0x0208, {0x0049,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
229 {0x0209, {0x0069,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
230 {0x020A, {0x0049,0x0311}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
231 {0x020B, {0x0069,0x0311}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
232 {0x020C, {0x004F,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
233 {0x020D, {0x006F,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
234 {0x020E, {0x004F,0x0311}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
235 {0x020F, {0x006F,0x0311}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
236 {0x0210, {0x0052,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
237 {0x0211, {0x0072,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
238 {0x0212, {0x0052,0x0311}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
239 {0x0213, {0x0072,0x0311}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
240 {0x0214, {0x0055,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
241 {0x0215, {0x0075,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
242 {0x0216, {0x0055,0x0311}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
243 {0x0217, {0x0075,0x0311}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
244 {0x0218, {0x0053,0x0326}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
245 {0x0219, {0x0073,0x0326}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
246 {0x021A, {0x0054,0x0326}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
247 {0x021B, {0x0074,0x0326}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
248 {0x021E, {0x0048,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
249 {0x021F, {0x0068,0x030C}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
250 {0x0226, {0x0041,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
251 {0x0227, {0x0061,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
252 {0x0228, {0x0045,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
253 {0x0229, {0x0065,0x0327}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
254 {0x022E, {0x004F,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
255 {0x022F, {0x006F,0x0307}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
256 {0x0232, {0x0059,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
257 {0x0233, {0x0079,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
258 {0x0344, {0x0308,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
259 {0x0385, {0x00A8,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
260 {0x0386, {0x0391,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
261 {0x0388, {0x0395,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
262 {0x0389, {0x0397,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
263 {0x038A, {0x0399,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
264 {0x038C, {0x039F,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
265 {0x038E, {0x03A5,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
266 {0x038F, {0x03A9,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
267 {0x03AA, {0x0399,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
268 {0x03AB, {0x03A5,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
269 {0x03AC, {0x03B1,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
270 {0x03AD, {0x03B5,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
271 {0x03AE, {0x03B7,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
272 {0x03AF, {0x03B9,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
273 {0x03CA, {0x03B9,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
274 {0x03CB, {0x03C5,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
275 {0x03CC, {0x03BF,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
276 {0x03CD, {0x03C5,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
277 {0x03CE, {0x03C9,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
278 {0x03D3, {0x03D2,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
279 {0x03D4, {0x03D2,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
280 {0x0400, {0x0415,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
281 {0x0401, {0x0415,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
282 {0x0403, {0x0413,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
283 {0x0407, {0x0406,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
284 {0x040C, {0x041A,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
285 {0x040D, {0x0418,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
286 {0x040E, {0x0423,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
287 {0x0419, {0x0418,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
288 {0x0439, {0x0438,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
289 {0x0450, {0x0435,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
290 {0x0451, {0x0435,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
291 {0x0453, {0x0433,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
292 {0x0457, {0x0456,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
293 {0x045C, {0x043A,0x0301}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
294 {0x045D, {0x0438,0x0300}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
295 {0x045E, {0x0443,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
296 {0x0476, {0x0474,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
297 {0x0477, {0x0475,0x030F}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
298 {0x04C1, {0x0416,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
299 {0x04C2, {0x0436,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
300 {0x04D0, {0x0410,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
301 {0x04D1, {0x0430,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
302 {0x04D2, {0x0410,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
303 {0x04D3, {0x0430,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
304 {0x04D6, {0x0415,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
305 {0x04D7, {0x0435,0x0306}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
306 {0x04DA, {0x04D8,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
307 {0x04DB, {0x04D9,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
308 {0x04DC, {0x0416,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
309 {0x04DD, {0x0436,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
310 {0x04DE, {0x0417,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
311 {0x04DF, {0x0437,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
312 {0x04E2, {0x0418,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
313 {0x04E3, {0x0438,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
314 {0x04E4, {0x0418,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
315 {0x04E5, {0x0438,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
316 {0x04E6, {0x041E,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
317 {0x04E7, {0x043E,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
318 {0x04EA, {0x04E8,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
319 {0x04EB, {0x04E9,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
320 {0x04EC, {0x042D,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
321 {0x04ED, {0x044D,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
322 {0x04EE, {0x0423,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
323 {0x04EF, {0x0443,0x0304}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
324 {0x04F0, {0x0423,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
325 {0x04F1, {0x0443,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
326 {0x04F2, {0x0423,0x030B}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
327 {0x04F3, {0x0443,0x030B}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
328 {0x04F4, {0x0427,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
329 {0x04F5, {0x0447,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
330 {0x04F8, {0x042B,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
331 {0x04F9, {0x044B,0x0308}},
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
332 };
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
333
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
334
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
335 static const uint16_t * match( uint16_t c ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
336 size_t lo = 0, hi = std::size(normtable);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
337 do {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
338 size_t mid = (lo+hi)/2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
339 auto & rec = normtable[mid];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
340 if ( rec.form1 < c ) lo = mid + 1;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
341 else if ( rec.form1 > c ) hi = mid;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
342 else return rec.form2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
343 } while( lo < hi );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
344 return nullptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
345 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
346 namespace {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
347 class shortener {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
348 public:
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
349 shortener() {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
350 for (auto& walk : normtable) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
351 m_data[walk.form2[0]][walk.form2[1]] = walk.form1;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
352 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
353 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
354
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
355 std::map< uint16_t, std::map<uint16_t, uint16_t> > m_data;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
356 };
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
357 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
358
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
359 namespace pfc {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
360 pfc::string8 unicodeNormalizeD_Lite(const char* in) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
361 pfc::string8 ret; ret.prealloc(1024);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
362 for (;; ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
363 unsigned c;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
364 auto d = utf8_decode_char(in, c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
365 if (d == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
366 in += d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
367 const uint16_t* m = nullptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
368 if (c < 0x10000) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
369 m = match((uint16_t)c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
370 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
371 if (m != nullptr) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
372 ret.add_char(m[0]);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
373 ret.add_char(m[1]);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
374 } else {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
375 ret.add_char(c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
376 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
377 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
378
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
379 return ret;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
380 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
381 pfc::string8 unicodeNormalizeC_Lite(const char* in) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
382 static shortener g_shortener; auto& data = g_shortener.m_data;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
383 pfc::string8 ret; ret.prealloc(strlen(in));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
384 for (;; ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
385 unsigned c;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
386 auto d = pfc::utf8_decode_char(in, c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
387 if (d == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
388 in += d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
389 if ( c < 0x10000 ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
390 auto iter1 = data.find((uint16_t)c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
391 if (iter1 != data.end()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
392 unsigned next;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
393 auto d2 = pfc::utf8_decode_char(in, next);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
394 if (d2 != 0) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
395 auto& data2 = iter1->second;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
396 auto iter2 = data2.find(next);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
397 if (iter2 != data2.end()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
398 in += d2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
399 ret.add_char(iter2->second); continue;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
400 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
401 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
402 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
403 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
404 ret.add_char(c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
405 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
406 return ret;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
407 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
408 bool stringContainsFormD(const char* in) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
409 for (;; ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
410 unsigned c;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
411 auto d = pfc::utf8_decode_char(in, c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
412 if (d == 0) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
413 if (is_modifier(c)) return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
414 in += d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
415 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
416 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
417 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
418 #if ! defined(__APPLE__) && !defined(_WIN32)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
419 namespace pfc {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
420 pfc::string8 unicodeNormalizeD(const char* in) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
421 return unicodeNormalizeD_Lite(in);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
422 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
423 pfc::string8 unicodeNormalizeC(const char* in) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
424 return unicodeNormalizeC_Lite(in);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
425 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
426 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
427
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
428 #endif // #if ! defined(__APPLE__) && !defined(_WIN32)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
429