annotate foosdk/sdk/pfc/pocket_char_ops.h @ 1:20d02a178406 default tip

*: check in everything else yay
author Paper <paper@tflc.us>
date Mon, 05 Jan 2026 02:15:46 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1 #pragma once
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
2
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
3 // Standalone header (no dependencies) with implementations of PFC UTF-8 & UTF-16 manipulation routines
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
4
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Lead-byte classification tables, indexed by (sequence length - 1).
// A byte b introduces a sequence of length i + 1 when
// (b & mask_tab[i]) == val_tab[i].
static const uint8_t mask_tab[6] = {
	0x80, // 0xxxxxxx
	0xE0, // 110xxxxx
	0xF0, // 1110xxxx
	0xF8, // 11110xxx
	0xFC, // 111110xx
	0xFE  // 1111110x
};

static const uint8_t val_tab[6] = {
	0x00, // 1 byte
	0xC0, // 2 bytes
	0xE0, // 3 bytes
	0xF0, // 4 bytes
	0xF8, // 5 bytes
	0xFC  // 6 bytes
};
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
8
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Returns the total length in bytes (1..6) of the UTF-8 sequence introduced
// by the lead byte p_c, or 0 when p_c cannot start a sequence (a continuation
// byte 10xxxxxx, or 0xFE / 0xFF).
size_t utf8_char_len_from_header(char p_c) noexcept
{
	const unsigned lead = static_cast<unsigned char>(p_c);

	// Top bit clear: single-byte (ASCII) character.
	if ((lead & 0x80) == 0) return 1;

	// The number of leading 1 bits of a multi-byte lead equals the length.
	size_t ones = 0;
	for (unsigned probe = 0x80; (lead & probe) != 0; probe >>= 1) ++ones;

	// One leading 1 is a continuation byte; seven or eight is not a lead byte.
	if (ones < 2 || ones > 6) return 0;
	return ones;
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Decodes one UTF-8 character from the NUL-terminated string p_utf8.
//
// wide receives the decoded code point, or 0 when nothing was decoded.
// Returns the number of bytes consumed (1..6), or 0 when the string is empty
// or the sequence is malformed.
//
// No length is passed: a missing continuation byte is detected because the
// terminating NUL is not of the form 10xxxxxx, so reading stops there.
// Overlong forms are only partly rejected: 2-byte sequences with lead 0xC0 /
// 0xC1 always are, longer ones only via the third-byte test below.
size_t utf8_decode_char(const char *p_utf8, unsigned & wide) noexcept
{
	const unsigned char * const bytes = reinterpret_cast<const unsigned char *>(p_utf8);
	const unsigned lead = bytes[0];

	// ASCII fast path; the NUL terminator decodes to 0 and consumes nothing.
	if (lead < 0x80) {
		wide = lead;
		return lead != 0 ? 1 : 0;
	}

	wide = 0;

	// Sequence length == number of leading 1 bits in the lead byte.
	unsigned seq_len = 0;
	for (unsigned probe = 0x80; (lead & probe) != 0; probe >>= 1) ++seq_len;
	if (seq_len < 2 || seq_len > 6) return 0; // stray continuation byte, 0xFE or 0xFF

	// 0xC0 / 0xC1 can only introduce an overlong encoding of an ASCII value.
	if (seq_len == 2 && (lead & 0x1E) == 0) return 0;

	// Payload bits of the lead byte (everything below the length marker).
	unsigned value = lead & (0xFFu >> (seq_len + 1));

	for (unsigned idx = 1; idx < seq_len; ++idx) {
		const unsigned trail = bytes[idx];
		if ((trail & 0xC0) != 0x80) return 0; // not a continuation byte

		// Partial overlong check: applied to the third byte only, and only
		// when every payload bit seen so far is zero.
		if (idx == 2 && value == 0 && ((trail & 0x7F) >> (7 - seq_len)) == 0) return 0;

		value = (value << 6) | (trail & 0x3F);
	}

	wide = value;
	return seq_len;
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
62
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Decodes one UTF-8 character from p_utf8, reading at most max bytes.
//
// wide receives the decoded code point, or 0 when nothing was decoded.
// Returns the number of bytes consumed (1..6), or 0 when max is 0, the first
// byte is NUL, the sequence is malformed, or it needs more than max bytes.
//
// Overlong forms are only partly rejected: 2-byte sequences with lead 0xC0 /
// 0xC1 always are, longer ones only via the third-byte test below.
size_t utf8_decode_char(const char *p_utf8, unsigned & wide, size_t max) noexcept
{
	// Nothing may be read from an empty range.
	if (max == 0) {
		wide = 0;
		return 0;
	}

	const unsigned char * const bytes = reinterpret_cast<const unsigned char *>(p_utf8);
	const unsigned lead = bytes[0];

	// ASCII fast path; a NUL decodes to 0 and consumes nothing.
	if (lead < 0x80) {
		wide = lead;
		return lead != 0 ? 1 : 0;
	}

	wide = 0;

	// Sequences are never longer than 6 bytes.
	const size_t limit = max > 6 ? 6 : max;

	// Sequence length == number of leading 1 bits in the lead byte.
	unsigned seq_len = 0;
	for (unsigned probe = 0x80; (lead & probe) != 0; probe >>= 1) ++seq_len;

	// A single leading 1 is a stray continuation byte; anything longer than
	// the limit is either not a lead byte or does not fit into the input.
	if (seq_len < 2 || seq_len > limit) return 0;

	// 0xC0 / 0xC1 can only introduce an overlong encoding of an ASCII value.
	if (seq_len == 2 && (lead & 0x1E) == 0) return 0;

	// Payload bits of the lead byte (everything below the length marker).
	unsigned value = lead & (0xFFu >> (seq_len + 1));

	for (unsigned idx = 1; idx < seq_len; ++idx) {
		const unsigned trail = bytes[idx];
		if ((trail & 0xC0) != 0x80) return 0; // not a continuation byte

		// Partial overlong check: applied to the third byte only, and only
		// when every payload bit seen so far is zero.
		if (idx == 2 && value == 0 && ((trail & 0x7F) >> (7 - seq_len)) == 0) return 0;

		value = (value << 6) | (trail & 0x3F);
	}

	wide = value;
	return seq_len;
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
110
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
111
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Encodes the value wide as UTF-8 (the extended 1..6 byte scheme, covering
// values up to 0x7FFFFFFF).
//
// target may be null, in which case nothing is written and only the required
// length is returned; otherwise it must have room for the returned number of
// bytes. No terminator is written.
// Returns the encoded length in bytes (1..6), or 0 when wide exceeds
// 0x7FFFFFFF.
size_t utf8_encode_char(unsigned wide, char * target) noexcept
{
	size_t length;
	if (wide < 0x80) length = 1;
	else if (wide < 0x800) length = 2;
	else if (wide < 0x10000) length = 3;
	else if (wide < 0x200000) length = 4;
	else if (wide < 0x4000000) length = 5;
	else if (wide <= 0x7FFFFFFF) length = 6;
	else return 0;

	// Length query only.
	if (target == nullptr) return length;

	if (length == 1) {
		target[0] = static_cast<char>(wide);
		return length;
	}

	// Continuation bytes are produced back to front, six payload bits each.
	unsigned remaining = wide;
	for (size_t pos = length - 1; pos > 0; --pos) {
		target[pos] = static_cast<char>(0x80 | (remaining & 0x3F));
		remaining >>= 6;
	}

	// Lead byte: `length` one-bits, a zero bit, then the leftover payload.
	const unsigned prefix = (0xFF00u >> length) & 0xFF;
	target[0] = static_cast<char>(prefix | remaining);

	return length;
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
168
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Encodes the code point cur_wchar as UTF-16 into out.
//
// out must have room for two units. No terminator is written.
// Returns the number of char16_t units written:
//   1 for values below 0x10000 (stored as-is; surrogate values are not
//     filtered out),
//   2 for 0x10000..0x10FFFF (high surrogate, then low surrogate),
//   1 for anything above 0x10FFFF, which cannot be represented and is
//     replaced by '?'.
size_t utf16_encode_char(unsigned cur_wchar, char16_t * out) noexcept
{
	if (cur_wchar < 0x10000) {
		*out = (char16_t)cur_wchar;
		return 1;
	}

	// The surrogate mechanism covers a 20-bit offset above 0x10000, i.e. up
	// to and including 0x10FFFF. Testing the code point itself against 2^20
	// would wrongly reject 0x100000..0x10FFFF.
	if (cur_wchar <= 0x10FFFF) {
		const unsigned offset = cur_wchar - 0x10000;
		out[0] = (char16_t)(0xD800 | (0x3FF & (offset >> 10))); // high surrogate: top 10 bits
		out[1] = (char16_t)(0xDC00 | (0x3FF & offset));         // low surrogate: bottom 10 bits
		return 2;
	}

	*out = '?';
	return 1;
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
184
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Decodes one code point from the UTF-16 buffer p_source, which holds
// p_source_length units.
//
// *p_out receives the decoded value. Returns the number of units consumed:
//   0 when p_source_length is 0 (*p_out = 0),
//   1 when p_source_length is 1; the unit is returned as-is, even if it is
//     NUL or a surrogate,
//   0 when two or more units are available and the first is NUL (*p_out = 0),
//   2 for a high surrogate followed by a low surrogate,
//   1 otherwise (an unpaired surrogate is returned unchanged).
size_t utf16_decode_char(const char16_t * p_source, unsigned * p_out, size_t p_source_length) noexcept
{
	if (p_source_length == 0) {
		*p_out = 0;
		return 0;
	}

	const unsigned lead = p_source[0];

	// A single available unit is passed through without further inspection.
	if (p_source_length == 1) {
		*p_out = lead;
		return 1;
	}

	// With more input available, a NUL unit ends decoding.
	if (lead == 0) {
		*p_out = 0;
		return 0;
	}

	if ((lead & 0xFC00) == 0xD800) {
		// High surrogate: combine with the next unit if it is a low surrogate.
		const unsigned trail = p_source[1];
		if ((trail & 0xFC00) == 0xDC00) {
			*p_out = 0x10000 + (((lead & 0x3FF) << 10) | (trail & 0x3FF));
			return 2;
		}
	}

	*p_out = lead;
	return 1;
}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
209
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
210 unsigned utf8_get_char(const char * src)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
211 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
212 unsigned rv = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
213 utf8_decode_char(src, rv);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
214 return rv;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
215 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
216
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
217
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
218 size_t utf8_char_len(const char * s, size_t max) noexcept
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
219 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
220 unsigned dummy;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
221 return utf8_decode_char(s, dummy, max);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
222 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
223
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
224 size_t skip_utf8_chars(const char * ptr, size_t count) noexcept
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
225 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
226 size_t num = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
227 for (; count && ptr[num]; count--)
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
228 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
229 size_t d = utf8_char_len(ptr + num, (size_t)(-1));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
230 if (d <= 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
231 num += d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
232 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
233 return num;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
234 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
235
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
236 bool is_valid_utf8(const char * param, size_t max) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
237 size_t walk = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
238 while (walk < max && param[walk] != 0) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
239 size_t d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
240 unsigned dummy;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
241 d = utf8_decode_char(param + walk, dummy, max - walk);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
242 if (d == 0) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
243 walk += d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
244 if (walk > max) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
245 // should not get here
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
246 return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
247 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
248 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
249 return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
250 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
251
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
252 bool is_canonical_utf8(const char * param, size_t max) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
253 char scratch[6];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
254 size_t walk = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
255 while (walk < max && param[walk] != 0) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
256 size_t d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
257 unsigned c;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
258 d = utf8_decode_char(param + walk, c, max - walk);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
259 if (d == 0) return false; // bad UTF-8
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
260 walk += d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
261 if (walk > max) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
262 // should not get here
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
263 return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
264 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
265 if (utf8_encode_char(c, scratch) != d) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
266 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
267 return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
268
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
269 }