annotate foosdk/sdk/pfc/string_conv.cpp @ 1:20d02a178406 default tip

*: check in everything else yay
author Paper <paper@tflc.us>
date Mon, 05 Jan 2026 02:15:46 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1 #include "pfc-lite.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
2 #include "string_conv.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
3 #include "string_base.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
4
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
5
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
6 namespace {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
7 template<typename t_char, bool isChecked = true>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
8 class string_writer_t {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
9 public:
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
10 string_writer_t(t_char * p_buffer,t_size p_size) : m_buffer(p_buffer), m_size(p_size), m_writeptr(0) {}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
11
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
12 void write(t_char p_char) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
13 if (isChecked) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
14 if (m_writeptr < m_size) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
15 m_buffer[m_writeptr++] = p_char;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
16 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
17 } else {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
18 m_buffer[m_writeptr++] = p_char;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
19 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
20 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
21 void write_multi(const t_char * p_buffer,t_size p_count) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
22 if (isChecked) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
23 const t_size delta = pfc::min_t<t_size>(p_count,m_size-m_writeptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
24 for(t_size n=0;n<delta;n++) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
25 m_buffer[m_writeptr++] = p_buffer[n];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
26 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
27 } else {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
28 for(t_size n = 0; n < p_count; ++n) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
29 m_buffer[m_writeptr++] = p_buffer[n];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
30 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
31 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
32 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
33
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
34 void write_as_utf8(unsigned p_char) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
35 if (isChecked) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
36 char temp[6];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
37 t_size n = pfc::utf8_encode_char(p_char,temp);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
38 write_multi(temp,n);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
39 } else {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
40 m_writeptr += pfc::utf8_encode_char(p_char, m_buffer + m_writeptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
41 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
42 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
43
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
44 void write_as_wide(unsigned p_char) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
45 if (isChecked) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
46 wchar_t temp[2];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
47 t_size n = pfc::wide_encode_char(p_char,temp);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
48 write_multi(temp,n);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
49 } else {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
50 m_writeptr += pfc::wide_encode_char(p_char, m_buffer + m_writeptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
51 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
52 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
53
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
54 t_size finalize() {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
55 if (isChecked) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
56 if (m_size == 0) return 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
57 t_size terminator = pfc::min_t<t_size>(m_writeptr,m_size-1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
58 m_buffer[terminator] = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
59 return terminator;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
60 } else {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
61 m_buffer[m_writeptr] = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
62 return m_writeptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
63 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
64 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
65 bool is_overrun() const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
66 return m_writeptr >= m_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
67 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
68 private:
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
69 t_char * m_buffer;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
70 t_size m_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
71 t_size m_writeptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
72 };
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
73
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
74
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
75
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
76
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// Unicode values for the Windows-1252 bytes 0x80..0x9F, indexed by (byte - 0x80).
// An entry of 0 marks a byte that is undefined in this code page.
static constexpr uint16_t mappings1252[] = {
	0x20AC, // 0x80: EURO SIGN
	0,      // 0x81: undefined
	0x201A, // 0x82: SINGLE LOW-9 QUOTATION MARK
	0x0192, // 0x83: LATIN SMALL LETTER F WITH HOOK
	0x201E, // 0x84: DOUBLE LOW-9 QUOTATION MARK
	0x2026, // 0x85: HORIZONTAL ELLIPSIS
	0x2020, // 0x86: DAGGER
	0x2021, // 0x87: DOUBLE DAGGER
	0x02C6, // 0x88: MODIFIER LETTER CIRCUMFLEX ACCENT
	0x2030, // 0x89: PER MILLE SIGN
	0x0160, // 0x8A: LATIN CAPITAL LETTER S WITH CARON
	0x2039, // 0x8B: SINGLE LEFT-POINTING ANGLE QUOTATION MARK
	0x0152, // 0x8C: LATIN CAPITAL LIGATURE OE
	0,      // 0x8D: undefined
	0x017D, // 0x8E: LATIN CAPITAL LETTER Z WITH CARON
	0,      // 0x8F: undefined

	0,      // 0x90: undefined
	0x2018, // 0x91: LEFT SINGLE QUOTATION MARK
	0x2019, // 0x92: RIGHT SINGLE QUOTATION MARK
	0x201C, // 0x93: LEFT DOUBLE QUOTATION MARK
	0x201D, // 0x94: RIGHT DOUBLE QUOTATION MARK
	0x2022, // 0x95: BULLET
	0x2013, // 0x96: EN DASH
	0x2014, // 0x97: EM DASH
	0x02DC, // 0x98: SMALL TILDE
	0x2122, // 0x99: TRADE MARK SIGN
	0x0161, // 0x9A: LATIN SMALL LETTER S WITH CARON
	0x203A, // 0x9B: SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
	0x0153, // 0x9C: LATIN SMALL LIGATURE OE
	0,      // 0x9D: undefined
	0x017E, // 0x9E: LATIN SMALL LETTER Z WITH CARON
	0x0178, // 0x9F: LATIN CAPITAL LETTER Y WITH DIAERESIS
};
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
111
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
112 static bool charImport1252(uint32_t & unichar, char c) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
113 uint8_t uc = (uint8_t) c;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
114 if (uc == 0) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
115 else if (uc < 0x80) {unichar = uc; return true;}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
116 else if (uc < 0xA0) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
117 uint32_t t = mappings1252[uc-0x80];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
118 if (t == 0) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
119 unichar = t; return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
120 } else {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
121 unichar = uc; return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
122 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
123 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
124
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
125 static bool charExport1252(char & c, uint32_t unichar) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
126 if (unichar == 0) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
127 else if (unichar < 0x80 || (unichar >= 0xa0 && unichar <= 0xFF)) {c = (char)(uint8_t)unichar; return true;}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
128 for(size_t walk = 0; walk < PFC_TABSIZE(mappings1252); ++walk) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
129 if (unichar == mappings1252[walk]) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
130 c = (char)(uint8_t)(walk + 0x80);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
131 return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
132 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
133 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
134 return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
135 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
// One entry of the non-ASCII to ASCII substitution table.
struct asciiMap_t {
	uint16_t from; // character value being replaced
	uint8_t to;    // ASCII character value used instead
};

// Substitution table consulted by charToASCII(), which binary-searches it by 'from';
// entries therefore have to stay sorted by strictly ascending 'from'.
static constexpr asciiMap_t g_asciiMap[] = {
	// 160..187
	{160,32},{161,33},{162,99},{164,36},{165,89},{166,124},{169,67},{170,97},{171,60},
	{173,45},{174,82},{178,50},{179,51},{183,46},{184,44},{185,49},{186,111},{187,62},
	// 192..221
	{192,65},{193,65},{194,65},{195,65},{196,65},{197,65},{198,65},{199,67},
	{200,69},{201,69},{202,69},{203,69},{204,73},{205,73},{206,73},{207,73},
	{208,68},{209,78},{210,79},{211,79},{212,79},{213,79},{214,79},{216,79},
	{217,85},{218,85},{219,85},{220,85},{221,89},
	// 224..255
	{224,97},{225,97},{226,97},{227,97},{228,97},{229,97},{230,97},{231,99},
	{232,101},{233,101},{234,101},{235,101},{236,105},{237,105},{238,105},{239,105},
	{241,110},{242,111},{243,111},{244,111},{245,111},{246,111},{248,111},
	{249,117},{250,117},{251,117},{252,117},{253,121},{255,121},
	// 256..382
	{256,65},{257,97},{258,65},{259,97},{260,65},{261,97},
	{262,67},{263,99},{264,67},{265,99},{266,67},{267,99},{268,67},{269,99},
	{270,68},{271,100},{272,68},{273,100},
	{274,69},{275,101},{276,69},{277,101},{278,69},{279,101},{280,69},{281,101},{282,69},{283,101},
	{284,71},{285,103},{286,71},{287,103},{288,71},{289,103},{290,71},{291,103},
	{292,72},{293,104},{294,72},{295,104},
	{296,73},{297,105},{298,73},{299,105},{300,73},{301,105},{302,73},{303,105},{304,73},{305,105},
	{308,74},{309,106},{310,75},{311,107},
	{313,76},{314,108},{315,76},{316,108},{317,76},{318,108},{321,76},{322,108},
	{323,78},{324,110},{325,78},{326,110},{327,78},{328,110},
	{332,79},{333,111},{334,79},{335,111},{336,79},{337,111},{338,79},{339,111},
	{340,82},{341,114},{342,82},{343,114},{344,82},{345,114},
	{346,83},{347,115},{348,83},{349,115},{350,83},{351,115},{352,83},{353,115},
	{354,84},{355,116},{356,84},{357,116},{358,84},{359,116},
	{360,85},{361,117},{362,85},{363,117},{364,85},{365,117},{366,85},{367,117},{368,85},{369,117},{370,85},{371,117},
	{372,87},{373,119},{374,89},{375,121},{376,89},
	{377,90},{378,122},{379,90},{380,122},{381,90},{382,122},
	// 384..496
	{384,98},{393,68},{401,70},{402,102},{407,73},{410,108},{415,79},{416,79},{417,111},
	{427,116},{430,84},{431,85},{432,117},{438,122},
	{461,65},{462,97},{463,73},{464,105},{465,79},{466,111},{467,85},{468,117},
	{469,85},{470,117},{471,85},{472,117},{473,85},{474,117},{475,85},{476,117},
	{478,65},{479,97},{484,71},{485,103},{486,71},{487,103},{488,75},{489,107},
	{490,79},{491,111},{492,79},{493,111},{496,106},
	// 609..818
	{609,103},{697,39},{698,34},{700,39},{708,94},{710,94},{712,39},{715,96},{717,95},{732,126},
	{768,96},{770,94},{771,126},{782,34},{817,95},{818,95},
	// 8192..8482
	{8192,32},{8193,32},{8194,32},{8195,32},{8196,32},{8197,32},{8198,32},
	{8208,45},{8209,45},{8211,45},{8212,45},
	{8216,39},{8217,39},{8218,44},{8220,34},{8221,34},{8222,34},
	{8226,46},{8230,46},{8242,39},{8245,96},{8249,60},{8250,62},{8482,84},
	// 65281..65374
	{65281,33},{65282,34},{65283,35},{65284,36},{65285,37},{65286,38},{65287,39},{65288,40},
	{65289,41},{65290,42},{65291,43},{65292,44},{65293,45},{65294,46},{65295,47},{65296,48},
	{65297,49},{65298,50},{65299,51},{65300,52},{65301,53},{65302,54},{65303,55},{65304,56},
	{65305,57},{65306,58},{65307,59},{65308,60},{65309,61},{65310,62},
	{65312,64},{65313,65},{65314,66},{65315,67},{65316,68},{65317,69},{65318,70},{65319,71},
	{65320,72},{65321,73},{65322,74},{65323,75},{65324,76},{65325,77},{65326,78},{65327,79},
	{65328,80},{65329,81},{65330,82},{65331,83},{65332,84},{65333,85},{65334,86},{65335,87},
	{65336,88},{65337,89},{65338,90},{65339,91},{65340,92},{65341,93},{65342,94},{65343,95},
	{65344,96},{65345,97},{65346,98},{65347,99},{65348,100},{65349,101},{65350,102},{65351,103},
	{65352,104},{65353,105},{65354,106},{65355,107},{65356,108},{65357,109},{65358,110},{65359,111},
	{65360,112},{65361,113},{65362,114},{65363,115},{65364,116},{65365,117},{65366,118},{65367,119},
	{65368,120},{65369,121},{65370,122},{65371,123},{65372,124},{65373,125},{65374,126}
};
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
139
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
140 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
141
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
142 namespace pfc {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
143 namespace stringcvt {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
144
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
145 char charToASCII( unsigned c ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
146 if (c < 128) return (char)c;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
147 unsigned lo = 0, hi = PFC_TABSIZE(g_asciiMap);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
148 while( lo < hi ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
149 const unsigned mid = (lo + hi) / 2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
150 const asciiMap_t entry = g_asciiMap[mid];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
151 if ( c > entry.from ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
152 lo = mid + 1;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
153 } else if (c < entry.from) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
154 hi = mid;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
155 } else {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
156 return (char)entry.to;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
157 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
158 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
159 return '?';
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
160 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
161
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
162 t_size convert_utf8_to_wide(wchar_t * p_out,t_size p_out_size,const char * p_in,t_size p_in_size) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
163 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
164 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
165 string_writer_t<wchar_t> writer(p_out,p_out_size);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
166
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
167 while(inptr < insize && !writer.is_overrun()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
168 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
169 t_size delta = utf8_decode_char(p_in + inptr,newchar,insize - inptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
170 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
171 PFC_ASSERT(inptr + delta <= insize);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
172 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
173 writer.write_as_wide(newchar);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
174 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
175
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
176 return writer.finalize();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
177 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
178
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
179 t_size convert_utf8_to_wide_unchecked(wchar_t * p_out,const char * p_in) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
180 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
181 string_writer_t<wchar_t,false> writer(p_out,SIZE_MAX);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
182
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
183 while(!writer.is_overrun()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
184 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
185 t_size delta = utf8_decode_char(p_in + inptr,newchar);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
186 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
187 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
188 writer.write_as_wide(newchar);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
189 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
190
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
191 return writer.finalize();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
192 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
193
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
194 t_size convert_wide_to_utf8(char * p_out,t_size p_out_size,const wchar_t * p_in,t_size p_in_size) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
195 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
196 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
197 string_writer_t<char> writer(p_out,p_out_size);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
198
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
199 while(inptr < insize && !writer.is_overrun()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
200 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
201 t_size delta = wide_decode_char(p_in + inptr,&newchar,insize - inptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
202 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
203 PFC_ASSERT(inptr + delta <= insize);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
204 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
205 writer.write_as_utf8(newchar);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
206 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
207
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
208 return writer.finalize();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
209 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
210
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
211 t_size estimate_utf8_to_wide(const char * p_in) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
212 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
213 t_size retval = 1;//1 for null terminator
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
214 for(;;) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
215 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
216 t_size delta = utf8_decode_char(p_in + inptr,newchar);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
217 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
218 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
219
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
220 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
221 wchar_t temp[2];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
222 retval += wide_encode_char(newchar,temp);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
223 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
224 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
225 return retval;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
226 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
227
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
228 t_size estimate_utf8_to_wide(const char * p_in,t_size p_in_size) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
229 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
230 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
231 t_size retval = 1;//1 for null terminator
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
232 while(inptr < insize) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
233 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
234 t_size delta = utf8_decode_char(p_in + inptr,newchar,insize - inptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
235 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
236 PFC_ASSERT(inptr + delta <= insize);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
237 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
238
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
239 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
240 wchar_t temp[2];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
241 retval += wide_encode_char(newchar,temp);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
242 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
243 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
244 return retval;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
245 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
246
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
247 t_size estimate_wide_to_utf8(const wchar_t * p_in,t_size p_in_size) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
248 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
249 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
250 t_size retval = 1;//1 for null terminator
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
251 while(inptr < insize) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
252 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
253 t_size delta = wide_decode_char(p_in + inptr,&newchar,insize - inptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
254 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
255 PFC_ASSERT(inptr + delta <= insize);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
256 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
257
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
258 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
259 char temp[6];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
260 delta = utf8_encode_char(newchar,temp);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
261 if (delta == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
262 retval += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
263 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
264 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
265 return retval;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
266 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
267
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
268 t_size estimate_wide_to_win1252( const wchar_t * p_in, t_size p_in_size ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
269 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
270 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
271 t_size retval = 1;//1 for null terminator
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
272 while(inptr < insize) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
273 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
274 t_size delta = wide_decode_char(p_in + inptr,&newchar,insize - inptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
275 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
276 PFC_ASSERT(inptr + delta <= insize);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
277 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
278
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
279 ++retval;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
280 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
281 return retval;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
282
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
283 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
284
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
285 t_size convert_wide_to_win1252( char * p_out, t_size p_out_size, const wchar_t * p_in, t_size p_in_size ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
286 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
287 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
288 string_writer_t<char> writer(p_out,p_out_size);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
289
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
290 while(inptr < insize && !writer.is_overrun()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
291 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
292 t_size delta = wide_decode_char(p_in + inptr,&newchar,insize - inptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
293 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
294 PFC_ASSERT(inptr + delta <= insize);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
295 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
296
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
297 char temp;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
298 if (!charExport1252( temp, newchar )) temp = '?';
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
299 writer.write( temp );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
300 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
301
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
302 return writer.finalize();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
303
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
304 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
305
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
306 t_size estimate_win1252_to_wide( const char * p_source, t_size p_source_size ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
307 return strlen_max_t( p_source, p_source_size ) + 1;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
308 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
309
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
310 t_size convert_win1252_to_wide( wchar_t * p_out, t_size p_out_size, const char * p_in, t_size p_in_size ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
311 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
312 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
313 string_writer_t<wchar_t> writer(p_out,p_out_size);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
314
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
315 while(inptr < insize && !writer.is_overrun()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
316 char inChar = p_in[inptr];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
317 if (inChar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
318 ++inptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
319
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
320 unsigned out;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
321 if (!charImport1252( out , inChar )) out = '?';
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
322 writer.write_as_wide( out );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
323 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
324
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
325 return writer.finalize();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
326 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
327
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
328 t_size estimate_utf8_to_win1252( const char * p_in, t_size p_in_size ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
329 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
330 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
331 t_size retval = 1;//1 for null terminator
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
332 while(inptr < insize) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
333 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
334 t_size delta = utf8_decode_char(p_in + inptr,newchar,insize - inptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
335 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
336 PFC_ASSERT(inptr + delta <= insize);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
337 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
338
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
339 ++retval;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
340 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
341 return retval;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
342 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
343 t_size convert_utf8_to_win1252( char * p_out, t_size p_out_size, const char * p_in, t_size p_in_size ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
344 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
345 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
346 string_writer_t<char> writer(p_out,p_out_size);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
347
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
348 while(inptr < insize && !writer.is_overrun()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
349 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
350 t_size delta = utf8_decode_char(p_in + inptr,newchar,insize - inptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
351 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
352 PFC_ASSERT(inptr + delta <= insize);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
353 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
354
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
355 char temp;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
356 if (!charExport1252( temp, newchar )) temp = '?';
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
357 writer.write( temp );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
358 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
359
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
360 return writer.finalize();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
361 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
362
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
363 t_size estimate_win1252_to_utf8( const char * p_in, t_size p_in_size ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
364 const size_t insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
365 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
366 t_size retval = 1; // 1 for null terminator
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
367 while(inptr < insize) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
368 unsigned newchar;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
369 char c = p_in[inptr];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
370 if (c == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
371 ++inptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
372 if (!charImport1252( newchar, c)) newchar = '?';
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
373
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
374 char temp[6];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
375 retval += pfc::utf8_encode_char( newchar, temp );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
376 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
377 return retval;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
378 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
379
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
380 t_size convert_win1252_to_utf8( char * p_out, t_size p_out_size, const char * p_in, t_size p_in_size ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
381 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
382 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
383 string_writer_t<char> writer(p_out,p_out_size);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
384
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
385 while(inptr < insize && !writer.is_overrun()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
386 char inChar = p_in[inptr];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
387 if (inChar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
388 ++inptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
389
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
390 unsigned out;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
391 if (!charImport1252( out , inChar )) out = '?';
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
392 writer.write_as_utf8( out );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
393 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
394
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
395 return writer.finalize();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
396 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
397
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
398 t_size estimate_utf8_to_ascii( const char * p_source, t_size p_source_size ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
399 return estimate_utf8_to_win1252( p_source, p_source_size );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
400 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
401 t_size convert_utf8_to_ascii( char * p_out, t_size p_out_size, const char * p_in, t_size p_in_size ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
402 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
403 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
404 string_writer_t<char> writer(p_out,p_out_size);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
405
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
406 while(inptr < insize && !writer.is_overrun()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
407 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
408 t_size delta = utf8_decode_char(p_in + inptr,newchar,insize - inptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
409 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
410 PFC_ASSERT(inptr + delta <= insize);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
411 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
412
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
413 writer.write( charToASCII(newchar) );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
414 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
415
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
416 return writer.finalize();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
417 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
418
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
419
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
420
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
421 // 2016-05-16 additions
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
422 // Explicit UTF-16 converters
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
423 t_size estimate_utf16_to_utf8( const char16_t * p_in, size_t p_in_size ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
424 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
425 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
426 t_size retval = 1;//1 for null terminator
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
427 while(inptr < insize) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
428 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
429 t_size delta = utf16_decode_char(p_in + inptr,&newchar,insize - inptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
430 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
431 PFC_ASSERT(inptr + delta <= insize);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
432 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
433
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
434 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
435 char temp[6];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
436 delta = utf8_encode_char(newchar,temp);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
437 if (delta == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
438 retval += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
439 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
440 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
441 return retval;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
442 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
443 t_size convert_utf16_to_utf8( char * p_out, size_t p_out_size, const char16_t * p_in, size_t p_in_size ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
444 const t_size insize = p_in_size;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
445 t_size inptr = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
446 string_writer_t<char> writer(p_out,p_out_size);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
447
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
448 while(inptr < insize && !writer.is_overrun()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
449 unsigned newchar = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
450 t_size delta = utf16_decode_char(p_in + inptr,&newchar,insize - inptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
451 if (delta == 0 || newchar == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
452 PFC_ASSERT(inptr + delta <= insize);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
453 inptr += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
454 writer.write_as_utf8(newchar);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
455 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
456
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
457 return writer.finalize();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
458 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
459
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
460 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
461 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
462
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
463 #ifdef _WINDOWS
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
464
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
465
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
466 namespace pfc {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
467 namespace stringcvt {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
468
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
469
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
470 t_size convert_codepage_to_wide(unsigned p_codepage,wchar_t * p_out,t_size p_out_size,const char * p_source,t_size p_source_size) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
471 if (p_out_size == 0) return 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
472 memset(p_out,0,p_out_size * sizeof(*p_out));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
473 MultiByteToWideChar(p_codepage,0,p_source, pfc::downcast_guarded<int>(p_source_size),p_out, pfc::downcast_guarded<int>(p_out_size));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
474 p_out[p_out_size-1] = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
475 return wcslen(p_out);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
476 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
477
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
478 t_size convert_wide_to_codepage(unsigned p_codepage,char * p_out,t_size p_out_size,const wchar_t * p_source,t_size p_source_size) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
479 if (p_out_size == 0) return 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
480 memset(p_out,0,p_out_size * sizeof(*p_out));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
481 WideCharToMultiByte(p_codepage,0,p_source,pfc::downcast_guarded<int>(p_source_size),p_out,pfc::downcast_guarded<int>(p_out_size),0,FALSE);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
482 p_out[p_out_size-1] = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
483 return strlen(p_out);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
484 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
485
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
486 t_size estimate_codepage_to_wide(unsigned p_codepage,const char * p_source,t_size p_source_size) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
487 return MultiByteToWideChar(p_codepage,0,p_source, pfc::downcast_guarded<int>(strlen_max(p_source,p_source_size)),0,0) + 1;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
488 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
489 t_size estimate_wide_to_codepage(unsigned p_codepage,const wchar_t * p_source,t_size p_source_size) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
490 return WideCharToMultiByte(p_codepage,0,p_source, pfc::downcast_guarded<int>(wcslen_max(p_source,p_source_size)),0,0,0,FALSE) + 1;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
491 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
492 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
493
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
494 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
495
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
496 #endif //_WINDOWS
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
497
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
498 pfc::string_base & operator<<(pfc::string_base & p_fmt, const wchar_t * p_str) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
499 p_fmt.add_string(pfc::stringcvt::string_utf8_from_wide(p_str) ); return p_fmt;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
500 }