Mercurial > foo_out_sdl
comparison foosdk/sdk/pfc/string_conv.cpp @ 1:20d02a178406 default tip
*: check in everything else
yay
| author | Paper <paper@tflc.us> |
|---|---|
| date | Mon, 05 Jan 2026 02:15:46 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 0:e9bb126753e7 | 1:20d02a178406 |
|---|---|
| 1 #include "pfc-lite.h" | |
| 2 #include "string_conv.h" | |
| 3 #include "string_base.h" | |
| 4 | |
| 5 | |
| 6 namespace { | |
| 7 template<typename t_char, bool isChecked = true> | |
| 8 class string_writer_t { | |
| 9 public: | |
| 10 string_writer_t(t_char * p_buffer,t_size p_size) : m_buffer(p_buffer), m_size(p_size), m_writeptr(0) {} | |
| 11 | |
| 12 void write(t_char p_char) { | |
| 13 if (isChecked) { | |
| 14 if (m_writeptr < m_size) { | |
| 15 m_buffer[m_writeptr++] = p_char; | |
| 16 } | |
| 17 } else { | |
| 18 m_buffer[m_writeptr++] = p_char; | |
| 19 } | |
| 20 } | |
| 21 void write_multi(const t_char * p_buffer,t_size p_count) { | |
| 22 if (isChecked) { | |
| 23 const t_size delta = pfc::min_t<t_size>(p_count,m_size-m_writeptr); | |
| 24 for(t_size n=0;n<delta;n++) { | |
| 25 m_buffer[m_writeptr++] = p_buffer[n]; | |
| 26 } | |
| 27 } else { | |
| 28 for(t_size n = 0; n < p_count; ++n) { | |
| 29 m_buffer[m_writeptr++] = p_buffer[n]; | |
| 30 } | |
| 31 } | |
| 32 } | |
| 33 | |
| 34 void write_as_utf8(unsigned p_char) { | |
| 35 if (isChecked) { | |
| 36 char temp[6]; | |
| 37 t_size n = pfc::utf8_encode_char(p_char,temp); | |
| 38 write_multi(temp,n); | |
| 39 } else { | |
| 40 m_writeptr += pfc::utf8_encode_char(p_char, m_buffer + m_writeptr); | |
| 41 } | |
| 42 } | |
| 43 | |
| 44 void write_as_wide(unsigned p_char) { | |
| 45 if (isChecked) { | |
| 46 wchar_t temp[2]; | |
| 47 t_size n = pfc::wide_encode_char(p_char,temp); | |
| 48 write_multi(temp,n); | |
| 49 } else { | |
| 50 m_writeptr += pfc::wide_encode_char(p_char, m_buffer + m_writeptr); | |
| 51 } | |
| 52 } | |
| 53 | |
| 54 t_size finalize() { | |
| 55 if (isChecked) { | |
| 56 if (m_size == 0) return 0; | |
| 57 t_size terminator = pfc::min_t<t_size>(m_writeptr,m_size-1); | |
| 58 m_buffer[terminator] = 0; | |
| 59 return terminator; | |
| 60 } else { | |
| 61 m_buffer[m_writeptr] = 0; | |
| 62 return m_writeptr; | |
| 63 } | |
| 64 } | |
| 65 bool is_overrun() const { | |
| 66 return m_writeptr >= m_size; | |
| 67 } | |
| 68 private: | |
| 69 t_char * m_buffer; | |
| 70 t_size m_size; | |
| 71 t_size m_writeptr; | |
| 72 }; | |
| 73 | |
| 74 | |
| 75 | |
| 76 | |
// Windows-1252 bytes 0x80..0x9F mapped to Unicode code points.
// Index = byte - 0x80. A value of 0 marks a byte with no assignment.
static constexpr uint16_t mappings1252[] = {
    0x20AC, // 0x80 EURO SIGN
    0,      // 0x81 (undefined)
    0x201A, // 0x82 SINGLE LOW-9 QUOTATION MARK
    0x0192, // 0x83 LATIN SMALL LETTER F WITH HOOK
    0x201E, // 0x84 DOUBLE LOW-9 QUOTATION MARK
    0x2026, // 0x85 HORIZONTAL ELLIPSIS
    0x2020, // 0x86 DAGGER
    0x2021, // 0x87 DOUBLE DAGGER
    0x02C6, // 0x88 MODIFIER LETTER CIRCUMFLEX ACCENT
    0x2030, // 0x89 PER MILLE SIGN
    0x0160, // 0x8A LATIN CAPITAL LETTER S WITH CARON
    0x2039, // 0x8B SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x0152, // 0x8C LATIN CAPITAL LIGATURE OE
    0,      // 0x8D (undefined)
    0x017D, // 0x8E LATIN CAPITAL LETTER Z WITH CARON
    0,      // 0x8F (undefined)

    0,      // 0x90 (undefined)
    0x2018, // 0x91 LEFT SINGLE QUOTATION MARK
    0x2019, // 0x92 RIGHT SINGLE QUOTATION MARK
    0x201C, // 0x93 LEFT DOUBLE QUOTATION MARK
    0x201D, // 0x94 RIGHT DOUBLE QUOTATION MARK
    0x2022, // 0x95 BULLET
    0x2013, // 0x96 EN DASH
    0x2014, // 0x97 EM DASH
    0x02DC, // 0x98 SMALL TILDE
    0x2122, // 0x99 TRADE MARK SIGN
    0x0161, // 0x9A LATIN SMALL LETTER S WITH CARON
    0x203A, // 0x9B SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x0153, // 0x9C LATIN SMALL LIGATURE OE
    0,      // 0x9D (undefined)
    0x017E, // 0x9E LATIN SMALL LETTER Z WITH CARON
    0x0178, // 0x9F LATIN CAPITAL LETTER Y WITH DIAERESIS
};
| 111 | |
| 112 static bool charImport1252(uint32_t & unichar, char c) { | |
| 113 uint8_t uc = (uint8_t) c; | |
| 114 if (uc == 0) return false; | |
| 115 else if (uc < 0x80) {unichar = uc; return true;} | |
| 116 else if (uc < 0xA0) { | |
| 117 uint32_t t = mappings1252[uc-0x80]; | |
| 118 if (t == 0) return false; | |
| 119 unichar = t; return true; | |
| 120 } else { | |
| 121 unichar = uc; return true; | |
| 122 } | |
| 123 } | |
| 124 | |
| 125 static bool charExport1252(char & c, uint32_t unichar) { | |
| 126 if (unichar == 0) return false; | |
| 127 else if (unichar < 0x80 || (unichar >= 0xa0 && unichar <= 0xFF)) {c = (char)(uint8_t)unichar; return true;} | |
| 128 for(size_t walk = 0; walk < PFC_TABSIZE(mappings1252); ++walk) { | |
| 129 if (unichar == mappings1252[walk]) { | |
| 130 c = (char)(uint8_t)(walk + 0x80); | |
| 131 return true; | |
| 132 } | |
| 133 } | |
| 134 return false; | |
| 135 } | |
// One transliteration rule: Unicode code point -> ASCII replacement.
struct asciiMap_t {
    uint16_t from;
    uint8_t to;
};

// Transliteration table consumed by charToASCII(), which binary-searches it:
// entries MUST stay sorted by ascending 'from'.
static constexpr asciiMap_t g_asciiMap[] = {
    // 160..187
    {160,32},{161,33},{162,99},{164,36},{165,89},{166,124},{169,67},{170,97},{171,60},
    {173,45},{174,82},{178,50},{179,51},{183,46},{184,44},{185,49},{186,111},{187,62},
    // 192..221
    {192,65},{193,65},{194,65},{195,65},{196,65},{197,65},{198,65},{199,67},
    {200,69},{201,69},{202,69},{203,69},{204,73},{205,73},{206,73},{207,73},
    {208,68},{209,78},{210,79},{211,79},{212,79},{213,79},{214,79},{216,79},
    {217,85},{218,85},{219,85},{220,85},{221,89},
    // 224..255
    {224,97},{225,97},{226,97},{227,97},{228,97},{229,97},{230,97},{231,99},
    {232,101},{233,101},{234,101},{235,101},{236,105},{237,105},{238,105},{239,105},
    {241,110},{242,111},{243,111},{244,111},{245,111},{246,111},{248,111},
    {249,117},{250,117},{251,117},{252,117},{253,121},{255,121},
    // 256..382
    {256,65},{257,97},{258,65},{259,97},{260,65},{261,97},
    {262,67},{263,99},{264,67},{265,99},{266,67},{267,99},{268,67},{269,99},
    {270,68},{271,100},{272,68},{273,100},
    {274,69},{275,101},{276,69},{277,101},{278,69},{279,101},{280,69},{281,101},{282,69},{283,101},
    {284,71},{285,103},{286,71},{287,103},{288,71},{289,103},{290,71},{291,103},
    {292,72},{293,104},{294,72},{295,104},
    {296,73},{297,105},{298,73},{299,105},{300,73},{301,105},{302,73},{303,105},{304,73},{305,105},
    {308,74},{309,106},{310,75},{311,107},
    {313,76},{314,108},{315,76},{316,108},{317,76},{318,108},{321,76},{322,108},
    {323,78},{324,110},{325,78},{326,110},{327,78},{328,110},
    {332,79},{333,111},{334,79},{335,111},{336,79},{337,111},{338,79},{339,111},
    {340,82},{341,114},{342,82},{343,114},{344,82},{345,114},
    {346,83},{347,115},{348,83},{349,115},{350,83},{351,115},{352,83},{353,115},
    {354,84},{355,116},{356,84},{357,116},{358,84},{359,116},
    {360,85},{361,117},{362,85},{363,117},{364,85},{365,117},{366,85},{367,117},
    {368,85},{369,117},{370,85},{371,117},
    {372,87},{373,119},{374,89},{375,121},{376,89},
    {377,90},{378,122},{379,90},{380,122},{381,90},{382,122},
    // 384..609
    {384,98},{393,68},{401,70},{402,102},{407,73},{410,108},{415,79},{416,79},{417,111},
    {427,116},{430,84},{431,85},{432,117},{438,122},
    {461,65},{462,97},{463,73},{464,105},{465,79},{466,111},{467,85},{468,117},
    {469,85},{470,117},{471,85},{472,117},{473,85},{474,117},{475,85},{476,117},
    {478,65},{479,97},{484,71},{485,103},{486,71},{487,103},{488,75},{489,107},
    {490,79},{491,111},{492,79},{493,111},{496,106},{609,103},
    // 697..818
    {697,39},{698,34},{700,39},{708,94},{710,94},{712,39},{715,96},{717,95},{732,126},
    {768,96},{770,94},{771,126},{782,34},{817,95},{818,95},
    // 8192..8482
    {8192,32},{8193,32},{8194,32},{8195,32},{8196,32},{8197,32},{8198,32},
    {8208,45},{8209,45},{8211,45},{8212,45},
    {8216,39},{8217,39},{8218,44},{8220,34},{8221,34},{8222,34},
    {8226,46},{8230,46},{8242,39},{8245,96},{8249,60},{8250,62},{8482,84},
    // 65281..65374 (65311 has no entry)
    {65281,33},{65282,34},{65283,35},{65284,36},{65285,37},{65286,38},{65287,39},{65288,40},
    {65289,41},{65290,42},{65291,43},{65292,44},{65293,45},{65294,46},{65295,47},{65296,48},
    {65297,49},{65298,50},{65299,51},{65300,52},{65301,53},{65302,54},{65303,55},{65304,56},
    {65305,57},{65306,58},{65307,59},{65308,60},{65309,61},{65310,62},
    {65312,64},{65313,65},{65314,66},{65315,67},{65316,68},{65317,69},{65318,70},{65319,71},
    {65320,72},{65321,73},{65322,74},{65323,75},{65324,76},{65325,77},{65326,78},{65327,79},
    {65328,80},{65329,81},{65330,82},{65331,83},{65332,84},{65333,85},{65334,86},{65335,87},
    {65336,88},{65337,89},{65338,90},{65339,91},{65340,92},{65341,93},{65342,94},{65343,95},
    {65344,96},{65345,97},{65346,98},{65347,99},{65348,100},{65349,101},{65350,102},{65351,103},
    {65352,104},{65353,105},{65354,106},{65355,107},{65356,108},{65357,109},{65358,110},{65359,111},
    {65360,112},{65361,113},{65362,114},{65363,115},{65364,116},{65365,117},{65366,118},{65367,119},
    {65368,120},{65369,121},{65370,122},{65371,123},{65372,124},{65373,125},{65374,126}
};
| 139 | |
| 140 } | |
| 141 | |
| 142 namespace pfc { | |
| 143 namespace stringcvt { | |
| 144 | |
| 145 char charToASCII( unsigned c ) { | |
| 146 if (c < 128) return (char)c; | |
| 147 unsigned lo = 0, hi = PFC_TABSIZE(g_asciiMap); | |
| 148 while( lo < hi ) { | |
| 149 const unsigned mid = (lo + hi) / 2; | |
| 150 const asciiMap_t entry = g_asciiMap[mid]; | |
| 151 if ( c > entry.from ) { | |
| 152 lo = mid + 1; | |
| 153 } else if (c < entry.from) { | |
| 154 hi = mid; | |
| 155 } else { | |
| 156 return (char)entry.to; | |
| 157 } | |
| 158 } | |
| 159 return '?'; | |
| 160 } | |
| 161 | |
| 162 t_size convert_utf8_to_wide(wchar_t * p_out,t_size p_out_size,const char * p_in,t_size p_in_size) { | |
| 163 const t_size insize = p_in_size; | |
| 164 t_size inptr = 0; | |
| 165 string_writer_t<wchar_t> writer(p_out,p_out_size); | |
| 166 | |
| 167 while(inptr < insize && !writer.is_overrun()) { | |
| 168 unsigned newchar = 0; | |
| 169 t_size delta = utf8_decode_char(p_in + inptr,newchar,insize - inptr); | |
| 170 if (delta == 0 || newchar == 0) break; | |
| 171 PFC_ASSERT(inptr + delta <= insize); | |
| 172 inptr += delta; | |
| 173 writer.write_as_wide(newchar); | |
| 174 } | |
| 175 | |
| 176 return writer.finalize(); | |
| 177 } | |
| 178 | |
| 179 t_size convert_utf8_to_wide_unchecked(wchar_t * p_out,const char * p_in) { | |
| 180 t_size inptr = 0; | |
| 181 string_writer_t<wchar_t,false> writer(p_out,SIZE_MAX); | |
| 182 | |
| 183 while(!writer.is_overrun()) { | |
| 184 unsigned newchar = 0; | |
| 185 t_size delta = utf8_decode_char(p_in + inptr,newchar); | |
| 186 if (delta == 0 || newchar == 0) break; | |
| 187 inptr += delta; | |
| 188 writer.write_as_wide(newchar); | |
| 189 } | |
| 190 | |
| 191 return writer.finalize(); | |
| 192 } | |
| 193 | |
| 194 t_size convert_wide_to_utf8(char * p_out,t_size p_out_size,const wchar_t * p_in,t_size p_in_size) { | |
| 195 const t_size insize = p_in_size; | |
| 196 t_size inptr = 0; | |
| 197 string_writer_t<char> writer(p_out,p_out_size); | |
| 198 | |
| 199 while(inptr < insize && !writer.is_overrun()) { | |
| 200 unsigned newchar = 0; | |
| 201 t_size delta = wide_decode_char(p_in + inptr,&newchar,insize - inptr); | |
| 202 if (delta == 0 || newchar == 0) break; | |
| 203 PFC_ASSERT(inptr + delta <= insize); | |
| 204 inptr += delta; | |
| 205 writer.write_as_utf8(newchar); | |
| 206 } | |
| 207 | |
| 208 return writer.finalize(); | |
| 209 } | |
| 210 | |
| 211 t_size estimate_utf8_to_wide(const char * p_in) { | |
| 212 t_size inptr = 0; | |
| 213 t_size retval = 1;//1 for null terminator | |
| 214 for(;;) { | |
| 215 unsigned newchar = 0; | |
| 216 t_size delta = utf8_decode_char(p_in + inptr,newchar); | |
| 217 if (delta == 0 || newchar == 0) break; | |
| 218 inptr += delta; | |
| 219 | |
| 220 { | |
| 221 wchar_t temp[2]; | |
| 222 retval += wide_encode_char(newchar,temp); | |
| 223 } | |
| 224 } | |
| 225 return retval; | |
| 226 } | |
| 227 | |
| 228 t_size estimate_utf8_to_wide(const char * p_in,t_size p_in_size) { | |
| 229 const t_size insize = p_in_size; | |
| 230 t_size inptr = 0; | |
| 231 t_size retval = 1;//1 for null terminator | |
| 232 while(inptr < insize) { | |
| 233 unsigned newchar = 0; | |
| 234 t_size delta = utf8_decode_char(p_in + inptr,newchar,insize - inptr); | |
| 235 if (delta == 0 || newchar == 0) break; | |
| 236 PFC_ASSERT(inptr + delta <= insize); | |
| 237 inptr += delta; | |
| 238 | |
| 239 { | |
| 240 wchar_t temp[2]; | |
| 241 retval += wide_encode_char(newchar,temp); | |
| 242 } | |
| 243 } | |
| 244 return retval; | |
| 245 } | |
| 246 | |
| 247 t_size estimate_wide_to_utf8(const wchar_t * p_in,t_size p_in_size) { | |
| 248 const t_size insize = p_in_size; | |
| 249 t_size inptr = 0; | |
| 250 t_size retval = 1;//1 for null terminator | |
| 251 while(inptr < insize) { | |
| 252 unsigned newchar = 0; | |
| 253 t_size delta = wide_decode_char(p_in + inptr,&newchar,insize - inptr); | |
| 254 if (delta == 0 || newchar == 0) break; | |
| 255 PFC_ASSERT(inptr + delta <= insize); | |
| 256 inptr += delta; | |
| 257 | |
| 258 { | |
| 259 char temp[6]; | |
| 260 delta = utf8_encode_char(newchar,temp); | |
| 261 if (delta == 0) break; | |
| 262 retval += delta; | |
| 263 } | |
| 264 } | |
| 265 return retval; | |
| 266 } | |
| 267 | |
| 268 t_size estimate_wide_to_win1252( const wchar_t * p_in, t_size p_in_size ) { | |
| 269 const t_size insize = p_in_size; | |
| 270 t_size inptr = 0; | |
| 271 t_size retval = 1;//1 for null terminator | |
| 272 while(inptr < insize) { | |
| 273 unsigned newchar = 0; | |
| 274 t_size delta = wide_decode_char(p_in + inptr,&newchar,insize - inptr); | |
| 275 if (delta == 0 || newchar == 0) break; | |
| 276 PFC_ASSERT(inptr + delta <= insize); | |
| 277 inptr += delta; | |
| 278 | |
| 279 ++retval; | |
| 280 } | |
| 281 return retval; | |
| 282 | |
| 283 } | |
| 284 | |
| 285 t_size convert_wide_to_win1252( char * p_out, t_size p_out_size, const wchar_t * p_in, t_size p_in_size ) { | |
| 286 const t_size insize = p_in_size; | |
| 287 t_size inptr = 0; | |
| 288 string_writer_t<char> writer(p_out,p_out_size); | |
| 289 | |
| 290 while(inptr < insize && !writer.is_overrun()) { | |
| 291 unsigned newchar = 0; | |
| 292 t_size delta = wide_decode_char(p_in + inptr,&newchar,insize - inptr); | |
| 293 if (delta == 0 || newchar == 0) break; | |
| 294 PFC_ASSERT(inptr + delta <= insize); | |
| 295 inptr += delta; | |
| 296 | |
| 297 char temp; | |
| 298 if (!charExport1252( temp, newchar )) temp = '?'; | |
| 299 writer.write( temp ); | |
| 300 } | |
| 301 | |
| 302 return writer.finalize(); | |
| 303 | |
| 304 } | |
| 305 | |
| 306 t_size estimate_win1252_to_wide( const char * p_source, t_size p_source_size ) { | |
| 307 return strlen_max_t( p_source, p_source_size ) + 1; | |
| 308 } | |
| 309 | |
| 310 t_size convert_win1252_to_wide( wchar_t * p_out, t_size p_out_size, const char * p_in, t_size p_in_size ) { | |
| 311 const t_size insize = p_in_size; | |
| 312 t_size inptr = 0; | |
| 313 string_writer_t<wchar_t> writer(p_out,p_out_size); | |
| 314 | |
| 315 while(inptr < insize && !writer.is_overrun()) { | |
| 316 char inChar = p_in[inptr]; | |
| 317 if (inChar == 0) break; | |
| 318 ++inptr; | |
| 319 | |
| 320 unsigned out; | |
| 321 if (!charImport1252( out , inChar )) out = '?'; | |
| 322 writer.write_as_wide( out ); | |
| 323 } | |
| 324 | |
| 325 return writer.finalize(); | |
| 326 } | |
| 327 | |
| 328 t_size estimate_utf8_to_win1252( const char * p_in, t_size p_in_size ) { | |
| 329 const t_size insize = p_in_size; | |
| 330 t_size inptr = 0; | |
| 331 t_size retval = 1;//1 for null terminator | |
| 332 while(inptr < insize) { | |
| 333 unsigned newchar = 0; | |
| 334 t_size delta = utf8_decode_char(p_in + inptr,newchar,insize - inptr); | |
| 335 if (delta == 0 || newchar == 0) break; | |
| 336 PFC_ASSERT(inptr + delta <= insize); | |
| 337 inptr += delta; | |
| 338 | |
| 339 ++retval; | |
| 340 } | |
| 341 return retval; | |
| 342 } | |
| 343 t_size convert_utf8_to_win1252( char * p_out, t_size p_out_size, const char * p_in, t_size p_in_size ) { | |
| 344 const t_size insize = p_in_size; | |
| 345 t_size inptr = 0; | |
| 346 string_writer_t<char> writer(p_out,p_out_size); | |
| 347 | |
| 348 while(inptr < insize && !writer.is_overrun()) { | |
| 349 unsigned newchar = 0; | |
| 350 t_size delta = utf8_decode_char(p_in + inptr,newchar,insize - inptr); | |
| 351 if (delta == 0 || newchar == 0) break; | |
| 352 PFC_ASSERT(inptr + delta <= insize); | |
| 353 inptr += delta; | |
| 354 | |
| 355 char temp; | |
| 356 if (!charExport1252( temp, newchar )) temp = '?'; | |
| 357 writer.write( temp ); | |
| 358 } | |
| 359 | |
| 360 return writer.finalize(); | |
| 361 } | |
| 362 | |
| 363 t_size estimate_win1252_to_utf8( const char * p_in, t_size p_in_size ) { | |
| 364 const size_t insize = p_in_size; | |
| 365 t_size inptr = 0; | |
| 366 t_size retval = 1; // 1 for null terminator | |
| 367 while(inptr < insize) { | |
| 368 unsigned newchar; | |
| 369 char c = p_in[inptr]; | |
| 370 if (c == 0) break; | |
| 371 ++inptr; | |
| 372 if (!charImport1252( newchar, c)) newchar = '?'; | |
| 373 | |
| 374 char temp[6]; | |
| 375 retval += pfc::utf8_encode_char( newchar, temp ); | |
| 376 } | |
| 377 return retval; | |
| 378 } | |
| 379 | |
| 380 t_size convert_win1252_to_utf8( char * p_out, t_size p_out_size, const char * p_in, t_size p_in_size ) { | |
| 381 const t_size insize = p_in_size; | |
| 382 t_size inptr = 0; | |
| 383 string_writer_t<char> writer(p_out,p_out_size); | |
| 384 | |
| 385 while(inptr < insize && !writer.is_overrun()) { | |
| 386 char inChar = p_in[inptr]; | |
| 387 if (inChar == 0) break; | |
| 388 ++inptr; | |
| 389 | |
| 390 unsigned out; | |
| 391 if (!charImport1252( out , inChar )) out = '?'; | |
| 392 writer.write_as_utf8( out ); | |
| 393 } | |
| 394 | |
| 395 return writer.finalize(); | |
| 396 } | |
| 397 | |
| 398 t_size estimate_utf8_to_ascii( const char * p_source, t_size p_source_size ) { | |
| 399 return estimate_utf8_to_win1252( p_source, p_source_size ); | |
| 400 } | |
| 401 t_size convert_utf8_to_ascii( char * p_out, t_size p_out_size, const char * p_in, t_size p_in_size ) { | |
| 402 const t_size insize = p_in_size; | |
| 403 t_size inptr = 0; | |
| 404 string_writer_t<char> writer(p_out,p_out_size); | |
| 405 | |
| 406 while(inptr < insize && !writer.is_overrun()) { | |
| 407 unsigned newchar = 0; | |
| 408 t_size delta = utf8_decode_char(p_in + inptr,newchar,insize - inptr); | |
| 409 if (delta == 0 || newchar == 0) break; | |
| 410 PFC_ASSERT(inptr + delta <= insize); | |
| 411 inptr += delta; | |
| 412 | |
| 413 writer.write( charToASCII(newchar) ); | |
| 414 } | |
| 415 | |
| 416 return writer.finalize(); | |
| 417 } | |
| 418 | |
| 419 | |
| 420 | |
| 421 // 2016-05-16 additions | |
| 422 // Explicit UTF-16 converters | |
| 423 t_size estimate_utf16_to_utf8( const char16_t * p_in, size_t p_in_size ) { | |
| 424 const t_size insize = p_in_size; | |
| 425 t_size inptr = 0; | |
| 426 t_size retval = 1;//1 for null terminator | |
| 427 while(inptr < insize) { | |
| 428 unsigned newchar = 0; | |
| 429 t_size delta = utf16_decode_char(p_in + inptr,&newchar,insize - inptr); | |
| 430 if (delta == 0 || newchar == 0) break; | |
| 431 PFC_ASSERT(inptr + delta <= insize); | |
| 432 inptr += delta; | |
| 433 | |
| 434 { | |
| 435 char temp[6]; | |
| 436 delta = utf8_encode_char(newchar,temp); | |
| 437 if (delta == 0) break; | |
| 438 retval += delta; | |
| 439 } | |
| 440 } | |
| 441 return retval; | |
| 442 } | |
| 443 t_size convert_utf16_to_utf8( char * p_out, size_t p_out_size, const char16_t * p_in, size_t p_in_size ) { | |
| 444 const t_size insize = p_in_size; | |
| 445 t_size inptr = 0; | |
| 446 string_writer_t<char> writer(p_out,p_out_size); | |
| 447 | |
| 448 while(inptr < insize && !writer.is_overrun()) { | |
| 449 unsigned newchar = 0; | |
| 450 t_size delta = utf16_decode_char(p_in + inptr,&newchar,insize - inptr); | |
| 451 if (delta == 0 || newchar == 0) break; | |
| 452 PFC_ASSERT(inptr + delta <= insize); | |
| 453 inptr += delta; | |
| 454 writer.write_as_utf8(newchar); | |
| 455 } | |
| 456 | |
| 457 return writer.finalize(); | |
| 458 } | |
| 459 | |
| 460 } | |
| 461 } | |
| 462 | |
| 463 #ifdef _WINDOWS | |
| 464 | |
| 465 | |
| 466 namespace pfc { | |
| 467 namespace stringcvt { | |
| 468 | |
| 469 | |
| 470 t_size convert_codepage_to_wide(unsigned p_codepage,wchar_t * p_out,t_size p_out_size,const char * p_source,t_size p_source_size) { | |
| 471 if (p_out_size == 0) return 0; | |
| 472 memset(p_out,0,p_out_size * sizeof(*p_out)); | |
| 473 MultiByteToWideChar(p_codepage,0,p_source, pfc::downcast_guarded<int>(p_source_size),p_out, pfc::downcast_guarded<int>(p_out_size)); | |
| 474 p_out[p_out_size-1] = 0; | |
| 475 return wcslen(p_out); | |
| 476 } | |
| 477 | |
| 478 t_size convert_wide_to_codepage(unsigned p_codepage,char * p_out,t_size p_out_size,const wchar_t * p_source,t_size p_source_size) { | |
| 479 if (p_out_size == 0) return 0; | |
| 480 memset(p_out,0,p_out_size * sizeof(*p_out)); | |
| 481 WideCharToMultiByte(p_codepage,0,p_source,pfc::downcast_guarded<int>(p_source_size),p_out,pfc::downcast_guarded<int>(p_out_size),0,FALSE); | |
| 482 p_out[p_out_size-1] = 0; | |
| 483 return strlen(p_out); | |
| 484 } | |
| 485 | |
| 486 t_size estimate_codepage_to_wide(unsigned p_codepage,const char * p_source,t_size p_source_size) { | |
| 487 return MultiByteToWideChar(p_codepage,0,p_source, pfc::downcast_guarded<int>(strlen_max(p_source,p_source_size)),0,0) + 1; | |
| 488 } | |
| 489 t_size estimate_wide_to_codepage(unsigned p_codepage,const wchar_t * p_source,t_size p_source_size) { | |
| 490 return WideCharToMultiByte(p_codepage,0,p_source, pfc::downcast_guarded<int>(wcslen_max(p_source,p_source_size)),0,0,0,FALSE) + 1; | |
| 491 } | |
| 492 } | |
| 493 | |
| 494 } | |
| 495 | |
| 496 #endif //_WINDOWS | |
| 497 | |
| 498 pfc::string_base & operator<<(pfc::string_base & p_fmt, const wchar_t * p_str) { | |
| 499 p_fmt.add_string(pfc::stringcvt::string_utf8_from_wide(p_str) ); return p_fmt; | |
| 500 } |
