comparison foosdk/sdk/pfc/string_base.cpp @ 1:20d02a178406 default tip

*: check in everything else yay
author Paper <paper@tflc.us>
date Mon, 05 Jan 2026 02:15:46 -0500
parents
children
comparison
equal deleted inserted replaced
0:e9bb126753e7 1:20d02a178406
1 #include "pfc-lite.h"
2 #include "string_base.h"
3 #include "pathUtils.h"
4 #include "primitives.h"
5 #include "other.h"
6 #include <set>
7 #include <math.h>
8 #include "splitString2.h"
9
10 namespace pfc {
11
12 bool string_base::is_valid_utf8() const { return pfc::is_valid_utf8(get_ptr()); }
13 t_size string_base::scan_filename() const { return pfc::scan_filename(get_ptr()); }
14 t_size string_base::find_first(char p_char, t_size p_start) const { return pfc::string_find_first(get_ptr(), p_char, p_start); }
15 t_size string_base::find_last(char p_char, t_size p_start) const { return pfc::string_find_last(get_ptr(), p_char, p_start); }
16 t_size string_base::find_first(const char* p_string, t_size p_start) const { return pfc::string_find_first(get_ptr(), p_string, p_start); }
17 t_size string_base::find_last(const char* p_string, t_size p_start) const { return pfc::string_find_last(get_ptr(), p_string, p_start); }
18 bool string_base::has_prefix(const char* prefix) const { return string_has_prefix(get_ptr(), prefix); }
19 bool string_base::has_prefix_i(const char* prefix) const { return string_has_prefix_i(get_ptr(), prefix); }
20 bool string_base::has_suffix(const char* suffix) const { return string_has_suffix(get_ptr(), suffix); }
21 bool string_base::has_suffix_i(const char* suffix) const { return string_has_suffix_i(get_ptr(), suffix); }
22 bool string_base::equals(const char* other) const { return strcmp(*this, other) == 0; }
23
24 void string_receiver::add_char(t_uint32 p_char)
25 {
26 char temp[8];
27 t_size len = utf8_encode_char(p_char,temp);
28 if (len>0) add_string(temp,len);
29 }
30
31 void string_base::skip_trailing_chars( const char * lstCharsStr ) {
32 std::set<unsigned> lstChars;
33 for ( ;; ) {
34 unsigned c;
35 auto delta = utf8_decode_char( lstCharsStr, c );
36 if ( delta == 0 ) break;
37 lstCharsStr += delta;
38 lstChars.insert( c );
39 }
40
41 const char * str = get_ptr();
42 t_size ptr,trunc = 0;
43 bool need_trunc = false;
44 for(ptr=0;str[ptr];)
45 {
46 unsigned c;
47 t_size delta = utf8_decode_char(str+ptr,c);
48 if (delta==0) break;
49 if ( lstChars.count( c ) > 0 )
50 {
51 if (!need_trunc) {
52 need_trunc = true;
53 trunc = ptr;
54 }
55 }
56 else
57 {
58 need_trunc = false;
59 }
60 ptr += delta;
61 }
62 if (need_trunc) truncate(trunc);
63 }
64
65 void string_base::skip_trailing_char(unsigned skip)
66 {
67 const char * str = get_ptr();
68 t_size ptr,trunc = 0;
69 bool need_trunc = false;
70 for(ptr=0;str[ptr];)
71 {
72 unsigned c;
73 t_size delta = utf8_decode_char(str+ptr,c);
74 if (delta==0) break;
75 if (c==skip)
76 {
77 if (!need_trunc) {
78 need_trunc = true;
79 trunc = ptr;
80 }
81 }
82 else
83 {
84 need_trunc = false;
85 }
86 ptr += delta;
87 }
88 if (need_trunc) truncate(trunc);
89 }
90
91 string8 format_time(uint64_t p_seconds) {
92 string8 ret;
93 t_uint64 length = p_seconds;
94 unsigned weeks,days,hours,minutes,seconds;
95
96 weeks = (unsigned)( ( length / (60*60*24*7) ) );
97 days = (unsigned)( ( length / (60*60*24) ) % 7 );
98 hours = (unsigned) ( ( length / (60 * 60) ) % 24);
99 minutes = (unsigned) ( ( length / (60 ) ) % 60 );
100 seconds = (unsigned) ( ( length ) % 60 );
101
102 if (weeks) {
103 ret << weeks << "wk ";
104 }
105 if (days || weeks) {
106 ret << days << "d ";
107 }
108 if (hours || days || weeks) {
109 ret << hours << ":" << format_uint(minutes,2) << ":" << format_uint(seconds,2);
110 } else {
111 ret << minutes << ":" << format_uint(seconds,2);
112 }
113 return ret;
114 }
115
// Tells whether c separates path components.
// With _WIN32 defined that is any of \ / | : ; otherwise only '/'.
bool is_path_separator(unsigned c)
{
#ifdef _WIN32
	switch (c) {
	case '\\':
	case '/':
	case '|':
	case ':':
		return true;
	default:
		return false;
	}
#else
	return '/' == c;
#endif
}
124
// Tells whether c is rejected inside a path component.
// With _WINDOWS defined the set is \ / | : * ? " > < ; otherwise it is / * ?.
bool is_path_bad_char(unsigned c)
{
#ifdef _WINDOWS
	switch (c) {
	case '\\': case '/': case '|': case ':':
	case '*': case '?': case '\"': case '>': case '<':
		return true;
	default:
		return false;
	}
#else
	switch (c) {
	case '/': case '*': case '?':
		return true;
	default:
		return false;
	}
#endif
}
133
134
135
136 char * strdup_n(const char * src,t_size len)
137 {
138 len = strlen_max(src,len);
139 char * ret = (char*)malloc(len+1);
140 if (ret)
141 {
142 memcpy(ret,src,len);
143 ret[len]=0;
144 }
145 return ret;
146 }
147
148 string8 string_filename(const char * fn)
149 {
150 string8 ret;
151 fn += pfc::scan_filename(fn);
152 const char * ptr=fn,*dot=0;
153 while(*ptr && *ptr!='?')
154 {
155 if (*ptr=='.') dot=ptr;
156 ptr++;
157 }
158
159 if (dot && dot>fn) ret.set_string(fn,dot-fn);
160 else ret.set_string(fn);
161 return ret;
162 }
163
// Returns a pointer just past the last '.' in filenameDotExt, or "" when it has no dot.
const char * extract_ext_v2( const char * filenameDotExt ) {
	const char * const lastDot = strrchr(filenameDotExt, '.');
	if ( lastDot == nullptr ) return "";
	return lastDot + 1;
}
168
169 string8 remove_ext_v2( const char * filenameDotExt ) {
170 auto split = strrchr(filenameDotExt, '.');
171 string8 ret;
172 if ( split ) ret.set_string_nc( filenameDotExt, split-filenameDotExt );
173 else ret = filenameDotExt;
174 return ret;
175 }
176
177 const char * filename_ext_v2( const char * fn, char slash ) {
178 if ( slash == 0 ) {
179 slash = pfc::io::path::getDefaultSeparator();
180 }
181 size_t split = pfc::string_find_last( fn, slash );
182 if ( split == SIZE_MAX ) return fn;
183 return fn + split + 1;
184 }
185
186 string8 string_filename_ext(const char * fn)
187 {
188 string8 ret;
189 fn += pfc::scan_filename(fn);
190 const char * ptr = fn;
191 while(*ptr && *ptr!='?') ptr++;
192 ret.set_string(fn,ptr-fn);
193 return ret;
194 }
195
196 size_t find_extension_offset(const char * src) {
197 const char * start = src + pfc::scan_filename(src);
198 const char * end = start + strlen(start);
199 const char * ptr = end - 1;
200 while (ptr > start && *ptr != '.')
201 {
202 if (*ptr == '?') end = ptr;
203 ptr--;
204 }
205
206 if (ptr >= start && *ptr == '.')
207 {
208 return ptr - src;
209 }
210
211 return SIZE_MAX;
212 }
213
214 string8 string_extension(const char * src)
215 {
216 string8 ret;
217 const char * start = src + pfc::scan_filename(src);
218 const char * end = start + strlen(start);
219 const char * ptr = end-1;
220 while(ptr>start && *ptr!='.')
221 {
222 if (*ptr=='?') end=ptr;
223 ptr--;
224 }
225
226 if (ptr>=start && *ptr=='.')
227 {
228 ptr++;
229 ret.set_string(ptr, end-ptr);
230 }
231 return ret;
232 }
233
234
235 bool has_path_bad_chars(const char * param)
236 {
237 while(*param)
238 {
239 if (is_path_bad_char(*param)) return true;
240 param++;
241 }
242 return false;
243 }
244
245 void float_to_string(char * out,t_size out_max,double val,unsigned precision,bool b_sign) {
246 pfc::string_fixed_t<63> temp;
247 t_size outptr;
248
249 if (out_max == 0) return;
250 out_max--;//for null terminator
251
252 outptr = 0;
253
254 if (outptr == out_max) {out[outptr]=0;return;}
255
256 if (val<0) {out[outptr++] = '-'; val = -val;}
257 else if (val > 0 && b_sign) {out[outptr++] = '+';}
258
259 if (outptr == out_max) {out[outptr]=0;return;}
260
261
262 {
263 double powval = pow((double)10.0,(double)precision);
264 temp << (t_int64)floor(val * powval + 0.5);
265 //_i64toa(blargh,temp,10);
266 }
267
268 const t_size temp_len = temp.length();
269 if (temp_len <= precision)
270 {
271 out[outptr++] = '0';
272 if (outptr == out_max) {out[outptr]=0;return;}
273 out[outptr++] = '.';
274 if (outptr == out_max) {out[outptr]=0;return;}
275 t_size d;
276 for(d=precision-temp_len;d;d--)
277 {
278 out[outptr++] = '0';
279 if (outptr == out_max) {out[outptr]=0;return;}
280 }
281 for(d=0;d<temp_len;d++)
282 {
283 out[outptr++] = temp[d];
284 if (outptr == out_max) {out[outptr]=0;return;}
285 }
286 }
287 else
288 {
289 t_size d = temp_len;
290 const char * src = temp;
291 while(*src)
292 {
293 if (d-- == precision)
294 {
295 out[outptr++] = '.';
296 if (outptr == out_max) {out[outptr]=0;return;}
297 }
298 out[outptr++] = *(src++);
299 if (outptr == out_max) {out[outptr]=0;return;}
300 }
301 }
302 out[outptr] = 0;
303 }
304
305
306
307 static double pfc_string_to_float_internal(const char * src) noexcept
308 {
309 bool neg = false;
310 t_int64 val = 0;
311 int div = 0;
312 bool got_dot = false;
313
314 while(*src==' ') src++;
315
316 if (*src=='-') {neg = true;src++;}
317 else if (*src=='+') src++;
318
319 while(*src)
320 {
321 if (*src>='0' && *src<='9')
322 {
323 int d = *src - '0';
324 val = val * 10 + d;
325 if (got_dot) div--;
326 src++;
327 }
328 else if (*src=='.' || *src==',')
329 {
330 if (got_dot) break;
331 got_dot = true;
332 src++;
333 }
334 else if (*src=='E' || *src=='e')
335 {
336 src++;
337 div += atoi(src);
338 break;
339 }
340 else break;
341 }
342 if (neg) val = -val;
343
344 if (val != 0) {
345 // SPECIAL FIX: ensure 0.2 and 0.200000 return the EXACT same float
346 while (val % 10 == 0) {
347 val /= 10; ++div;
348 }
349 }
350 return (double) val * exp_int(10, div);
351 }
352
353 double string_to_float(const char * src) noexcept {
354 return pfc_string_to_float_internal(src);
355 }
356 double string_to_float(const char * src,t_size max) noexcept {
357 char blargh[128];
358 if (max > 127) max = 127;
359 t_size walk;
360 for(walk = 0; walk < max && src[walk]; walk++) blargh[walk] = src[walk];
361 blargh[walk] = 0;
362 return pfc_string_to_float_internal(blargh);
363 }
364
365
366
367 void string_base::convert_to_lower_ascii(const char * src,char replace)
368 {
369 reset();
370 PFC_ASSERT(replace>0);
371 while(*src)
372 {
373 unsigned c;
374 t_size delta = utf8_decode_char(src,c);
375 if (delta==0) {c = replace; delta = 1;}
376 else if (c>=0x80) c = replace;
377 add_byte((char)c);
378 src += delta;
379 }
380 }
381
382 void convert_to_lower_ascii(const char * src,t_size max,char * out,char replace)
383 {
384 t_size ptr = 0;
385 PFC_ASSERT(replace>0);
386 while(ptr<max && src[ptr])
387 {
388 unsigned c;
389 t_size delta = utf8_decode_char(src+ptr,c,max-ptr);
390 if (delta==0) {c = replace; delta = 1;}
391 else if (c>=0x80) c = replace;
392 *(out++) = (char)c;
393 ptr += delta;
394 }
395 *out = 0;
396 }
397
398 t_size strstr_ex(const char * p_string,t_size p_string_len,const char * p_substring,t_size p_substring_len) noexcept
399 {
400 p_string_len = strlen_max(p_string,p_string_len);
401 p_substring_len = strlen_max(p_substring,p_substring_len);
402 t_size index = 0;
403 while(index + p_substring_len <= p_string_len)
404 {
405 if (memcmp(p_string+index,p_substring,p_substring_len) == 0) return index;
406 t_size delta = utf8_char_len(p_string+index,p_string_len - index);
407 if (delta == 0) break;
408 index += delta;
409 }
410 return SIZE_MAX;
411 }
412
413 unsigned atoui_ex(const char * p_string,t_size p_string_len) noexcept
414 {
415 unsigned ret = 0; t_size ptr = 0;
416 while(ptr<p_string_len)
417 {
418 char c = p_string[ptr];
419 if (! ( c >= '0' && c <= '9' ) ) break;
420 ret = ret * 10 + (unsigned)( c - '0' );
421 ptr++;
422 }
423 return ret;
424 }
425
// Compares two character ranges of explicit length; null bytes get no special treatment.
// Returns -1, 0 or 1. When one range is a prefix of the other, the shorter one sorts first.
int strcmp_nc(const char* p1, size_t n1, const char * p2, size_t n2) noexcept {
	const size_t common = (n1 < n2) ? n1 : n2;
	for (size_t at = 0; at < common; ++at) {
		const char a = p1[at];
		const char b = p2[at];
		if (a != b) return (a < b) ? -1 : 1;
	}
	if (n1 == n2) return 0;
	return (n1 < n2) ? -1 : 1;
}
441
442 int strcmp_ex(const char* p1,t_size n1,const char* p2,t_size n2) noexcept
443 {
444 n1 = strlen_max(p1,n1); n2 = strlen_max(p2,n2);
445 return strcmp_nc(p1, n1, p2, n2);
446 }
447
448 t_uint64 atoui64_ex(const char * src,t_size len) noexcept {
449 len = strlen_max(src,len);
450 t_uint64 ret = 0, mul = 1;
451 t_size ptr = len;
452 t_size start = 0;
453 // start += skip_spacing(src+start,len-start);
454
455 while(ptr>start)
456 {
457 char c = src[--ptr];
458 if (c>='0' && c<='9')
459 {
460 ret += (c-'0') * mul;
461 mul *= 10;
462 }
463 else
464 {
465 ret = 0;
466 mul = 1;
467 }
468 }
469 return ret;
470 }
471
472
473 t_int64 atoi64_ex(const char * src,t_size len) noexcept
474 {
475 len = strlen_max(src,len);
476 t_int64 ret = 0, mul = 1;
477 t_size ptr = len;
478 t_size start = 0;
479 bool neg = false;
480 // start += skip_spacing(src+start,len-start);
481 if (start < len && src[start] == '-') {neg = true; start++;}
482 // start += skip_spacing(src+start,len-start);
483
484 while(ptr>start)
485 {
486 char c = src[--ptr];
487 if (c>='0' && c<='9')
488 {
489 ret += (c-'0') * mul;
490 mul *= 10;
491 }
492 else
493 {
494 ret = 0;
495 mul = 1;
496 }
497 }
498 return neg ? -ret : ret;
499 }
500
501
502 string8 format_float(double p_val,unsigned p_width,unsigned p_prec)
503 {
504 string8 m_buffer;
505 char temp[64];
506 float_to_string(temp,64,p_val,p_prec,false);
507 temp[63] = 0;
508 t_size len = strlen(temp);
509 if (len < p_width)
510 m_buffer.add_chars(' ',p_width-len);
511 m_buffer += temp;
512 return m_buffer;
513 }
514
515 char format_hex_char(unsigned p_val)
516 {
517 PFC_ASSERT(p_val < 16);
518 return (p_val < 10) ? (char)p_val + '0' : (char)p_val - 10 + 'A';
519 }
520
521 format_int_t format_hex(t_uint64 p_val,unsigned p_width)
522 {
523 format_int_t ret;
524
525 if (p_width > 16) p_width = 16;
526 else if (p_width == 0) p_width = 1;
527 char temp[16];
528 unsigned n;
529 for(n=0;n<16;n++)
530 {
531 temp[15-n] = format_hex_char((unsigned)(p_val & 0xF));
532 p_val >>= 4;
533 }
534
535 for(n=0;n<16 && temp[n] == '0';n++) {}
536
537 if (n > 16 - p_width) n = 16 - p_width;
538
539 char * out = ret.m_buffer;
540 for(;n<16;n++)
541 *(out++) = temp[n];
542 *out = 0;
543 return ret;
544 }
545
546 char format_hex_char_lowercase(unsigned p_val)
547 {
548 PFC_ASSERT(p_val < 16);
549 return (p_val < 10) ? (char)p_val + '0' : (char)p_val - 10 + 'a';
550 }
551
552 format_int_t format_hex_lowercase(t_uint64 p_val,unsigned p_width)
553 {
554 format_int_t ret;
555 if (p_width > 16) p_width = 16;
556 else if (p_width == 0) p_width = 1;
557 char temp[16];
558 unsigned n;
559 for(n=0;n<16;n++)
560 {
561 temp[15-n] = format_hex_char_lowercase((unsigned)(p_val & 0xF));
562 p_val >>= 4;
563 }
564
565 for(n=0;n<16 && temp[n] == '0';n++) {}
566
567 if (n > 16 - p_width) n = 16 - p_width;
568
569 char * out = ret.m_buffer;
570 for(;n<16;n++)
571 *(out++) = temp[n];
572 *out = 0;
573 return ret;
574 }
575
576 format_int_t format_uint(t_uint64 val,unsigned p_width,unsigned p_base)
577 {
578 format_int_t ret;
579
580 enum {max_width = PFC_TABSIZE(ret.m_buffer) - 1};
581
582 if (p_width > max_width) p_width = max_width;
583 else if (p_width == 0) p_width = 1;
584
585 char temp[max_width];
586
587 unsigned n;
588 for(n=0;n<max_width;n++)
589 {
590 temp[max_width-1-n] = format_hex_char((unsigned)(val % p_base));
591 val /= p_base;
592 }
593
594 for(n=0;n<max_width && temp[n] == '0';n++) {}
595
596 if (n > max_width - p_width) n = max_width - p_width;
597
598 char * out = ret.m_buffer;
599
600 for(;n<max_width;n++)
601 *(out++) = temp[n];
602 *out = 0;
603
604 return ret;
605 }
606
607 string8 format_fixedpoint(t_int64 p_val,unsigned p_point)
608 {
609 string8 m_buffer;
610 unsigned div = 1;
611 for(unsigned n=0;n<p_point;n++) div *= 10;
612
613 if (p_val < 0) {m_buffer << "-";p_val = -p_val;}
614
615
616 m_buffer << format_int(p_val / div) << "." << format_int(p_val % div, p_point);
617 return m_buffer;
618 }
619
620
621 format_int_t format_int(t_int64 p_val,unsigned p_width,unsigned p_base)
622 {
623 format_int_t ret;
624 bool neg = false;
625 t_uint64 val;
626 if (p_val < 0) {neg = true; val = (t_uint64)(-p_val);}
627 else val = (t_uint64)p_val;
628
629 enum {max_width = PFC_TABSIZE(ret.m_buffer) - 1};
630
631 if (p_width > max_width) p_width = max_width;
632 else if (p_width == 0) p_width = 1;
633
634 if (neg && p_width > 1) p_width --;
635
636 char temp[max_width];
637
638 unsigned n;
639 for(n=0;n<max_width;n++)
640 {
641 temp[max_width-1-n] = format_hex_char((unsigned)(val % p_base));
642 val /= p_base;
643 }
644
645 for(n=0;n<max_width && temp[n] == '0';n++) {}
646
647 if (n > max_width - p_width) n = max_width - p_width;
648
649 char * out = ret.m_buffer;
650
651 if (neg) *(out++) = '-';
652
653 for(;n<max_width;n++)
654 *(out++) = temp[n];
655 *out = 0;
656
657 return ret;
658 }
659
660 string8 format_hexdump_lowercase(const void * p_buffer,t_size p_bytes,const char * p_spacing)
661 {
662 string8 m_formatter;
663 t_size n;
664 const t_uint8 * buffer = (const t_uint8*)p_buffer;
665 for(n=0;n<p_bytes;n++)
666 {
667 if (n > 0 && p_spacing != 0) m_formatter << p_spacing;
668 m_formatter << format_hex_lowercase(buffer[n],2);
669 }
670 return m_formatter;
671 }
672
673 string8 format_hexdump(const void * p_buffer,t_size p_bytes,const char * p_spacing)
674 {
675 string8 m_formatter;
676 t_size n;
677 const t_uint8 * buffer = (const t_uint8*)p_buffer;
678 for(n=0;n<p_bytes;n++)
679 {
680 if (n > 0 && p_spacing != 0) m_formatter << p_spacing;
681 m_formatter << format_hex(buffer[n],2);
682 }
683 return m_formatter;
684 }
685
686
687
688 string8 string_replace_extension(const char * p_path,const char * p_ext)
689 {
690 string8 m_data;
691 m_data = p_path;
692 t_size dot = m_data.find_last('.');
693 if (dot < m_data.scan_filename())
694 {//argh
695 m_data += ".";
696 m_data += p_ext;
697 }
698 else
699 {
700 m_data.truncate(dot+1);
701 m_data += p_ext;
702 }
703 return m_data;
704 }
705
706 string8 string_directory(const char * p_path)
707 {
708 string8 ret;
709 t_size ptr = scan_filename(p_path);
710 if (ptr > 1) {
711 if (is_path_separator(p_path[ptr-1]) && !is_path_separator(p_path[ptr-2])) --ptr;
712 }
713 ret.set_string(p_path,ptr);
714 return ret;
715 }
716
717 t_size scan_filename(const char * ptr)
718 {
719 t_size n;
720 t_size _used = strlen(ptr);
721 for(n=_used;n!=0;n--)
722 {
723 if (is_path_separator(ptr[n-1])) return n;
724 }
725 return 0;
726 }
727
728
729
730 t_size string_find_first(const char * p_string,char p_tofind,t_size p_start) {
731 for(t_size walk = p_start; p_string[walk]; ++walk) {
732 if (p_string[walk] == p_tofind) return walk;
733 }
734 return SIZE_MAX;
735 }
736 t_size string_find_last(const char * p_string,char p_tofind,t_size p_start) {
737 return string_find_last_ex(p_string,SIZE_MAX,&p_tofind,1,p_start);
738 }
739 t_size string_find_first(const char * p_string,const char * p_tofind,t_size p_start) {
740 return string_find_first_ex(p_string,SIZE_MAX,p_tofind,SIZE_MAX,p_start);
741 }
742 t_size string_find_last(const char * p_string,const char * p_tofind,t_size p_start) {
743 return string_find_last_ex(p_string,SIZE_MAX,p_tofind,SIZE_MAX,p_start);
744 }
745
746 t_size string_find_first_ex(const char * p_string,t_size p_string_length,char p_tofind,t_size p_start) {
747 for(t_size walk = p_start; walk < p_string_length && p_string[walk]; ++walk) {
748 if (p_string[walk] == p_tofind) return walk;
749 }
750 return SIZE_MAX;
751 }
752 t_size string_find_last_ex(const char * p_string,t_size p_string_length,char p_tofind,t_size p_start) {
753 return string_find_last_ex(p_string,p_string_length,&p_tofind,1,p_start);
754 }
755 t_size string_find_first_ex(const char * p_string,t_size p_string_length,const char * p_tofind,t_size p_tofind_length,t_size p_start) {
756 p_string_length = strlen_max(p_string,p_string_length); p_tofind_length = strlen_max(p_tofind,p_tofind_length);
757 if (p_string_length >= p_tofind_length) {
758 t_size max = p_string_length - p_tofind_length;
759 for(t_size walk = p_start; walk <= max; walk++) {
760 if (_strcmp_partial_ex(p_string+walk,p_string_length-walk,p_tofind,p_tofind_length) == 0) return walk;
761 }
762 }
763 return SIZE_MAX;
764 }
765 t_size string_find_last_ex(const char * p_string,t_size p_string_length,const char * p_tofind,t_size p_tofind_length,t_size p_start) {
766 p_string_length = strlen_max(p_string,p_string_length); p_tofind_length = strlen_max(p_tofind,p_tofind_length);
767 if (p_string_length >= p_tofind_length) {
768 t_size max = min_t<t_size>(p_string_length - p_tofind_length,p_start);
769 for(t_size walk = max; walk != (t_size)(-1); walk--) {
770 if (_strcmp_partial_ex(p_string+walk,p_string_length-walk,p_tofind,p_tofind_length) == 0) return walk;
771 }
772 }
773 return SIZE_MAX;
774 }
775
776 t_size string_find_first_nc(const char * p_string,t_size p_string_length,char c,t_size p_start) {
777 for(t_size walk = p_start; walk < p_string_length; walk++) {
778 if (p_string[walk] == c) return walk;
779 }
780 return SIZE_MAX;
781 }
782
783 t_size string_find_first_nc(const char * p_string,t_size p_string_length,const char * p_tofind,t_size p_tofind_length,t_size p_start) {
784 if (p_string_length >= p_tofind_length) {
785 t_size max = p_string_length - p_tofind_length;
786 for(t_size walk = p_start; walk <= max; walk++) {
787 if (memcmp(p_string+walk, p_tofind, p_tofind_length) == 0) return walk;
788 }
789 }
790 return SIZE_MAX;
791 }
792
793
794 bool string_is_numeric(const char * p_string,t_size p_length) noexcept {
795 bool retval = false;
796 for(t_size walk = 0; walk < p_length && p_string[walk] != 0; walk++) {
797 if (!char_is_numeric(p_string[walk])) {retval = false; break;}
798 retval = true;
799 }
800 return retval;
801 }
802
803
804 void string_base::end_with(char p_char) {
805 if (!ends_with(p_char)) add_byte(p_char);
806 }
807 bool string_base::ends_with(char c) const {
808 t_size length = get_length();
809 return length > 0 && get_ptr()[length-1] == c;
810 }
811
812 void string_base::end_with_slash() {
813 end_with( io::path::getDefaultSeparator() );
814 }
815
816 char string_base::last_char() const {
817 size_t l = this->length();
818 if (l == 0) return 0;
819 return this->get_ptr()[l-1];
820 }
821 void string_base::truncate_last_char() {
822 size_t l = this->length();
823 if (l > 0) this->truncate( l - 1 );
824 }
825
826 void string_base::truncate_number_suffix() {
827 size_t l = this->length();
828 const char * const p = this->get_ptr();
829 while( l > 0 && char_is_numeric( p[l-1] ) ) --l;
830 truncate( l );
831 }
832
833 bool is_multiline(const char * p_string,t_size p_len) {
834 for(t_size n = 0; n < p_len && p_string[n]; n++) {
835 switch(p_string[n]) {
836 case '\r':
837 case '\n':
838 return true;
839 }
840 }
841 return false;
842 }
843
844 static t_uint64 pow10_helper(unsigned p_extra) {
845 t_uint64 ret = 1;
846 for(unsigned n = 0; n < p_extra; n++ ) ret *= 10;
847 return ret;
848 }
849
850 static uint64_t safeMulAdd(uint64_t prev, unsigned scale, uint64_t add) {
851 if (add >= scale || scale == 0) throw pfc::exception_invalid_params();
852 uint64_t v = prev * scale + add;
853 if (v / scale != prev) throw pfc::exception_invalid_params();
854 return v;
855 }
856
857 static size_t parseNumber(const char * in, uint64_t & outNumber) {
858 size_t walk = 0;
859 uint64_t total = 0;
860 for (;;) {
861 char c = in[walk];
862 if (!pfc::char_is_numeric(c)) break;
863 unsigned v = (unsigned)(c - '0');
864 uint64_t newVal = total * 10 + v;
865 if (newVal / 10 != total) throw pfc::exception_overflow();
866 total = newVal;
867 ++walk;
868 }
869 outNumber = total;
870 return walk;
871 }
872
873 double parse_timecode(const char * in) {
874 char separator = 0;
875 uint64_t seconds = 0;
876 unsigned colons = 0;
877 for (;;) {
878 uint64_t number = 0;
879 size_t digits = parseNumber(in, number);
880 if (digits == 0) throw pfc::exception_invalid_params();
881 in += digits;
882 char nextSeparator = *in;
883 switch (separator) { // *previous* separator
884 case '.':
885 if (nextSeparator != 0) throw pfc::exception_bug_check();
886 return (double)seconds + (double)pfc::exp_int(10, -(int)digits) * number;
887 case 0: // is first number in the string
888 seconds = number;
889 break;
890 case ':':
891 if (colons == 2) throw pfc::exception_invalid_params();
892 ++colons;
893 seconds = safeMulAdd(seconds, 60, number);
894 break;
895 }
896
897 if (nextSeparator == 0) {
898 // end of string
899 return (double)seconds;
900 }
901
902 ++in;
903 separator = nextSeparator;
904 }
905 }
906
907 string8 format_time_ex(double p_seconds,unsigned p_extra) {
908 string8 ret;
909 if (p_seconds < 0) {ret << "-"; p_seconds = -p_seconds;}
910 t_uint64 pow10 = pow10_helper(p_extra);
911 t_uint64 ticks = pfc::rint64(pow10 * p_seconds);
912
913 ret << pfc::format_time(ticks / pow10);
914 if (p_extra>0) {
915 ret << "." << pfc::format_uint(ticks % pow10, p_extra);
916 }
917 return ret;
918 }
919
920 void stringToUpperHere(string_base& p_out, const char* p_source, t_size p_sourceLen) {
921 p_out.clear();
922 stringToUpperAppend(p_out, p_source, p_sourceLen);
923 }
924 void stringToLowerHere(string_base& p_out, const char* p_source, t_size p_sourceLen) {
925 p_out.clear();
926 stringToLowerAppend(p_out, p_source, p_sourceLen);
927 }
928
929 void stringToUpperAppend(string_base & out, const char * src, t_size len) {
930 while(len && *src) {
931 unsigned c; t_size d;
932 d = utf8_decode_char(src,c,len);
933 if (d==0 || d>len) break;
934 out.add_char(charUpper(c));
935 src+=d;
936 len-=d;
937 }
938 }
939 void stringToLowerAppend(string_base & out, const char * src, t_size len) {
940 while(len && *src) {
941 unsigned c; t_size d;
942 d = utf8_decode_char(src,c,len);
943 if (d==0 || d>len) break;
944 out.add_char(charLower(c));
945 src+=d;
946 len-=d;
947 }
948 }
949
950 string8 format_file_size_short(uint64_t size, uint64_t * outUsedScale) {
951 string8 ret;
952 t_uint64 scale = 1;
953 const char * unit = "B";
954 const char * const unitTable[] = {"B","KB","MB","GB","TB"};
955 for(t_size walk = 1; walk < PFC_TABSIZE(unitTable); ++walk) {
956 t_uint64 next = scale * 1024;
957 if (size < next) break;
958 scale = next; unit = unitTable[walk];
959 }
960 ret << ( size / scale );
961
962 if (scale > 1 && ret.length() < 3) {
963 t_size digits = 3 - ret.length();
964 const t_uint64 mask = pow_int(10,digits);
965 t_uint64 remaining = ( (size * mask / scale) % mask );
966 while(digits > 0 && (remaining % 10) == 0) {
967 remaining /= 10; --digits;
968 }
969 if (digits > 0) {
970 ret << "." << format_uint(remaining, (t_uint32)digits);
971 }
972 }
973 ret << " " << unit;
974 if (outUsedScale != nullptr) *outUsedScale = scale;
975 return ret;
976 }
977
978 pfc::string8 format_index(size_t idx) {
979 return idx == SIZE_MAX ? "<n/a>" : pfc::format_uint(idx);
980 }
981
982 pfc::string8 format_permutation(const size_t* arg, size_t n) {
983 pfc::string_formatter ret;
984 for( size_t walk = 0; walk < n; ++ walk ) {
985 if (arg[walk] != walk) {
986 if ( !ret.is_empty() ) ret << ", ";
987 ret << arg[walk] << "->" << walk;
988 }
989 }
990 return ret;
991 }
992 pfc::string8 format_mask(pfc::bit_array const& mask, size_t n) {
993 pfc::string_formatter ret;
994 mask.for_each(true, 0, n, [&] (size_t idx) {
995 if (!ret.is_empty() ) ret << ", ";
996 ret << idx;
997 });
998 return ret;
999 }
1000
1001 bool string_base::truncate_eol(t_size start)
1002 {
1003 const char * ptr = get_ptr() + start;
1004 for(t_size n=start;*ptr;n++)
1005 {
1006 if (*ptr==10 || *ptr==13)
1007 {
1008 truncate(n);
1009 return true;
1010 }
1011 ptr++;
1012 }
1013 return false;
1014 }
1015
1016 bool string_base::fix_eol(const char * append,t_size start)
1017 {
1018 const bool rv = truncate_eol(start);
1019 if (rv) add_string(append);
1020 return rv;
1021 }
1022
1023 bool string_base::limit_length(t_size length_in_chars,const char * append)
1024 {
1025 bool rv = false;
1026 const char * base = get_ptr(), * ptr = base;
1027 while(length_in_chars && utf8_advance(ptr)) length_in_chars--;
1028 if (length_in_chars==0)
1029 {
1030 truncate(ptr-base);
1031 add_string(append);
1032 rv = true;
1033 }
1034 return rv;
1035 }
1036
1037 void string_base::truncate_to_parent_path() {
1038 size_t at = scan_filename();
1039 #ifdef _WIN32
1040 while(at > 0 && (*this)[at-1] == '\\') --at;
1041 if (at > 0 && (*this)[at-1] == ':' && (*this)[at] == '\\') ++at;
1042 #else
1043 // Strip trailing /
1044 while(at > 0 && (*this)[at-1] == '/') --at;
1045
1046 // Hit empty? Bring root / back to life
1047 if (at == 0 && (*this)[0] == '/') ++at;
1048
1049 // Deal with proto://
1050 if (at > 0 && (*this)[at-1] == ':') {
1051 while((*this)[at] == '/') ++at;
1052 }
1053 #endif
1054 this->truncate( at );
1055 }
1056
1057 size_t string_base::replace_string(const char * replace, const char * replaceWith, t_size start) {
1058 string_formatter temp;
1059 size_t ret = replace_string_ex(temp, replace, replaceWith, start);
1060 if ( ret > 0 ) * this = temp;
1061 return ret;
1062 }
1063 size_t string_base::replace_string_ex (string_base & temp, const char * replace, const char * replaceWith, t_size start) const {
1064 size_t srcDone = 0, walk = start;
1065 size_t occurances = 0;
1066 const char * const source = this->get_ptr();
1067 bool clear = false;
1068 const size_t replaceLen = strlen( replace );
1069 for(;;) {
1070 const char * ptr = strstr( source + walk, replace );
1071 if (ptr == NULL) {
1072 // end
1073 if (srcDone == 0) {
1074 return 0; // string not altered
1075 }
1076 temp.add_string( source + srcDone );
1077 break;
1078 }
1079 ++occurances;
1080 walk = ptr - source;
1081 if (! clear ) {
1082 temp.reset();
1083 clear = true;
1084 }
1085 temp.add_string( source + srcDone, walk - srcDone );
1086 temp.add_string( replaceWith );
1087 walk += replaceLen;
1088 srcDone = walk;
1089 }
1090 return occurances;
1091 }
1092
1093 void urlEncodeAppendRaw(pfc::string_base & out, const char * in, t_size inSize) {
1094 for(t_size walk = 0; walk < inSize; ++walk) {
1095 const char c = in[walk];
1096 if (c == ' ') out.add_byte('+');
1097 else if (pfc::char_is_ascii_alphanumeric(c) || c == '_') out.add_byte(c);
1098 else out << "%" << pfc::format_hex((t_uint8)c, 2);
1099 }
1100 }
1101 void urlEncodeAppend(pfc::string_base & out, const char * in) {
1102 for(;;) {
1103 const char c = *(in++);
1104 if (c == 0) break;
1105 else if (c == ' ') out.add_byte('+');
1106 else if (pfc::char_is_ascii_alphanumeric(c) || c == '_') out.add_byte(c);
1107 else out << "%" << pfc::format_hex((t_uint8)c, 2);
1108 }
1109 }
1110 void urlEncode(pfc::string_base & out, const char * in) {
1111 out.reset(); urlEncodeAppend(out, in);
1112 }
1113
1114 unsigned char_to_dec(char c) {
1115 PFC_ASSERT(c != 0);
1116 if (c >= '0' && c <= '9') return (unsigned)(c - '0');
1117 else throw exception_invalid_params();
1118 }
1119
1120 unsigned char_to_hex(char c) {
1121 if (c >= '0' && c <= '9') return (unsigned)(c - '0');
1122 else if (c >= 'a' && c <= 'f') return (unsigned)(c - 'a' + 10);
1123 else if (c >= 'A' && c <= 'F') return (unsigned)(c - 'A' + 10);
1124 else throw exception_invalid_params();
1125 }
1126
1127
1128 static constexpr t_uint8 ascii_tolower_table[128] = {0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,0x09,0x0A,0x0B,0x0C,0x0D,0x0E,0x0F,0x10,0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,0x19,0x1A,0x1B,0x1C,0x1D,0x1E,0x1F,0x20,0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,0x29,0x2A,0x2B,0x2C,0x2D,0x2E,0x2F,0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0x3A,0x3B,0x3C,0x3D,0x3E,0x3F,0x40,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x5B,0x5C,0x5D,0x5E,0x5F,0x60,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0x7B,0x7C,0x7D,0x7E,0x7F};
1129
1130 uint32_t charLower(uint32_t param)
1131 {
1132 if (param<128) {
1133 return ascii_tolower_table[param];
1134 }
1135 #ifdef PFC_WINDOWS_DESKTOP_APP
1136 else if (param<0x10000) {
1137 return (uint32_t)(size_t)CharLowerW((WCHAR*)(size_t)param);
1138 }
1139 #endif
1140 else return param;
1141 }
1142
// Upper-cases one code point. ASCII 'a'..'z' become 'A'..'Z'; with PFC_WINDOWS_DESKTOP_APP
// defined, other code points below 0x10000 go through CharUpperW(); everything else is
// returned unchanged.
uint32_t charUpper(uint32_t param)
{
	if (param < 128) {
		const bool isLowerLetter = (param >= 'a') && (param <= 'z');
		return isLowerLetter ? param - (uint32_t)( 'a' - 'A' ) : param;
	}
#ifdef PFC_WINDOWS_DESKTOP_APP
	if (param < 0x10000) return (uint32_t)(size_t)CharUpperW((WCHAR*)(size_t)param);
#endif
	return param;
}
1156
1157
1158 bool stringEqualsI_ascii(const char * p1,const char * p2) noexcept {
1159 for(;;)
1160 {
1161 char c1 = *p1;
1162 char c2 = *p2;
1163 if (c1 > 0 && c2 > 0) {
1164 if (ascii_tolower_table[ (unsigned) c1 ] != ascii_tolower_table[ (unsigned) c2 ]) return false;
1165 } else {
1166 if (c1 == 0 && c2 == 0) return true;
1167 if (c1 == 0 || c2 == 0) return false;
1168 if (c1 != c2) return false;
1169 }
1170 ++p1; ++p2;
1171 }
1172 }
1173
1174 bool stringEqualsI_utf8(const char * p1,const char * p2) noexcept
1175 {
1176 for(;;)
1177 {
1178 char c1 = *p1;
1179 char c2 = *p2;
1180 if (c1 > 0 && c2 > 0) {
1181 if (ascii_tolower_table[ (unsigned) c1 ] != ascii_tolower_table[ (unsigned) c2 ]) return false;
1182 ++p1; ++p2;
1183 } else {
1184 if (c1 == 0 && c2 == 0) return true;
1185 if (c1 == 0 || c2 == 0) return false;
1186 unsigned w1,w2; t_size d1,d2;
1187 d1 = utf8_decode_char(p1,w1);
1188 d2 = utf8_decode_char(p2,w2);
1189 if (d1 == 0 || d2 == 0) return false; // bad UTF-8, bail
1190 if (w1 != w2) {
1191 if (charLower(w1) != charLower(w2)) return false;
1192 }
1193 p1 += d1;
1194 p2 += d2;
1195 }
1196 }
1197 }
1198
1199 char ascii_tolower_lookup(char c) {
1200 PFC_ASSERT( c >= 0);
1201 return (char)ascii_tolower_table[ (unsigned) c ];
1202 }
1203
1204 void string_base::fix_dir_separator(char c) {
1205 #ifdef _WIN32
1206 end_with(c);
1207 #else
1208 end_with_slash();
1209 #endif
1210 }
1211
1212
// Returns true when the null-terminated string begins with prefix (exact, case-sensitive byte match).
// An empty prefix always matches.
bool string_has_prefix( const char * string, const char * prefix ) {
	const char * strWalk = string;
	for (const char * prefixWalk = prefix; *prefixWalk != 0; ++prefixWalk, ++strWalk) {
		// A shorter string fails here on its terminator, so we never read past its end.
		if (*strWalk != *prefixWalk) return false;
	}
	return true;
}
1220 const char* string_skip_prefix_i(const char* string, const char* prefix) {
1221 const char* p1 = string; const char* p2 = prefix;
1222 for (;;) {
1223 unsigned w1, w2; size_t d1, d2;
1224 d1 = utf8_decode_char(p1, w1);
1225 d2 = utf8_decode_char(p2, w2);
1226 if (d2 == 0) return p1;
1227 if (d1 == 0) return nullptr;
1228 if (w1 != w2) {
1229 if (charLower(w1) != charLower(w2)) return nullptr;
1230 }
1231 p1 += d1; p2 += d2;
1232 }
1233 }
1234 bool string_has_prefix_i( const char * string, const char * prefix ) {
1235 return string_skip_prefix_i(string, prefix) != nullptr;
1236 }
// Returns true when the null-terminated string ends with suffix (exact, case-sensitive byte match).
// An empty suffix always matches.
bool string_has_suffix( const char * string, const char * suffix ) {
	const size_t stringLen = strlen( string );
	const size_t suffixLen = strlen( suffix );
	if (stringLen < suffixLen) return false;
	const char * tail = string + (stringLen - suffixLen);
	return memcmp( tail, suffix, suffixLen * sizeof(char) ) == 0;
}
1244 bool string_has_suffix_i( const char * string, const char * suffix ) {
1245 for(;;) {
1246 if (*string == 0) return false;
1247 if (stringEqualsI_utf8( string, suffix )) return true;
1248 if (!utf8_advance(string)) return false;
1249 }
1250 }
1251
// Portable wrapper around the C runtime's string duplication:
// _strdup under MSVC, strdup everywhere else.
char * strDup(const char * src) {
#ifndef _MSC_VER
	return strdup(src);
#else
	return _strdup(src);
#endif
}
1259
1260
1261 string_part_ref string_part_ref::make(const char * ptr, t_size len) {
1262 string_part_ref val = {ptr, len}; return val;
1263 }
1264
1265 string_part_ref string_part_ref::substring(t_size base) const {
1266 PFC_ASSERT( base <= m_len );
1267 return make(m_ptr + base, m_len - base);
1268 }
1269 string_part_ref string_part_ref::substring(t_size base, t_size len) const {
1270 PFC_ASSERT( base <= m_len && base + len <= m_len );
1271 return make(m_ptr + base, len);
1272 }
1273
1274 string_part_ref string_part_ref::make( const char * str ) {return make( str, strlen(str) ); }
1275
1276 bool string_part_ref::equals( string_part_ref other ) const {
1277 if ( other.m_len != this->m_len ) return false;
1278 return memcmp( other.m_ptr, this->m_ptr, m_len ) == 0;
1279 }
1280 bool string_part_ref::equals( const char * str ) const {
1281 return equals(make(str) );
1282 }
1283
1284 string8 lineEndingsToWin(const char * str) {
1285 string8 ret;
1286 const char * walk = str;
1287 for( ;; ) {
1288 const char * eol = strchr( walk, '\n' );
1289 if ( eol == nullptr ) {
1290 ret += walk; break;
1291 }
1292 const char * next = eol + 1;
1293 if ( eol > walk ) {
1294 if (eol[-1] == '\r') --eol;
1295 if ( eol > walk ) ret.add_string_nc(walk, eol-walk);
1296 }
1297 ret.add_string_nc("\r\n",2);
1298 walk = next;
1299 }
1300 return ret;
1301 }
1302
1303
1304 string8 format_char(char c) {
1305 string8 ret; ret.add_byte(c); return ret;
1306 }
1307
1308 string8 format_ptr( const void * ptr ) {
1309 string8 temp;
1310 temp << "0x";
1311 temp << format_hex_lowercase( (size_t) ptr, sizeof(ptr) * 2 );
1312 return temp;
1313 }
1314
1315
1316 string8 format_pad_left(t_size p_chars, t_uint32 p_padding, const char * p_string, t_size p_string_length) {
1317 string8 m_buffer;
1318 t_size source_len = 0, source_walk = 0;
1319
1320 while (source_walk < p_string_length && source_len < p_chars) {
1321 unsigned dummy;
1322 t_size delta = pfc::utf8_decode_char(p_string + source_walk, dummy, p_string_length - source_walk);
1323 if (delta == 0) break;
1324 source_len++;
1325 source_walk += delta;
1326 }
1327
1328 m_buffer.add_string(p_string, source_walk);
1329 m_buffer.add_chars(p_padding, p_chars - source_len);
1330 return m_buffer;
1331 }
1332
1333 string8 format_pad_right(t_size p_chars, t_uint32 p_padding, const char * p_string, t_size p_string_length) {
1334 string8 m_buffer;
1335 t_size source_len = 0, source_walk = 0;
1336
1337 while (source_walk < p_string_length && source_len < p_chars) {
1338 unsigned dummy;
1339 t_size delta = pfc::utf8_decode_char(p_string + source_walk, dummy, p_string_length - source_walk);
1340 if (delta == 0) break;
1341 source_len++;
1342 source_walk += delta;
1343 }
1344
1345 m_buffer.add_chars(p_padding, p_chars - source_len);
1346 m_buffer.add_string(p_string, source_walk);
1347 return m_buffer;
1348 }
1349
1350 string8 stringToUpper(const char * str, size_t len) {
1351 string8 ret;
1352 stringToUpperAppend(ret, str, len);
1353 return ret;
1354 }
1355 string8 stringToLower(const char * str, size_t len) {
1356 string8 ret;
1357 stringToLowerAppend(ret, str, len);
1358 return ret;
1359 }
1360
1361 pfc::string8 prefixLines(const char* str, const char* prefix, const char * setEOL) {
1362 const auto temp = pfc::splitStringByLines2(str);
1363 pfc::string8 ret; ret.prealloc(1024);
1364 for (auto& line : temp) {
1365 if ( line.length() > 0 ) ret << prefix << line << setEOL;
1366 }
1367 return ret;
1368 }
1369
1370 pfc::string8 recover_invalid_utf8(const char* in, const char* subst) {
1371 pfc::string8 ret; ret.prealloc(strlen(in));
1372 for (;;) {
1373 char c = *in;
1374 if (c == 0) break;
1375 if (c < ' ') {
1376 ret += subst;
1377 } else {
1378 ret.add_byte(c);
1379 }
1380 ++in;
1381 }
1382 return ret;
1383 }
// True for the characters treated as spacing here: space, line feed, carriage return, tab.
static bool is_spacing(char c) {
	return c == ' ' || c == '\n' || c == '\r' || c == '\t';
}
1390 pfc::string8 string_trim_spacing(const char* in) {
1391 const char* temp_ptr = in;
1392 while (is_spacing(*temp_ptr)) temp_ptr++;
1393 const char* temp_start = temp_ptr;
1394 const char* temp_end = temp_ptr;
1395 while (*temp_ptr)
1396 {
1397 if (!is_spacing(*temp_ptr)) temp_end = temp_ptr + 1;
1398 temp_ptr++;
1399 }
1400
1401 return string_part_ref { temp_start, (size_t)(temp_end - temp_start) };
1402 }
1403 } //namespace pfc