comparison foosdk/sdk/pfc/string_base.h @ 1:20d02a178406 default tip

*: check in everything else yay
author Paper <paper@tflc.us>
date Mon, 05 Jan 2026 02:15:46 -0500
parents
children
comparison
equal deleted inserted replaced
0:e9bb126753e7 1:20d02a178406
1 #pragma once
2
3 #include <utility> // std::forward
4
5 #include "primitives.h"
6 #include "string-part.h"
7
8 namespace pfc {
9
// Path and text classification helpers. Declarations only; the implementations are not part of this header.
10 t_size scan_filename(const char * ptr);
11
// Single-character path tests; the character is passed as an unsigned value.
12 bool is_path_separator(unsigned c);
13 bool is_path_bad_char(unsigned c);
// The max / p_len parameters below default to SIZE_MAX.
// NOTE(review): presumably that means "scan up to the null terminator" - confirm against the implementation.
14 bool is_valid_utf8(const char * param,t_size max = SIZE_MAX);
15 bool is_canonical_utf8(const char * param, size_t max = SIZE_MAX);
16 bool is_lower_ascii(const char * param);
17 bool is_multiline(const char * p_string,t_size p_len = SIZE_MAX);
18 bool has_path_bad_chars(const char * param);
19 void convert_to_lower_ascii(const char * src,t_size max,char * out,char replace = '?');//out must have room for at least strlen(src)+1 chars; replace defaults to '?'
20
//! Maps an ASCII uppercase letter ('A'..'Z') to its lowercase counterpart; any other value is returned unchanged.
//! Only compares against ASCII literals, so the result does not depend on the current locale.
//! Declared noexcept to match the char_is_*() helpers in this header.
template<typename char_t> inline char_t ascii_tolower(char_t c) noexcept {
	if (c >= 'A' && c <= 'Z') c += 'a' - 'A';
	return c;
}
//! Maps an ASCII lowercase letter ('a'..'z') to its uppercase counterpart; any other value is returned unchanged.
//! Only compares against ASCII literals, so the result does not depend on the current locale.
template<typename char_t> inline char_t ascii_toupper(char_t c) noexcept {
	// 'A' - 'a' is negative; the compound assignment converts the result back to char_t.
	if (c >= 'a' && c <= 'z') c += 'A' - 'a';
	return c;
}
23
// Substring / character search. p_start defaults to 0 for the forward searches and to SIZE_MAX for the backward ones.
// NOTE(review): "infinite" in the comments below presumably means the all-ones t_size value - confirm.
24 t_size string_find_first(const char * p_string,char p_tofind,t_size p_start = 0); //returns infinite if not found
25 t_size string_find_last(const char * p_string,char p_tofind,t_size p_start = SIZE_MAX); //returns infinite if not found
26 t_size string_find_first(const char * p_string,const char * p_tofind,t_size p_start = 0); //returns infinite if not found
27 t_size string_find_last(const char * p_string,const char * p_tofind,t_size p_start = SIZE_MAX); //returns infinite if not found
28
// _ex variants: same searches, with explicit length arguments for the string (and for the needle where it is a string).
29 t_size string_find_first_ex(const char * p_string,t_size p_string_length,char p_tofind,t_size p_start = 0); //returns infinite if not found
30 t_size string_find_last_ex(const char * p_string,t_size p_string_length,char p_tofind,t_size p_start = SIZE_MAX); //returns infinite if not found
31 t_size string_find_first_ex(const char * p_string,t_size p_string_length,const char * p_tofind,t_size p_tofind_length,t_size p_start = 0); //returns infinite if not found
32 t_size string_find_last_ex(const char * p_string,t_size p_string_length,const char * p_tofind,t_size p_tofind_length,t_size p_start = SIZE_MAX); //returns infinite if not found
33
34
// _nc variants: "no checks" - the caller is responsible for passing valid lengths.
35 t_size string_find_first_nc(const char * p_string,t_size p_string_length,char c,t_size p_start = 0); // lengths MUST be valid, no checks are performed (faster than the other flavour)
36 t_size string_find_first_nc(const char * p_string,t_size p_string_length,const char * p_tofind,t_size p_tofind_length,t_size p_start = 0); // lengths MUST be valid, no checks are performed (faster than the other flavour)
37
38
// Prefix / suffix tests on null-terminated strings.
// NOTE(review): the _i suffix presumably means case-insensitive - confirm against the implementation.
39 bool string_has_prefix( const char * string, const char * prefix );
40 bool string_has_prefix_i( const char * string, const char * prefix );
41 const char * string_skip_prefix_i(const char* string, const char* prefix);
42 bool string_has_suffix( const char * string, const char * suffix );
43 bool string_has_suffix_i( const char * string, const char * suffix );
44
// p_length defaults to SIZE_MAX; declared noexcept.
45 bool string_is_numeric(const char * p_string,t_size p_length = SIZE_MAX) noexcept;
//! True when p_char lies in '0'..'9'.
template<typename char_t> inline bool char_is_numeric(char_t p_char) noexcept {
	return !(p_char < '0' || p_char > '9');
}
//! True for '0'..'9', 'a'..'f' and 'A'..'F'.
inline bool char_is_hexnumeric(char p_char) noexcept {
	if (char_is_numeric(p_char)) return true;
	if (p_char >= 'a' && p_char <= 'f') return true;
	return p_char >= 'A' && p_char <= 'F';
}
//! True when p_char lies in 'A'..'Z'.
inline bool char_is_ascii_alpha_upper(char p_char) noexcept {
	return !(p_char < 'A' || p_char > 'Z');
}
//! True when p_char lies in 'a'..'z'.
inline bool char_is_ascii_alpha_lower(char p_char) noexcept {
	return !(p_char < 'a' || p_char > 'z');
}
//! True for any ASCII letter, either case.
inline bool char_is_ascii_alpha(char p_char) noexcept {
	if (char_is_ascii_alpha_lower(p_char)) return true;
	return char_is_ascii_alpha_upper(p_char);
}
//! True for any ASCII letter or decimal digit.
inline bool char_is_ascii_alphanumeric(char p_char) noexcept {
	if (char_is_ascii_alpha(p_char)) return true;
	return char_is_numeric(p_char);
}
52
// Bounded string-to-integer conversions; max is a required argument (no default) and the functions are noexcept.
// NOTE(review): max is presumably the maximum number of characters to read from ptr - confirm.
53 unsigned atoui_ex(const char * ptr,t_size max) noexcept;
54 t_int64 atoi64_ex(const char * ptr,t_size max) noexcept;
55 t_uint64 atoui64_ex(const char * ptr,t_size max) noexcept;
56
// Single-digit conversions, used by atohex() / atodec() below in this header.
57 //Throws exception_invalid_params on failure.
58 unsigned char_to_hex(char c);
59 unsigned char_to_dec(char c);
60
61 //Throws exception_invalid_params or exception_overflow on failure.
62 template<typename t_uint> t_uint atohex(const char * in, t_size inLen) {
63 t_uint ret = 0;
64 const t_uint guard = (t_uint)0xF << (sizeof(t_uint) * 8 - 4);
65 for(t_size walk = 0; walk < inLen; ++walk) {
66 if (ret & guard) throw exception_overflow();
67 ret = (ret << 4) | char_to_hex(in[walk]);
68 }
69 return ret;
70 }
71 template<typename t_uint> t_uint atodec(const char * in, t_size inLen) {
72 t_uint ret = 0;
73 for(t_size walk = 0; walk < inLen; ++walk) {
74 const t_uint prev = ret;
75 ret = (ret * 10) + char_to_dec(in[walk]);
76 if ((ret / 10) != prev) throw exception_overflow();
77 }
78 return ret;
79 }
80
// UTF-8 length helpers; byte-count parameters default to SIZE_MAX where a default is given.
81 t_size strlen_utf8(const char * s,t_size num = SIZE_MAX) noexcept;//returns number of characters in utf8 string; num - no. of bytes (optional)
82 t_size utf8_char_len(const char * s,t_size max = SIZE_MAX) noexcept;//returns size of utf8 character pointed by s, in bytes, 0 on error
83 t_size utf8_char_len_from_header(char c) noexcept;
84 t_size utf8_chars_to_bytes(const char* string, t_size count) noexcept;
85
// NOTE(review): presumably copies at most maxbytes bytes without splitting a UTF-8 character - confirm against the implementation.
86 size_t strcpy_utf8_truncate(const char * src,char * out,size_t maxbytes);
87
//! Copies the zero-terminated sequence at in to out, terminator included.
//! No bounds are checked; out must have room for the whole source plus its terminator.
template<typename char_t> void strcpy_t( char_t * out, const char_t * in ) {
	// Copy one element per iteration and stop right after the terminator has been written.
	while ((*out++ = *in++) != 0) {
	}
}
91
// Single-character UTF-8 decode / encode. The decoded code point is written to the unsigned out reference.
92 t_size utf8_decode_char(const char * src,unsigned & out,t_size src_bytes) noexcept;//returns length in bytes
93 t_size utf8_decode_char(const char * src,unsigned & out) noexcept;//returns length in bytes
94
95 t_size utf8_encode_char(unsigned c,char * out) noexcept;//returns used length in bytes, max 6
96
97
// UTF-16 variants; note that these take the output as a pointer (unsigned *), unlike the UTF-8 ones above.
98 t_size utf16_decode_char(const char16_t * p_source,unsigned * p_out,t_size p_source_length = SIZE_MAX) noexcept;
99 t_size utf16_encode_char(unsigned c,char16_t * out) noexcept;
100
// wchar_t overloads of the UTF-16 functions are only declared when building with MSVC.
101 #ifdef _MSC_VER
102 t_size utf16_decode_char(const wchar_t * p_source,unsigned * p_out,t_size p_source_length = SIZE_MAX) noexcept;
103 t_size utf16_encode_char(unsigned c,wchar_t * out) noexcept;
104 #endif
105
// wchar_t variants, declared on every platform.
106 t_size wide_decode_char(const wchar_t * p_source,unsigned * p_out,t_size p_source_length = SIZE_MAX) noexcept;
107 t_size wide_encode_char(unsigned c,wchar_t * out) noexcept;
108
// uni_* overload sets: the same operation selected by character type (char, char16_t, wchar_t).
109 size_t uni_char_length(const char *);
110 size_t uni_char_length(const char16_t *);
111 size_t uni_char_length(const wchar_t *);
112
113 size_t uni_decode_char(const char16_t * p_source, unsigned & p_out, size_t p_source_length = SIZE_MAX) noexcept;
114 size_t uni_decode_char(const char * p_source, unsigned & p_out, size_t p_source_length = SIZE_MAX) noexcept;
115 size_t uni_decode_char(const wchar_t * p_source, unsigned & p_out, size_t p_source_length = SIZE_MAX) noexcept;
116
117 size_t uni_encode_char(unsigned c, char* out) noexcept;
118 size_t uni_encode_char(unsigned c, char16_t* out) noexcept;
119 size_t uni_encode_char(unsigned c, wchar_t* out) noexcept;
120
#ifdef __cpp_char8_t
// char8_t overloads of the uni_* functions: each reinterprets its pointer as (const) char* and forwards to the char overload.
inline size_t uni_char_length(const char8_t* arg) {
	const char* const asChar = reinterpret_cast<const char*>(arg);
	return uni_char_length(asChar);
}
inline size_t uni_decode_char(const char8_t* p_source, unsigned& p_out, size_t p_source_length = SIZE_MAX) noexcept {
	const char* const asChar = reinterpret_cast<const char*>(p_source);
	return uni_decode_char(asChar, p_out, p_source_length);
}
inline size_t uni_encode_char(unsigned c, char8_t* out) noexcept {
	char* const asChar = reinterpret_cast<char*>(out);
	return uni_encode_char(c, asChar);
}
#endif
// Substring search with explicit lengths for both the string and the substring.
126 t_size strstr_ex(const char * p_string,t_size p_string_len,const char * p_substring,t_size p_substring_len) noexcept;
127
128
129 t_size skip_utf8_chars(const char * ptr,t_size count) noexcept;
// NOTE(review): returns a char*; ownership and the matching release function are not visible in this header - confirm before use.
130 char * strdup_n(const char * src,t_size len);
131
132 unsigned utf8_get_char(const char * src);
133
134 inline bool utf8_advance(const char * & var) noexcept {
135 t_size delta = utf8_char_len(var);
136 var += delta;
137 return delta>0;
138 }
139
140 inline bool utf8_advance(char * & var) noexcept {
141 t_size delta = utf8_char_len(var);
142 var += delta;
143 return delta>0;
144 }
145
146 inline const char * utf8_char_next(const char * src) noexcept {return src + utf8_char_len(src);}
147 inline char * utf8_char_next(char * src) noexcept {return src + utf8_char_len(src);}
148
149 template<t_size max_length>
150 class string_fixed_t : public pfc::string_base {
151 public:
152 inline string_fixed_t() {init();}
153 inline string_fixed_t(const string_fixed_t<max_length> & p_source) {init(); *this = p_source;}
154 inline string_fixed_t(const char * p_source) {init(); set_string(p_source);}
155
156 inline const string_fixed_t<max_length> & operator=(const string_fixed_t<max_length> & p_source) {set_string(p_source);return *this;}
157 inline const string_fixed_t<max_length> & operator=(const char * p_source) {set_string(p_source);return *this;}
158
159 char * lock_buffer(t_size p_requested_length) {
160 if (p_requested_length >= max_length) return NULL;
161 memset(m_data,0,sizeof(m_data));
162 return m_data;
163 }
164 void unlock_buffer() {
165 m_length = strlen(m_data);
166 }
167
168 inline operator const char * () const {return m_data;}
169
170 const char * get_ptr() const {return m_data;}
171
172 void add_string(const char * ptr,t_size len) {
173 len = strlen_max(ptr,len);
174 if (m_length + len < m_length || m_length + len > max_length) throw pfc::exception_overflow();
175 for(t_size n=0;n<len;n++) {
176 m_data[m_length++] = ptr[n];
177 }
178 m_data[m_length] = 0;
179 }
180 void truncate(t_size len) {
181 if (len > max_length) len = max_length;
182 if (m_length > len) {
183 m_length = len;
184 m_data[len] = 0;
185 }
186 }
187 t_size get_length() const {return m_length;}
188 private:
189 inline void init() {
190 PFC_STATIC_ASSERT(max_length>1);
191 m_length = 0; m_data[0] = 0;
192 }
193 t_size m_length;
194 char m_data[max_length+1];
195 };
196
// All of the following names are plain aliases of stringLite.
197 typedef stringLite string8_fastalloc;
198 typedef stringLite string8_fast;
199 typedef stringLite string8_fast_aggressive;
200 typedef stringLite string_formatter;
201 typedef stringLite string;
202
203 }
204
205 namespace pfc {
206
207 class string_buffer {
208 private:
209 string_base & m_owner;
210 char * m_buffer;
211 public:
212 explicit string_buffer(string_base & p_string,t_size p_requested_length) : m_owner(p_string) {m_buffer = m_owner.lock_buffer(p_requested_length);}
213 ~string_buffer() {m_owner.unlock_buffer();}
214 char * get_ptr() {return m_buffer;}
215 operator char* () {return m_buffer;}
216 };
217
// printf-style formatting. The plain forms return a string8; the _here forms take the destination as the out parameter.
218 string8 string_printf(const char * fmt, ...);
219 string8 string_printf_va(const char * fmt, va_list list);
220 void string_printf_here(string_base & out, const char * fmt, ...);
221 void string_printf_here_va(string_base & out, const char * fmt, va_list list);
222
// Time formatting. format_time_ex takes fractional seconds; extra defaults to 3.
// NOTE(review): extra is presumably the number of fractional digits - confirm.
223 string8 format_time(uint64_t seconds);
224 string8 format_time_ex(double seconds, unsigned extra = 3);
225
226
227 double parse_timecode( const char * tc );
228
// File name / extension helpers.
229 string8 string_filename(const char * fn);
230 string8 string_filename_ext(const char * fn);
231
232 const char * filename_ext_v2 ( const char * fn, char slash = 0 );
233 string8 remove_ext_v2( const char * fileNameDotExt ); // Just removes extension, assumes argument to hold just filename.ext, not whole path
234 const char * extract_ext_v2( const char * fileNameDotExt ); // Just extracts extension, assumes argument to hold just filename.ext, not whole path
235
236 size_t find_extension_offset(const char * src);
237 string8 string_extension(const char * src);
238 string8 string_replace_extension(const char * p_path, const char * p_ext);
239 string8 string_directory(const char * p_path);
240
// Floating point <-> text conversions.
241 void float_to_string(char * out,t_size out_max,double val,unsigned precision,bool force_sign = false);//does not add E+X etc, has internal range limits; useful for storing float numbers as strings without depending on locale decimal comma/dot settings
242 double string_to_float(const char * src,t_size len) noexcept;
243 double string_to_float(const char * src) noexcept;
244
// p_width defaults to 0 and p_prec to 7.
245 string8 format_float(double p_val,unsigned p_width = 0,unsigned p_prec = 7);
246
//! Value type returned by format_int() / format_uint() / format_hex(): a zero-initialized 64-char buffer holding a C string.
//! Converts implicitly to const char*, so it can be passed wherever a C string is expected.
struct format_int_t {
	char m_buffer[64] = {};
	//! Pointer to the first character of the buffer.
	inline const char* get_ptr() const { return &m_buffer[0]; }
	//! Same pointer as get_ptr().
	inline const char* c_str() const { return get_ptr(); }
	inline operator const char* () const { return get_ptr(); }
};
253
// Integer formatting; results are returned by value in a format_int_t (fixed 64-char buffer).
// p_width defaults to 0 and p_base to 10.
254 format_int_t format_int(t_int64 p_val, unsigned p_width = 0, unsigned p_base = 10);
255 format_int_t format_uint(t_uint64 p_val, unsigned p_width = 0, unsigned p_base = 10);
256 format_int_t format_hex(t_uint64 p_val, unsigned p_width = 0);
257 format_int_t format_hex_lowercase(t_uint64 p_val, unsigned p_width = 0);
258
// Single hex digit formatting.
259 char format_hex_char_lowercase(unsigned p_val);
260 char format_hex_char(unsigned p_val);
261
262
263 //typedef string8_fastalloc string_formatter;
// Expands to a call of _formatter() on a temporary ::pfc::string_formatter.
264 #define PFC_string_formatter() ::pfc::string_formatter()._formatter()
265
266 string8 format_ptr( const void * ptr );
267 string8 format_hexdump(const void * p_buffer,t_size p_bytes,const char * p_spacing = " ");
268 string8 format_hexdump_lowercase(const void * p_buffer,t_size p_bytes,const char * p_spacing = " ");
269 string8 format_fixedpoint(t_int64 p_val,unsigned p_point);
270
271 string8 format_char(char c);
272
// p_padding has no default here (see the inline comment); p_string_length defaults to ~0.
// NOTE(review): ~0 is an int expression converted to t_size, presumably giving the all-ones value - confirm.
273 string8 format_pad_left(t_size p_chars, t_uint32 p_padding /* = ' ' */, const char * p_string, t_size p_string_length = ~0);
274
275 string8 format_pad_right(t_size p_chars, t_uint32 p_padding /* = ' ' */, const char * p_string, t_size p_string_length = ~0);
276
// outScaleUsed is an optional output; it defaults to nullptr.
277 string8 format_file_size_short(uint64_t size, uint64_t * outScaleUsed = nullptr);
278
279 string8 format_index(size_t idx);
280 string8 format_permutation(const size_t* arg, size_t n);
281 string8 format_mask(bit_array const& mask, size_t n);
282 }
283
284 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,const char * p_source) {p_fmt.add_string_(p_source); return p_fmt;}
285 pfc::string_base & operator<<(pfc::string_base & p_fmt,const wchar_t* p_source); // string_conv.cpp
286 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,pfc::string_part_ref source) {p_fmt.add_string(source.m_ptr, source.m_len); return p_fmt;}
287 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,short p_val) {p_fmt.add_string(pfc::format_int(p_val)); return p_fmt;}
288 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,unsigned short p_val) {p_fmt.add_string(pfc::format_uint(p_val)); return p_fmt;}
289 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,int p_val) {p_fmt.add_string(pfc::format_int(p_val)); return p_fmt;}
290 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,unsigned p_val) {p_fmt.add_string(pfc::format_uint(p_val)); return p_fmt;}
291 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,long p_val) {p_fmt.add_string(pfc::format_int(p_val)); return p_fmt;}
292 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,unsigned long p_val) {p_fmt.add_string(pfc::format_uint(p_val)); return p_fmt;}
293 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,long long p_val) {p_fmt.add_string(pfc::format_int(p_val)); return p_fmt;}
294 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,unsigned long long p_val) {p_fmt.add_string(pfc::format_uint(p_val)); return p_fmt;}
295 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,double p_val) {p_fmt.add_string(pfc::format_float(p_val)); return p_fmt;}
296 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,std::exception const & p_exception) {p_fmt.add_string(p_exception.what()); return p_fmt;}
297 inline pfc::string_base & operator<<(pfc::string_base & p_fmt,pfc::format_int_t const& in) { p_fmt.add_string(in.c_str()); return p_fmt; }
298
299
300 namespace pfc {
301
302
303 template<typename t_source> string8 format_array(t_source const & source, const char * separator = ", ") {
304 string8 ret;
305 const t_size count = array_size_t(source);
306 if (count > 0) {
307 ret << source[0];
308 for(t_size walk = 1; walk < count; ++walk) ret << separator << source[walk];
309 }
310 return ret;
311 };
312
313
314 template<typename TWord> string8 format_hexdump_ex(const TWord * buffer, t_size bufLen, const char * spacing = " ") {
315 string8 ret;
316 for(t_size n = 0; n < bufLen; n++) {
317 if (n > 0 && spacing != NULL) ret << spacing;
318 ret << format_hex(buffer[n],sizeof(TWord) * 2);
319 }
320 return ret;
321 }
322
323
324
325
326
// string_simple is a plain alias of stringLite.
327 typedef stringLite string_simple;
328 }
329
330
331 namespace pfc {
332
333
// Case conversion into a caller-supplied string_base; p_sourceLen defaults to SIZE_MAX.
// NOTE(review): by their names the *Append forms add to p_out and the *Here forms replace its contents - confirm against the implementation.
334 void stringToUpperAppend(string_base & p_out, const char * p_source, t_size p_sourceLen = SIZE_MAX);
335 void stringToLowerAppend(string_base & p_out, const char * p_source, t_size p_sourceLen = SIZE_MAX);
336 void stringToUpperHere(string_base& p_out, const char* p_source, t_size p_sourceLen = SIZE_MAX);
337 void stringToLowerHere(string_base& p_out, const char* p_source, t_size p_sourceLen = SIZE_MAX);
// Single-character case conversion; charLower / charUpper take and return t_uint32 values.
338 t_uint32 charLower(t_uint32 param);
339 t_uint32 charUpper(t_uint32 param);
340 char ascii_tolower_lookup(char c);
341
342
343 class string_base_ref : public string_base {
344 public:
345 string_base_ref(const char * ptr) : m_ptr(ptr), m_len(strlen(ptr)) {}
346 const char * get_ptr() const {return m_ptr;}
347 t_size get_length() const {return m_len;}
348 private:
349 void add_string(const char *,t_size) {throw pfc::exception_not_implemented();}
350 void set_string(const char *,t_size) {throw pfc::exception_not_implemented();}
351 void truncate(t_size) {throw pfc::exception_not_implemented();}
352 char * lock_buffer(t_size) {throw pfc::exception_not_implemented();}
353 void unlock_buffer() {throw pfc::exception_not_implemented();}
354 private:
355 const char * const m_ptr;
356 t_size const m_len;
357 };
358
359 //! Writes a string to a fixed-size buffer. Truncates the string if necessary. Always writes a null terminator.
360 template<typename TChar, t_size len, typename TSource>
361 void stringToBuffer(TChar (&buffer)[len], const TSource & source) {
362 PFC_STATIC_ASSERT(len>0);
363 t_size walk;
364 for(walk = 0; walk < len - 1 && source[walk] != 0; ++walk) {
365 buffer[walk] = source[walk];
366 }
367 buffer[walk] = 0;
368 }
369
370 //! Same as stringToBuffer() but throws exception_overflow() if the string could not be fully written, including null terminator.
371 template<typename TChar, t_size len, typename TSource>
372 void stringToBufferGuarded(TChar (&buffer)[len], const TSource & source) {
373 t_size walk;
374 for(walk = 0; source[walk] != 0; ++walk) {
375 if (walk >= len) throw exception_overflow();
376 buffer[walk] = source[walk];
377 }
378 if (walk >= len) throw exception_overflow();
379 buffer[walk] = 0;
380 }
381
382
// URL encoding into a caller-supplied string_base. urlEncodeAppendRaw takes an explicit input size; the other two take a null-terminated string.
// NOTE(review): by their names urlEncodeAppend adds to out while urlEncode replaces its contents - confirm against the implementation.
383 void urlEncodeAppendRaw(pfc::string_base & out, const char * in, t_size inSize);
384 void urlEncodeAppend(pfc::string_base & out, const char * in);
385 void urlEncode(pfc::string_base & out, const char * in);
386
387
// NOTE(review): returns a char*; the matching release function is not visible in this header - confirm before use.
388 char * strDup(const char * src); // POSIX strdup() clone, prevent MSVC complaining
389
390 string8 lineEndingsToWin( const char * str );
391
// Case conversion returning a new string8; len defaults to SIZE_MAX.
392 string8 stringToUpper( const char * str, size_t len = SIZE_MAX );
393 string8 stringToLower( const char * str, size_t len = SIZE_MAX );
394
395 template<typename t_source> static void stringCombine(pfc::string_base& out, t_source const& in, const char* separator, const char* separatorLast) {
396 out.reset();
397 for (typename t_source::const_iterator walk = in.first(); walk.is_valid(); ++walk) {
398 if (!out.is_empty()) {
399 if (walk == in.last()) out << separatorLast;
400 else out << separator;
401 }
402 out << stringToPtr(*walk);
403 }
404 }
405
406 template<typename TList>
407 string stringCombineList(const TList& list, stringp separator) {
408 typename TList::const_iterator iter = list.first();
409 string acc;
410 if (iter.is_valid()) {
411 acc = *iter;
412 for (++iter; iter.is_valid(); ++iter) {
413 acc = acc + separator + *iter;
414 }
415 }
416 return acc;
417 }
418
419
420 inline void formatHere(pfc::string_base&) {}
421 template<typename first_t, typename ... args_t> void formatHere(pfc::string_base& out, first_t && first, args_t && ... args) {
422 out << std::forward<first_t>(first);
423 formatHere(out, std::forward<args_t>(args) ...);
424 }
425
426
427 template<typename ... args_t>
428 inline string format(args_t && ... args) {
429 string ret; formatHere(ret, std::forward<args_t>(args) ...); return ret;
430 }
431
// setEOL defaults to "\n".
// NOTE(review): presumably the line ending used in the returned text - confirm against the implementation.
432 pfc::string8 prefixLines(const char* str, const char* prefix, const char * setEOL = "\n");
433
434
// subst defaults to "_".
// NOTE(review): presumably the replacement written for invalid UTF-8 input - confirm against the implementation.
435 pfc::string8 recover_invalid_utf8(const char* in, const char* subst = "_");
436
437 pfc::string8 string_trim_spacing(const char* in);
438 }