diff foosdk/sdk/pfc/string_base.h @ 1:20d02a178406 default tip

*: check in everything else yay
author Paper <paper@tflc.us>
date Mon, 05 Jan 2026 02:15:46 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/foosdk/sdk/pfc/string_base.h	Mon Jan 05 02:15:46 2026 -0500
@@ -0,0 +1,438 @@
+#pragma once
+
+#include <utility> // std::forward
+
+#include "primitives.h"
+#include "string-part.h"
+
+namespace pfc {
+	
+	t_size scan_filename(const char * ptr); // NOTE(review): presumably the offset of the file-name portion within ptr - confirm against the definition
+
+	bool is_path_separator(unsigned c); // single-character test; the character value is passed as unsigned rather than char
+	bool is_path_bad_char(unsigned c); // single-character test; see has_path_bad_chars() for the whole-string flavour
+	bool is_valid_utf8(const char * param,t_size max = SIZE_MAX); // max bounds the scan; defaults to SIZE_MAX
+    bool is_canonical_utf8(const char * param, size_t max = SIZE_MAX); // NOTE(review): exact meaning of "canonical" is defined by the implementation, not visible here
+	bool is_lower_ascii(const char * param); // NOTE(review): presumably true when every char is below 0x80 - confirm
+	bool is_multiline(const char * p_string,t_size p_len = SIZE_MAX); // p_len bounds the scan; defaults to SIZE_MAX
+	bool has_path_bad_chars(const char * param); // whole-string flavour of is_path_bad_char()
+	void convert_to_lower_ascii(const char * src,t_size max,char * out,char replace = '?');//out must hold at least strlen(src)+1 chars; NOTE(review): replace presumably stands in for input with no lower-ASCII form - confirm
+
+	//! Maps 'A'..'Z' to 'a'..'z'; any other value is returned unchanged. No locale is consulted.
+	template<typename char_t> inline char_t ascii_tolower(char_t c) {
+		const bool isUpper = (c >= 'A') && (c <= 'Z');
+		return isUpper ? static_cast<char_t>(c + ('a' - 'A')) : c;
+	}
+	//! Maps 'a'..'z' to 'A'..'Z'; any other value is returned unchanged. No locale is consulted.
+	template<typename char_t> inline char_t ascii_toupper(char_t c) {
+		const bool isLower = (c >= 'a') && (c <= 'z');
+		return isLower ? static_cast<char_t>(c + ('A' - 'a')) : c;
+	}
+
+	t_size string_find_first(const char * p_string,char p_tofind,t_size p_start = 0);	//returns infinite if not found (NOTE(review): "infinite" is presumably the all-ones t_size value - confirm)
+	t_size string_find_last(const char * p_string,char p_tofind,t_size p_start = SIZE_MAX);	//returns infinite if not found
+	t_size string_find_first(const char * p_string,const char * p_tofind,t_size p_start = 0);	//returns infinite if not found
+	t_size string_find_last(const char * p_string,const char * p_tofind,t_size p_start = SIZE_MAX);	//returns infinite if not found
+
+	t_size string_find_first_ex(const char * p_string,t_size p_string_length,char p_tofind,t_size p_start = 0);	//returns infinite if not found; _ex flavours take explicit lengths
+	t_size string_find_last_ex(const char * p_string,t_size p_string_length,char p_tofind,t_size p_start = SIZE_MAX);	//returns infinite if not found
+	t_size string_find_first_ex(const char * p_string,t_size p_string_length,const char * p_tofind,t_size p_tofind_length,t_size p_start = 0);	//returns infinite if not found
+	t_size string_find_last_ex(const char * p_string,t_size p_string_length,const char * p_tofind,t_size p_tofind_length,t_size p_start = SIZE_MAX);	//returns infinite if not found
+
+
+	t_size string_find_first_nc(const char * p_string,t_size p_string_length,char c,t_size p_start = 0); // lengths MUST be valid, no checks are performed (faster than the other flavour)
+	t_size string_find_first_nc(const char * p_string,t_size p_string_length,const char * p_tofind,t_size p_tofind_length,t_size p_start = 0); // lengths MUST be valid, no checks are performed (faster than the other flavour)
+
+
+    bool string_has_prefix( const char * string, const char * prefix );
+    bool string_has_prefix_i( const char * string, const char * prefix ); // NOTE(review): the _i suffix presumably means case-insensitive - confirm
+	const char * string_skip_prefix_i(const char* string, const char* prefix); // NOTE(review): presumably points just past prefix on a match; the no-match result is not visible here
+    bool string_has_suffix( const char * string, const char * suffix );
+    bool string_has_suffix_i( const char * string, const char * suffix ); // NOTE(review): the _i suffix presumably means case-insensitive - confirm
+	
+	bool string_is_numeric(const char * p_string,t_size p_length = SIZE_MAX) noexcept; // p_length bounds the scan; defaults to SIZE_MAX
+	// ASCII character classification helpers. All of them are plain range checks; no locale is consulted.
+
+	//! True for '0'..'9'.
+	template<typename char_t> inline bool char_is_numeric(char_t p_char) noexcept {
+		if (p_char < '0') return false;
+		return p_char <= '9';
+	}
+	//! True for '0'..'9', 'a'..'f' and 'A'..'F'.
+	inline bool char_is_hexnumeric(char p_char) noexcept {
+		if (char_is_numeric(p_char)) return true;
+		const bool lowerHex = (p_char >= 'a') && (p_char <= 'f');
+		const bool upperHex = (p_char >= 'A') && (p_char <= 'F');
+		return lowerHex || upperHex;
+	}
+	//! True for 'A'..'Z'.
+	inline bool char_is_ascii_alpha_upper(char p_char) noexcept {
+		if (p_char < 'A') return false;
+		return p_char <= 'Z';
+	}
+	//! True for 'a'..'z'.
+	inline bool char_is_ascii_alpha_lower(char p_char) noexcept {
+		if (p_char < 'a') return false;
+		return p_char <= 'z';
+	}
+	//! True for 'a'..'z' and 'A'..'Z'.
+	inline bool char_is_ascii_alpha(char p_char) noexcept {
+		if (char_is_ascii_alpha_lower(p_char)) return true;
+		return char_is_ascii_alpha_upper(p_char);
+	}
+	//! True for ASCII letters and decimal digits.
+	inline bool char_is_ascii_alphanumeric(char p_char) noexcept {
+		if (char_is_ascii_alpha(p_char)) return true;
+		return char_is_numeric(p_char);
+	}
+	
+	unsigned atoui_ex(const char * ptr,t_size max) noexcept; // max limits how many chars of ptr are considered; NOTE(review): handling of non-digit input is not visible here
+	t_int64 atoi64_ex(const char * ptr,t_size max) noexcept; // signed 64-bit flavour
+	t_uint64 atoui64_ex(const char * ptr,t_size max) noexcept; // unsigned 64-bit flavour
+	
+	//Throws exception_invalid_params on failure.
+	unsigned char_to_hex(char c); // numeric value of one hex digit; atohex() ORs the result into the low nibble
+	unsigned char_to_dec(char c); // numeric value of one decimal digit; atodec() adds the result as the ones place
+
+	//Throws exception_invalid_params or exception_overflow on failure.
+	//! Parses exactly inLen hex digits from in, most significant digit first, into a t_uint.
+	//! Each digit is converted with char_to_hex(). exception_overflow is thrown when the top nibble of the
+	//! accumulated value is already occupied and one more digit would have to be shifted in.
+	template<typename t_uint> t_uint atohex(const char * in, t_size inLen) {
+		// Mask covering the four most significant bits of t_uint.
+		const t_uint topNibble = (t_uint)0xF << (sizeof(t_uint) * 8 - 4);
+		t_uint acc = 0;
+		t_size pos = 0;
+		while (pos < inLen) {
+			if ((acc & topNibble) != 0) throw exception_overflow();
+			acc = (acc << 4) | char_to_hex(in[pos]);
+			++pos;
+		}
+		return acc;
+	}
+	//! Parses exactly inLen decimal digits from in into a t_uint; each digit is converted with char_to_dec().
+	//! exception_overflow is thrown when the new value divided by 10 no longer equals the previous value,
+	//! i.e. the multiply-add did not fit in t_uint.
+	template<typename t_uint> t_uint atodec(const char * in, t_size inLen) {
+		t_uint acc = 0;
+		t_size pos = 0;
+		while (pos < inLen) {
+			const t_uint before = acc;
+			acc = (before * 10) + char_to_dec(in[pos]);
+			if (before != (acc / 10)) throw exception_overflow();
+			++pos;
+		}
+		return acc;
+	}
+
+	t_size strlen_utf8(const char * s,t_size num = SIZE_MAX) noexcept;//returns the number of characters in a UTF-8 string; num - number of bytes to examine (optional)
+	t_size utf8_char_len(const char * s,t_size max = SIZE_MAX) noexcept;//returns the size in bytes of the UTF-8 character pointed to by s, 0 on error
+	t_size utf8_char_len_from_header(char c) noexcept; // NOTE(review): presumably derives the sequence length from the lead byte alone - confirm
+	t_size utf8_chars_to_bytes(const char* string, t_size count) noexcept; // NOTE(review): presumably the byte length of the first count characters - confirm
+
+	size_t strcpy_utf8_truncate(const char * src,char * out,size_t maxbytes); // NOTE(review): name suggests a copy capped at maxbytes; meaning of the return value is not visible here
+
+	//! Copies the zero-terminated sequence at in to out, terminator included.
+	//! No bounds are checked; out must have room for the whole sequence.
+	template<typename char_t> void strcpy_t( char_t * out, const char_t * in ) {
+		while ((*out++ = *in++) != 0) { /* the copy happens in the loop condition */ }
+	}
+
+	t_size utf8_decode_char(const char * src,unsigned & out,t_size src_bytes) noexcept;//returns length in bytes; NOTE(review): src_bytes is presumably the number of bytes readable at src - confirm
+	t_size utf8_decode_char(const char * src,unsigned & out) noexcept;//returns length in bytes; flavour without an explicit source length
+
+	t_size utf8_encode_char(unsigned c,char * out) noexcept;//returns used length in bytes, max 6 - so out needs room for up to 6 bytes
+
+
+	t_size utf16_decode_char(const char16_t * p_source,unsigned * p_out,t_size p_source_length = SIZE_MAX) noexcept; // result is written through the p_out pointer (the UTF-8 and uni_* decoders take a reference instead)
+	t_size utf16_encode_char(unsigned c,char16_t * out) noexcept;
+    
+#ifdef _MSC_VER // wchar_t flavours of the UTF-16 helpers are declared only when _MSC_VER is defined
+	t_size utf16_decode_char(const wchar_t * p_source,unsigned * p_out,t_size p_source_length = SIZE_MAX) noexcept;
+	t_size utf16_encode_char(unsigned c,wchar_t * out) noexcept;
+#endif
+
+	t_size wide_decode_char(const wchar_t * p_source,unsigned * p_out,t_size p_source_length = SIZE_MAX) noexcept; // wchar_t flavour, declared on all platforms
+	t_size wide_encode_char(unsigned c,wchar_t * out) noexcept;
+
+    size_t uni_char_length(const char *); // uni_* family: one overload per character type (char, char16_t, wchar_t)
+    size_t uni_char_length(const char16_t *);
+    size_t uni_char_length(const wchar_t *);
+    
+    size_t uni_decode_char(const char16_t * p_source, unsigned & p_out, size_t p_source_length = SIZE_MAX) noexcept; // decoded value is returned through the p_out reference
+    size_t uni_decode_char(const char * p_source, unsigned & p_out, size_t p_source_length = SIZE_MAX) noexcept;
+    size_t uni_decode_char(const wchar_t * p_source, unsigned & p_out, size_t p_source_length = SIZE_MAX) noexcept;
+
+	size_t uni_encode_char(unsigned c, char* out) noexcept; // NOTE(review): return value is presumably the number of units written to out - confirm
+	size_t uni_encode_char(unsigned c, char16_t* out) noexcept;
+	size_t uni_encode_char(unsigned c, wchar_t* out) noexcept;
+
+#ifdef __cpp_char8_t
+	// char8_t overloads: each reinterprets the pointer as (const) char* and forwards to the matching char overload.
+	inline size_t uni_char_length(const char8_t* arg) {
+		const char* asChar = reinterpret_cast<const char*>(arg);
+		return uni_char_length(asChar);
+	}
+	inline size_t uni_decode_char(const char8_t* p_source, unsigned& p_out, size_t p_source_length = SIZE_MAX) noexcept {
+		const char* asChar = reinterpret_cast<const char*>(p_source);
+		return uni_decode_char(asChar, p_out, p_source_length);
+	}
+	inline size_t uni_encode_char(unsigned c, char8_t* out) noexcept {
+		char* asChar = reinterpret_cast<char*>(out);
+		return uni_encode_char(c, asChar);
+	}
+#endif
+	t_size strstr_ex(const char * p_string,t_size p_string_len,const char * p_substring,t_size p_substring_len) noexcept; // substring search with explicit lengths; NOTE(review): not-found result is not visible here
+
+
+	t_size skip_utf8_chars(const char * ptr,t_size count) noexcept; // NOTE(review): presumably the byte offset reached after skipping count characters - confirm
+	char * strdup_n(const char * src,t_size len); // NOTE(review): ownership and release of the returned buffer are defined by the implementation - confirm
+
+	unsigned utf8_get_char(const char * src); // NOTE(review): presumably decodes the character at src; not declared noexcept, unlike utf8_decode_char()
+
+	//! Moves var forward by the byte length utf8_char_len() reports for the character at var.
+	//! Returns false when that length is 0 (documented as utf8_char_len's error result); var then stays where it was.
+	inline bool utf8_advance(const char * & var) noexcept {
+		const t_size step = utf8_char_len(var);
+		var += step;
+		return step > 0;
+	}
+
+	//! Non-const pointer flavour of utf8_advance(); same contract.
+	inline bool utf8_advance(char * & var) noexcept {
+		const t_size step = utf8_char_len(var);
+		var += step;
+		return step > 0;
+	}
+
+	//! Returns src advanced by utf8_char_len(src) bytes; when that length is 0 the result equals src.
+	inline const char * utf8_char_next(const char * src) noexcept {
+		const t_size step = utf8_char_len(src);
+		return src + step;
+	}
+	//! Non-const pointer flavour of utf8_char_next(); same contract.
+	inline char * utf8_char_next(char * src) noexcept {
+		const t_size step = utf8_char_len(src);
+		return src + step;
+	}
+
+	//! Fixed-capacity string: holds up to max_length chars plus a null terminator in the m_data member array.
+	//! add_string() throws pfc::exception_overflow when the result would not fit.
+	template<t_size max_length>
+	class string_fixed_t : public pfc::string_base {
+	public:
+		inline string_fixed_t() {init();}
+		inline string_fixed_t(const string_fixed_t<max_length> & p_source) {init(); *this = p_source;}
+		inline string_fixed_t(const char * p_source) {init(); set_string(p_source);}
+		
+		//! Self-assignment is a no-op. set_string() is inherited and not visible here, so it is never handed
+		//! this object's own buffer as its source.
+		inline const string_fixed_t<max_length> & operator=(const string_fixed_t<max_length> & p_source) {
+			if (this != &p_source) set_string(p_source);
+			return *this;
+		}
+		//! Assigning the object's own buffer pointer is likewise a no-op.
+		inline const string_fixed_t<max_length> & operator=(const char * p_source) {
+			if (p_source != m_data) set_string(p_source);
+			return *this;
+		}
+
+		//! Returns the internal buffer, zero-filled, or nullptr when p_requested_length >= max_length.
+		//! Previous contents are discarded by the zero-fill; m_length is only refreshed by unlock_buffer().
+		char * lock_buffer(t_size p_requested_length) {
+			if (p_requested_length >= max_length) return nullptr;
+			memset(m_data,0,sizeof(m_data));
+			return m_data;
+		}
+		//! Recomputes the stored length from whatever was written into the buffer.
+		void unlock_buffer() {
+			m_length = strlen(m_data);
+		}
+
+		inline operator const char * () const {return m_data;}
+		
+		const char * get_ptr() const {return m_data;}
+
+		//! Appends up to len chars of ptr (len is first clamped with strlen_max()).
+		//! Throws pfc::exception_overflow if the length sum wraps around or exceeds max_length; the string is unchanged in that case.
+		void add_string(const char * ptr,t_size len) {
+			len = strlen_max(ptr,len);
+			if (m_length + len < m_length || m_length + len > max_length) throw pfc::exception_overflow();
+			for(t_size n=0;n<len;n++) {
+				m_data[m_length++] = ptr[n];
+			}
+			m_data[m_length] = 0;
+		}
+		//! Shortens the string to at most len chars; a len at or beyond the current length changes nothing.
+		void truncate(t_size len) {
+			if (len > max_length) len = max_length;
+			if (m_length > len) {
+				m_length = len;
+				m_data[len] = 0;
+			}
+		}
+		t_size get_length() const {return m_length;}
+	private:
+		//! Puts the object into the empty state; also rejects max_length values of 0 and 1 at compile time.
+		inline void init() {
+			PFC_STATIC_ASSERT(max_length>1);
+			m_length = 0; m_data[0] = 0;
+		}
+		t_size m_length;
+		char m_data[max_length+1]; // +1 for the null terminator
+	};
+
+	// Every one of these names is an alias of stringLite.
+	using string8_fastalloc = stringLite;
+	using string8_fast = stringLite;
+	using string8_fast_aggressive = stringLite;
+	using string_formatter = stringLite;
+	using string = stringLite;
+
+}
+
+namespace pfc {
+
+	//! Scoped buffer lock on a string_base: lock_buffer() is called in the constructor and
+	//! unlock_buffer() in the destructor.
+	//! NOTE(review): copy construction is not disabled; a copied instance would call unlock_buffer() a second time.
+	class string_buffer {
+	public:
+		explicit string_buffer(string_base & p_string,t_size p_requested_length)
+			: m_owner(p_string), m_buffer(p_string.lock_buffer(p_requested_length)) {}
+
+		~string_buffer() {
+			m_owner.unlock_buffer();
+		}
+
+		//! Pointer returned by lock_buffer(); not checked here.
+		char * get_ptr() { return m_buffer; }
+		operator char* () { return m_buffer; }
+
+	private:
+		string_base & m_owner;
+		char * m_buffer;
+	};
+
+	string8 string_printf(const char * fmt, ...); // returns the formatted text as a new string8
+	string8 string_printf_va(const char * fmt, va_list list); // va_list flavour of string_printf()
+	void string_printf_here(string_base & out, const char * fmt, ...); // formats into the caller-supplied out instead of returning a string
+	void string_printf_here_va(string_base & out, const char * fmt, va_list list); // va_list flavour of string_printf_here()
+
+	string8 format_time(uint64_t seconds); // NOTE(review): exact output layout is defined by the implementation - confirm
+	string8 format_time_ex(double seconds, unsigned extra = 3); // NOTE(review): extra is presumably the number of fractional digits - confirm
+
+
+	double parse_timecode( const char * tc ); // NOTE(review): accepted timecode syntax is not visible here
+
+	string8 string_filename(const char * fn);
+	string8 string_filename_ext(const char * fn);
+
+    const char * filename_ext_v2 ( const char * fn, char slash = 0 ); // NOTE(review): slash = 0 presumably selects a default path separator - confirm
+    string8 remove_ext_v2( const char * fileNameDotExt ); // Just removes extension, assumes argument to hold just filename.ext, not whole path
+    const char * extract_ext_v2( const char * fileNameDotExt ); // Just extracts extension, assumes argument to hold just filename.ext, not whole path
+
+	size_t find_extension_offset(const char * src);
+	string8 string_extension(const char * src);
+	string8 string_replace_extension(const char * p_path, const char * p_ext);
+	string8 string_directory(const char * p_path);
+
+	void float_to_string(char * out,t_size out_max,double val,unsigned precision,bool force_sign = false);//does not add E+X etc., has internal range limits; useful for storing float numbers as strings regardless of locale comma/dot settings
+	double string_to_float(const char * src,t_size len) noexcept; // flavour with an explicit length
+    double string_to_float(const char * src) noexcept; // flavour without an explicit length
+
+	string8 format_float(double p_val,unsigned p_width = 0,unsigned p_prec = 7); // this is what operator<<(string_base&, double) calls, with the default width and precision
+
+	//! Return type of the format_int()/format_uint()/format_hex() family: a zero-initialized 64-char buffer
+	//! held by value, readable as a C string.
+	struct format_int_t {
+		char m_buffer[64] = {};
+
+		inline const char* get_ptr() const { return m_buffer; }
+		inline const char* c_str() const { return get_ptr(); }
+		inline operator const char* () const { return get_ptr(); }
+	};
+
+	format_int_t format_int(t_int64 p_val, unsigned p_width = 0, unsigned p_base = 10); // result is returned by value in a format_int_t buffer
+	format_int_t format_uint(t_uint64 p_val, unsigned p_width = 0, unsigned p_base = 10); // unsigned flavour
+	format_int_t format_hex(t_uint64 p_val, unsigned p_width = 0); // NOTE(review): p_width is presumably a minimum digit count - confirm
+	format_int_t format_hex_lowercase(t_uint64 p_val, unsigned p_width = 0); // lowercase flavour of format_hex()
+	
+	char format_hex_char_lowercase(unsigned p_val); // single-digit flavour; NOTE(review): valid range of p_val is presumably 0..15 - confirm
+	char format_hex_char(unsigned p_val); // single-digit flavour
+
+    
+	//typedef string8_fastalloc string_formatter;
+#define PFC_string_formatter() ::pfc::string_formatter()._formatter() // constructs a temporary string_formatter and calls _formatter() on it
+
+    string8 format_ptr( const void * ptr );
+	string8 format_hexdump(const void * p_buffer,t_size p_bytes,const char * p_spacing = " "); // p_spacing defaults to a single space
+	string8 format_hexdump_lowercase(const void * p_buffer,t_size p_bytes,const char * p_spacing = " "); // lowercase flavour of format_hexdump()
+	string8 format_fixedpoint(t_int64 p_val,unsigned p_point); // NOTE(review): p_point is presumably the number of digits after the decimal point - confirm
+
+	string8 format_char(char c);
+
+	string8 format_pad_left(t_size p_chars, t_uint32 p_padding /* = ' ' */, const char * p_string, t_size p_string_length = ~0); // p_padding has no default in code despite the inline remark; p_string_length defaults to ~0
+
+	string8 format_pad_right(t_size p_chars, t_uint32 p_padding /* = ' ' */, const char * p_string, t_size p_string_length = ~0); // same parameters as format_pad_left()
+
+	string8 format_file_size_short(uint64_t size, uint64_t * outScaleUsed = nullptr); // outScaleUsed is an optional out-parameter; nullptr by default
+
+	string8 format_index(size_t idx);
+	string8 format_permutation(const size_t* arg, size_t n); // n is presumably the number of entries at arg - confirm
+	string8 format_mask(bit_array const& mask, size_t n);
+}
+
+// Stream-style append operators for pfc::string_base. Each one appends a textual form of its
+// right-hand operand and returns p_fmt, so calls can be chained.
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,const char * p_source) {
+	p_fmt.add_string_(p_source);
+	return p_fmt;
+}
+	   pfc::string_base & operator<<(pfc::string_base & p_fmt,const wchar_t* p_source); // string_conv.cpp
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,pfc::string_part_ref source) {
+	p_fmt.add_string(source.m_ptr, source.m_len);
+	return p_fmt;
+}
+// Signed integers are rendered by format_int(), unsigned ones by format_uint().
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,short p_val) {
+	p_fmt.add_string(pfc::format_int(p_val));
+	return p_fmt;
+}
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,unsigned short p_val) {
+	p_fmt.add_string(pfc::format_uint(p_val));
+	return p_fmt;
+}
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,int p_val) {
+	p_fmt.add_string(pfc::format_int(p_val));
+	return p_fmt;
+}
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,unsigned p_val) {
+	p_fmt.add_string(pfc::format_uint(p_val));
+	return p_fmt;
+}
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,long p_val) {
+	p_fmt.add_string(pfc::format_int(p_val));
+	return p_fmt;
+}
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,unsigned long p_val) {
+	p_fmt.add_string(pfc::format_uint(p_val));
+	return p_fmt;
+}
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,long long p_val) {
+	p_fmt.add_string(pfc::format_int(p_val));
+	return p_fmt;
+}
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,unsigned long long p_val) {
+	p_fmt.add_string(pfc::format_uint(p_val));
+	return p_fmt;
+}
+// Doubles go through format_float() with its default width and precision.
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,double p_val) {
+	p_fmt.add_string(pfc::format_float(p_val));
+	return p_fmt;
+}
+// Exceptions are appended as their what() text.
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,std::exception const & p_exception) {
+	p_fmt.add_string(p_exception.what());
+	return p_fmt;
+}
+inline pfc::string_base & operator<<(pfc::string_base & p_fmt,pfc::format_int_t const& in) {
+	p_fmt.add_string(in.c_str());
+	return p_fmt;
+}
+
+
+namespace pfc {
+
+
+	//! Joins the elements of source into one string8, with separator between consecutive elements.
+	//! The element count comes from array_size_t(source) and each element is appended with operator<<.
+	//! An empty source yields an empty string.
+	template<typename t_source> string8 format_array(t_source const & source, const char * separator = ", ") {
+		string8 ret;
+		const t_size count = array_size_t(source);
+		if (count > 0) {
+			// First element goes in bare; every later one is preceded by the separator.
+			ret << source[0];
+			for(t_size walk = 1; walk < count; ++walk) ret << separator << source[walk];
+		}
+		return ret;
+	}
+
+
+	//! Renders bufLen words from buffer as hex, each formatted with a width of sizeof(TWord)*2 digits,
+	//! with spacing between consecutive words (a null spacing means no separator).
+	//! Each word is masked to sizeof(TWord) bytes before formatting, so negative values of a signed TWord
+	//! (e.g. char) do not carry sign-extension bits into the 64-bit value handed to format_hex().
+	template<typename TWord> string8 format_hexdump_ex(const TWord * buffer, t_size bufLen, const char * spacing = " ") {
+		// Keeps the low sizeof(TWord) bytes; all 64 bits when TWord is 8 bytes or wider.
+		const t_uint64 wordMask = ~(t_uint64)0 >> (64 - (sizeof(TWord) < 8 ? sizeof(TWord) * 8 : 64));
+		string8 ret;
+		for(t_size n = 0; n < bufLen; n++) {
+			if (n > 0 && spacing != NULL) ret << spacing;
+			ret << format_hex((t_uint64)buffer[n] & wordMask,sizeof(TWord) * 2);
+		}
+		return ret;
+	}
+
+
+
+
+
+	// string_simple is another alias of stringLite.
+	using string_simple = stringLite;
+}
+
+
+namespace pfc {
+
+
+	void stringToUpperAppend(string_base & p_out, const char * p_source, t_size p_sourceLen = SIZE_MAX); // NOTE(review): "Append" flavours presumably add to the existing contents of p_out - confirm
+	void stringToLowerAppend(string_base & p_out, const char * p_source, t_size p_sourceLen = SIZE_MAX);
+	void stringToUpperHere(string_base& p_out, const char* p_source, t_size p_sourceLen = SIZE_MAX); // NOTE(review): "Here" flavours presumably replace the contents of p_out - confirm
+	void stringToLowerHere(string_base& p_out, const char* p_source, t_size p_sourceLen = SIZE_MAX);
+	t_uint32 charLower(t_uint32 param); // single-character flavour; the character is passed and returned as t_uint32
+	t_uint32 charUpper(t_uint32 param);
+	char ascii_tolower_lookup(char c); // NOTE(review): name suggests a table-driven counterpart of ascii_tolower() - confirm
+
+
+	//! Read-only string_base view over an existing zero-terminated string: the pointer is stored and the
+	//! characters are not copied. The length is measured once with strlen() at construction.
+	//! All mutating operations are private and throw pfc::exception_not_implemented.
+	class string_base_ref : public string_base {
+	public:
+		string_base_ref(const char * ptr) : m_ptr(ptr), m_len(strlen(ptr)) {}
+
+		const char * get_ptr() const { return m_ptr; }
+		t_size get_length() const { return m_len; }
+
+	private:
+		// Mutators: not supported on a reference to someone else's string.
+		void add_string(const char *,t_size) {
+			throw pfc::exception_not_implemented();
+		}
+		void set_string(const char *,t_size) {
+			throw pfc::exception_not_implemented();
+		}
+		void truncate(t_size) {
+			throw pfc::exception_not_implemented();
+		}
+		char * lock_buffer(t_size) {
+			throw pfc::exception_not_implemented();
+		}
+		void unlock_buffer() {
+			throw pfc::exception_not_implemented();
+		}
+
+		const char * const m_ptr;
+		t_size const m_len;
+	};
+
+	//! Copies source into the fixed-size array buffer, stopping at the source terminator or after len-1
+	//! characters, whichever comes first. The result is always null-terminated; longer input is truncated.
+	template<typename TChar, t_size len, typename TSource>
+	void stringToBuffer(TChar (&buffer)[len], const TSource & source) {
+		PFC_STATIC_ASSERT(len>0);
+		const t_size limit = len - 1; // the last slot is reserved for the terminator
+		t_size pos = 0;
+		while (pos < limit && source[pos] != 0) {
+			buffer[pos] = source[pos];
+			++pos;
+		}
+		buffer[pos] = 0;
+	}
+
+	//! Like stringToBuffer(), but never truncates: exception_overflow is thrown when source, including its
+	//! null terminator, does not fit into the len elements of buffer.
+	template<typename TChar, t_size len, typename TSource>
+	void stringToBufferGuarded(TChar (&buffer)[len], const TSource & source) {
+		t_size pos = 0;
+		while (source[pos] != 0) {
+			if (pos >= len) throw exception_overflow();
+			buffer[pos] = source[pos];
+			++pos;
+		}
+		// pos == len here means the characters fit but the terminator would not.
+		if (pos >= len) throw exception_overflow();
+		buffer[pos] = 0;
+	}
+
+
+	void urlEncodeAppendRaw(pfc::string_base & out, const char * in, t_size inSize); // flavour with an explicit input size
+	void urlEncodeAppend(pfc::string_base & out, const char * in); // NOTE(review): presumably appends to the existing contents of out - confirm
+	void urlEncode(pfc::string_base & out, const char * in); // NOTE(review): presumably replaces the contents of out - confirm
+
+
+	char * strDup(const char * src); // POSIX strdup() clone, prevent MSVC complaining
+
+	string8 lineEndingsToWin( const char * str ); // NOTE(review): name suggests conversion to CR LF line endings - confirm
+
+	string8 stringToUpper( const char * str, size_t len = SIZE_MAX ); // returns a new string8 rather than writing into a caller-supplied string
+	string8 stringToLower( const char * str, size_t len = SIZE_MAX );
+
+	//! Replaces the contents of out with the elements of in joined together. separator goes between
+	//! consecutive elements, except that separatorLast is used in front of the element at in.last().
+	//! Each element is converted with stringToPtr() before being appended.
+	template<typename t_source> static void stringCombine(pfc::string_base& out, t_source const& in, const char* separator, const char* separatorLast) {
+		out.reset();
+		// Position is tracked explicitly: testing whether out is still empty would also skip the
+		// separator after an empty leading element.
+		bool firstItem = true;
+		for (typename t_source::const_iterator walk = in.first(); walk.is_valid(); ++walk) {
+			if (!firstItem) {
+				if (walk == in.last()) out << separatorLast;
+				else out << separator;
+			}
+			firstItem = false;
+			out << stringToPtr(*walk);
+		}
+	}
+
+	//! Joins the elements of list into one string with separator between consecutive elements.
+	//! An empty list yields an empty string.
+	template<typename TList>
+	string stringCombineList(const TList& list, stringp separator) {
+		string acc;
+		typename TList::const_iterator iter = list.first();
+		if (!iter.is_valid()) return acc;
+
+		// First element is taken as-is; each later one is appended after a separator.
+		acc = *iter;
+		++iter;
+		while (iter.is_valid()) {
+			acc = acc + separator + *iter;
+			++iter;
+		}
+		return acc;
+	}
+
+
+	//! Recursion terminator: no arguments left, nothing to append.
+	inline void formatHere(pfc::string_base&) {}
+	//! Appends first to out with operator<<, then recurses on the remaining arguments, so the arguments
+	//! are appended left to right.
+	template<typename first_t, typename ... args_t>
+	void formatHere(pfc::string_base& out, first_t && first, args_t && ... args) {
+		out << std::forward<first_t>(first);
+		formatHere(out, std::forward<args_t>(args) ...);
+	}
+	
+
+	//! Builds a new string by appending every argument, in order, through formatHere().
+	template<typename ... args_t>
+	inline string format(args_t && ... args) {
+		string result;
+		formatHere(result, std::forward<args_t>(args) ...);
+		return result;
+	}
+
+	pfc::string8 prefixLines(const char* str, const char* prefix, const char * setEOL = "\n"); // setEOL defaults to "\n"; NOTE(review): presumably the line ending used in the result - confirm
+
+
+	pfc::string8 recover_invalid_utf8(const char* in, const char* subst = "_"); // subst defaults to "_"; NOTE(review): presumably substituted for invalid input sequences - confirm
+
+	pfc::string8 string_trim_spacing(const char* in); // NOTE(review): which characters count as spacing is defined by the implementation
+}