annotate foosdk/sdk/pfc/SmartStrStr.h @ 1:20d02a178406 default tip

*: check in everything else yay
author Paper <paper@tflc.us>
date Mon, 05 Jan 2026 02:15:46 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1 #pragma once
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
2
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
3
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
4 #include <set>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
5 #include <map>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
6 #include <string>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
7 #include <vector>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
8 #include "fixed_map.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
9
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
10 //! Implementation of string matching for search purposes, such as media library search or typefind in list views. \n
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
11 //! Inspired by Unicode asymmetric search, but not strictly implementing the Unicode asymmetric search specifications. \n
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
12 //! \n
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
13 //! Keeping a global instance of it is recommended, due to one time init overhead. \n
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
14 //! Thread safety: safe to call concurrently once constructed.
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
15
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
16 class SmartStrStr {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
17 public:
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Default constructor, defined out of line. NOTE(review): presumably where the one time init overhead mentioned above is paid - confirm in the implementation.
18 SmartStrStr();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
19
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Static word classification helpers, usable without an instance. The classification rules are not visible in this header.
20 static bool isWordChar(unsigned c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
21 static bool isWordChar(const char* ptr);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
22 static bool isValidWord(const char*);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Takes a string and a callback receiving pfc::string_part_ref values. NOTE(review): presumably invoked once per word found - confirm in the implementation.
23 static void findWords(const char*, std::function<void(pfc::string_part_ref)>);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
24
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
25 //! Returns ptr to the end of the string if positive (for continuing search), nullptr if negative.
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Declared for char, char16_t and wchar_t strings. outFoundAt is optional (defaults to nullptr). NOTE(review): presumably receives the position where the match starts - confirm.
26 const char * strStrEnd(const char * pString, const char * pSubString, size_t * outFoundAt = nullptr) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
27 const char16_t * strStrEnd16(const char16_t * pString, const char16_t * pSubString, size_t * outFoundAt = nullptr) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
28 const wchar_t * strStrEndW(const wchar_t * pString, const wchar_t * pSubString, size_t * outFoundAt = nullptr) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
29
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Same signature as strStrEnd(). NOTE(review): judging by the name, a variant restricted to word matches - confirm in the implementation.
30 const char* strStrEndWord(const char* pString, const char* pSubString, size_t* outFoundAt = nullptr) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
31
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Boolean tests declared for char, char16_t and wchar_t strings. NOTE(review): presumably true when sub is found within str - confirm.
32 bool testSubstring( const char * str, const char * sub ) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
33 bool testSubstring16( const char16_t * str, const char16_t * sub ) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
34 bool testSubstringW( const wchar_t * str, const wchar_t * sub ) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
35
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
36 //! Returns ptr to the end of the string if positive (for continuing search), nullptr if negative.
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
37 const char * matchHere(const char * pString, const char * pUserString) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
38 const char16_t * matchHere16(const char16_t * pString, const char16_t * pUserString) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
39 const wchar_t * matchHereW( const wchar_t * pString, const wchar_t * pUserString) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
40
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
41 //! String-equals tool, compares strings rather than searching for occurrence
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
42 bool equals( const char * pString, const char * pUserString) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
43 bool equals16( const char16_t * pString, const char16_t * pUserString) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
44 bool equalsW( const wchar_t * pString, const wchar_t * pUserString) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
45
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
46 //! One-char match. Doesn't use twoCharMappings, use only if you have to operate on char by char basis rather than call the other methods.
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
47 bool matchOneChar(uint32_t cInput, uint32_t cData) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
48
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Returns a reference to a SmartStrStr instance. NOTE(review): presumably the shared global instance recommended in the class comment - confirm.
49 static SmartStrStr& global();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
50
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! String transformation entry points: one returns a pfc::string8, the other two write into a caller supplied pfc::string8 (the last one takes an explicit input length).
//! NOTE(review): presumably these produce a normalized form of the input for matching - confirm in the implementation.
51 pfc::string8 transformStr(const char * str) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
52 void transformStrHere(pfc::string8& out, const char* in) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
53 void transformStrHere(pfc::string8& out, const char* in, size_t inLen) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
54 private:
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Member templates parameterized on the character type. NOTE(review): presumably the shared implementation behind the char / char16_t / wchar_t public methods - confirm.
55 template<typename char_t> const char_t * strStrEnd_(const char_t * pString, const char_t * pSubString, size_t * outFoundAt = nullptr) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
56 template<typename char_t> const char_t * matchHere_(const char_t * pString, const char_t * pUserString) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
57 template<typename char_t> bool equals_( const char_t * pString, const char_t * pUserString) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
58
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Private helpers for testSubstring-style checks involving a prefix, given either as a string plus length or as a single uint32_t value. Semantics are defined out of line.
59 bool testSubString_prefix(const char* str, const char* sub, const char * prefix, size_t prefixLen) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
60 bool testSubString_prefix(const char* str, const char* sub, uint32_t c) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
61 bool testSubString_prefix_subst(const char* str, const char* sub, uint32_t c) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
62
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Static per-character helpers mapping one uint32_t value to another. NOTE(review): presumably operating on Unicode code points - confirm.
63 static uint32_t Transform(uint32_t c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
64 static uint32_t ToLower(uint32_t c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
65
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! NOTE(review): presumably fills m_twoCharMappings and m_twoCharMappingsReverse - confirm in the implementation.
66 void InitTwoCharMappings();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
67
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Lookup tables keyed by uint32_t; they are populated outside this header.
68 fixed_map< uint32_t, uint32_t > m_downconvert;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
69 fixed_map< uint32_t, std::set<uint32_t> > m_substitutions;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
70 fixed_map< uint32_t, std::set<uint32_t> > m_substitutionsReverse;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
71
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
72
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Forward table maps a uint32_t key to a C string, reverse table maps a uint32_t key to a uint32_t. The stored const char* values are not owned by any visible code here - NOTE(review): confirm their lifetime.
73 fixed_map<uint32_t, const char* > m_twoCharMappings;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
74 fixed_map<uint32_t, uint32_t> m_twoCharMappingsReverse;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
75 };
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
76
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
77
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Filter object holding a map from std::string to t_size (t_stringlist) plus a pointer to a SmartStrStr instance.
//! NOTE(review): the t_size values are presumably per-string occurrence counts (see test_disregardCounts) - confirm in init().
78 class SmartStrFilter {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
79 public:
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
80 typedef std::map<std::string, t_size> t_stringlist;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Creates a filter with no items.
81 SmartStrFilter() { }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Creates a filter from an existing string list, by copy or by move.
82 SmartStrFilter(t_stringlist const& arg) : m_items(arg) {}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
83 SmartStrFilter(t_stringlist&& arg) : m_items(std::move(arg)) {}
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Creates a filter by forwarding to init(). The one-argument form calls strlen(p), so p must be a non-null, null-terminated string.
//! These constructors are not marked explicit, so implicit conversions from const char* and t_stringlist are possible.
84 SmartStrFilter(const char* p) { init(p, strlen(p)); }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
85 SmartStrFilter(const char* p, size_t l) { init(p, l); }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
86
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Returns true for space, line feed (10), carriage return (13) and tab; false for any other char.
87 static bool is_spacing(char c) { return c == ' ' || c == 10 || c == 13 || c == '\t'; }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
88
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Initializes the filter from a pointer plus length; defined out of line. NOTE(review): presumably splits the input on is_spacing() and fills m_items - confirm.
89 void init(const char* ptr, size_t len);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Convenience overload: measures ptr with strlen() and forwards to the two-argument init(). ptr must be non-null and null-terminated.
90 void init( const char * ptr ) { init(ptr, strlen(ptr)); }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Tests of a source string against the filter; all three are defined out of line.
//! NOTE(review): going by the names, testWords matches on word level and test_disregardCounts ignores the t_size values - confirm in the implementation.
91 bool test(const char* src) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
92 bool testWords(const char* src) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
93 bool test_disregardCounts(const char* src) const;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
94
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Read-only access to the stored string list.
95 const t_stringlist& items() const { return m_items; }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! True when at least one item is stored. The conversion is implicit (not marked explicit).
96 operator bool() const { return !m_items.empty(); }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! True when no items are stored; the exact negation of operator bool().
97 bool empty() const { return m_items.empty(); }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
98
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Returns the SmartStrStr instance this filter points at (dereferences dc).
99 SmartStrStr & _SmartStrStr() const { return *dc; }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
100 private:
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
101 t_stringlist m_items;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
//! Non-owning pointer, default-initialized to the instance returned by SmartStrStr::global(); no visible code reassigns it.
102 SmartStrStr * dc = &SmartStrStr::global();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
103 };