|
1
|
1 #pragma once
|
|
|
2
|
|
|
3
|
|
|
4 #include <set>
|
|
|
5 #include <map>
|
|
|
6 #include <string>
|
|
|
7 #include <vector>
|
|
|
8 #include "fixed_map.h"
|
|
|
9
|
|
|
10 //! Implementation of string matching for search purposes, such as media library search or typefind in list views. \n
|
|
|
11 //! Inspired by Unicode asymetic search, but not strictly implementing the Unicode asymetric search specifications. \n
|
|
|
12 //! \n
|
|
|
13 //! Keeping a global instance of it is recommended, due to one time init overhead. \n
|
|
|
14 //! Thread safety: safe to call concurrently once constructed.
|
|
|
15
|
|
|
16 class SmartStrStr {
|
|
|
17 public:
|
|
|
18 SmartStrStr();
|
|
|
19
|
|
|
20 static bool isWordChar(unsigned c);
|
|
|
21 static bool isWordChar(const char* ptr);
|
|
|
22 static bool isValidWord(const char*);
|
|
|
23 static void findWords(const char*, std::function<void(pfc::string_part_ref)>);
|
|
|
24
|
|
|
25 //! Returns ptr to the end of the string if positive (for continuing search), nullptr if negative.
|
|
|
26 const char * strStrEnd(const char * pString, const char * pSubString, size_t * outFoundAt = nullptr) const;
|
|
|
27 const char16_t * strStrEnd16(const char16_t * pString, const char16_t * pSubString, size_t * outFoundAt = nullptr) const;
|
|
|
28 const wchar_t * strStrEndW(const wchar_t * pString, const wchar_t * pSubString, size_t * outFoundAt = nullptr) const;
|
|
|
29
|
|
|
30 const char* strStrEndWord(const char* pString, const char* pSubString, size_t* outFoundAt = nullptr) const;
|
|
|
31
|
|
|
32 bool testSubstring( const char * str, const char * sub ) const;
|
|
|
33 bool testSubstring16( const char16_t * str, const char16_t * sub ) const;
|
|
|
34 bool testSubstringW( const wchar_t * str, const wchar_t * sub ) const;
|
|
|
35
|
|
|
36 //! Returns ptr to the end of the string if positive (for continuing search), nullptr if negative.
|
|
|
37 const char * matchHere(const char * pString, const char * pUserString) const;
|
|
|
38 const char16_t * matchHere16(const char16_t * pString, const char16_t * pUserString) const;
|
|
|
39 const wchar_t * matchHereW( const wchar_t * pString, const wchar_t * pUserString) const;
|
|
|
40
|
|
|
41 //! String-equals tool, compares strings rather than searching for occurance
|
|
|
42 bool equals( const char * pString, const char * pUserString) const;
|
|
|
43 bool equals16( const char16_t * pString, const char16_t * pUserString) const;
|
|
|
44 bool equalsW( const wchar_t * pString, const wchar_t * pUserString) const;
|
|
|
45
|
|
|
46 //! One-char match. Doesn't use twoCharMappings, use only if you have to operate on char by char basis rather than call the other methods.
|
|
|
47 bool matchOneChar(uint32_t cInput, uint32_t cData) const;
|
|
|
48
|
|
|
49 static SmartStrStr& global();
|
|
|
50
|
|
|
51 pfc::string8 transformStr(const char * str) const;
|
|
|
52 void transformStrHere(pfc::string8& out, const char* in) const;
|
|
|
53 void transformStrHere(pfc::string8& out, const char* in, size_t inLen) const;
|
|
|
54 private:
|
|
|
55 template<typename char_t> const char_t * strStrEnd_(const char_t * pString, const char_t * pSubString, size_t * outFoundAt = nullptr) const;
|
|
|
56 template<typename char_t> const char_t * matchHere_(const char_t * pString, const char_t * pUserString) const;
|
|
|
57 template<typename char_t> bool equals_( const char_t * pString, const char_t * pUserString) const;
|
|
|
58
|
|
|
59 bool testSubString_prefix(const char* str, const char* sub, const char * prefix, size_t prefixLen) const;
|
|
|
60 bool testSubString_prefix(const char* str, const char* sub, uint32_t c) const;
|
|
|
61 bool testSubString_prefix_subst(const char* str, const char* sub, uint32_t c) const;
|
|
|
62
|
|
|
63 static uint32_t Transform(uint32_t c);
|
|
|
64 static uint32_t ToLower(uint32_t c);
|
|
|
65
|
|
|
66 void InitTwoCharMappings();
|
|
|
67
|
|
|
68 fixed_map< uint32_t, uint32_t > m_downconvert;
|
|
|
69 fixed_map< uint32_t, std::set<uint32_t> > m_substitutions;
|
|
|
70 fixed_map< uint32_t, std::set<uint32_t> > m_substitutionsReverse;
|
|
|
71
|
|
|
72
|
|
|
73 fixed_map<uint32_t, const char* > m_twoCharMappings;
|
|
|
74 fixed_map<uint32_t, uint32_t> m_twoCharMappingsReverse;
|
|
|
75 };
|
|
|
76
|
|
|
77
|
|
|
78 class SmartStrFilter {
|
|
|
79 public:
|
|
|
80 typedef std::map<std::string, t_size> t_stringlist;
|
|
|
81 SmartStrFilter() { }
|
|
|
82 SmartStrFilter(t_stringlist const& arg) : m_items(arg) {}
|
|
|
83 SmartStrFilter(t_stringlist&& arg) : m_items(std::move(arg)) {}
|
|
|
84 SmartStrFilter(const char* p) { init(p, strlen(p)); }
|
|
|
85 SmartStrFilter(const char* p, size_t l) { init(p, l); }
|
|
|
86
|
|
|
87 static bool is_spacing(char c) { return c == ' ' || c == 10 || c == 13 || c == '\t'; }
|
|
|
88
|
|
|
89 void init(const char* ptr, size_t len);
|
|
|
90 void init( const char * ptr ) { init(ptr, strlen(ptr)); }
|
|
|
91 bool test(const char* src) const;
|
|
|
92 bool testWords(const char* src) const;
|
|
|
93 bool test_disregardCounts(const char* src) const;
|
|
|
94
|
|
|
95 const t_stringlist& items() const { return m_items; }
|
|
|
96 operator bool() const { return !m_items.empty(); }
|
|
|
97 bool empty() const { return m_items.empty(); }
|
|
|
98
|
|
|
99 SmartStrStr & _SmartStrStr() const { return *dc; }
|
|
|
100 private:
|
|
|
101 t_stringlist m_items;
|
|
|
102 SmartStrStr * dc = &SmartStrStr::global();
|
|
|
103 };
|