annotate foosdk/sdk/pfc/SmartStrStr.cpp @ 1:20d02a178406 default tip

*: check in everything else yay
author Paper <paper@tflc.us>
date Mon, 05 Jan 2026 02:15:46 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
1 #include "pfc-lite.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
2
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
3 #include "string-conv-lite.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
4 #include "string_conv.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
5 #include "SmartStrStr.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
6 #include <algorithm>
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
7 #include "SmartStrStr-table.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
8 #include "SmartStrStr-twoCharMappings.h"
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
9
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
10 bool SmartStrStr::isWordChar(unsigned c) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
11 // FIX ME map Unicode ranges somehow
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
12 return c >= 128 || pfc::char_is_ascii_alphanumeric((char)c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
13 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
14
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
15 bool SmartStrStr::isWordChar(const char* ptr) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
16 unsigned c;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
17 size_t d = pfc::utf8_decode_char(ptr, c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
18 if (d == 0) return false; // bad UTF-8
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
19 return isWordChar(c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
20 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
21
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
22 bool SmartStrStr::isValidWord(const char* ptr) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
23 if (*ptr == 0) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
24 do {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
25 unsigned c;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
26 size_t d = pfc::utf8_decode_char(ptr, c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
27 if (d == 0) return false; // bad UTF-8
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
28 if (!isWordChar(c)) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
29 ptr += d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
30 } while (*ptr != 0);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
31 return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
32 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
33
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
34 void SmartStrStr::findWords(const char* str, std::function<void(pfc::string_part_ref)> cb) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
35 size_t base = 0, walk = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
36 for (;; ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
37 unsigned c = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
38 size_t d = pfc::utf8_decode_char(str + walk, c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
39 if (d == 0) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
40
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
41 if (!SmartStrStr::isWordChar(c)) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
42 if (walk > base) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
43 cb(pfc::string_part(str + base, walk - base));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
44 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
45 base = walk + d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
46 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
47 walk += d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
48 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
49 if (walk > base) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
50 cb(pfc::string_part(str + base, walk - base));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
51 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
52 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
53
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
54 SmartStrStr::SmartStrStr() {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
55 std::map<uint32_t, std::set<uint32_t> > substitutions, substitutionsReverse;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
56 std::map<uint32_t, uint32_t > downconvert;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
57
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
58 #if 1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
59 for (auto& walk : SmartStrStrTable) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
60 downconvert[walk.from] = walk.to;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
61 substitutions[walk.from].insert(walk.to);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
62 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
63 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
64 for (uint32_t walk = 128; walk < 0x10000; ++walk) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
65 uint32_t c = Transform(walk);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
66 if (c != walk) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
67 downconvert[walk] = c;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
68 substitutions[walk].insert(c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
69 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
70 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
71 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
72
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
73 for (uint32_t walk = 32; walk < 0x10000; ++walk) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
74 auto lo = ToLower(walk);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
75 if (lo != walk) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
76 auto & s = substitutions[walk]; s.insert(lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
77
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
78 auto iter = substitutions.find(lo);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
79 if (iter != substitutions.end()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
80 s.insert(iter->second.begin(), iter->second.end());
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
81 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
82 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
83 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
84
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
85 for( auto & walk : substitutions ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
86 for( auto & walk2 : walk.second ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
87 substitutionsReverse[walk2].insert(walk.first);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
88 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
89 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
90
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
91 this->m_substitutions.initialize(std::move(substitutions));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
92 this->m_substitutionsReverse.initialize(std::move(substitutionsReverse));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
93 this->m_downconvert.initialize(std::move(downconvert));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
94 InitTwoCharMappings();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
95 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
96
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
97 // == TEMPLATES ==
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
98 template<typename char_t> const char_t * SmartStrStr::matchHere_(const char_t * pString, const char_t * pUserString) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
99 auto walkData = pString;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
100 auto walkUser = pUserString;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
101 for (;; ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
102 if (*walkUser == 0) return walkData;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
103
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
104 uint32_t cData, cUser;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
105 size_t dData = pfc::uni_decode_char(walkData, cData);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
106 size_t dUser = pfc::uni_decode_char(walkUser, cUser);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
107 if (dData == 0 || dUser == 0) return nullptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
108
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
109 if (cData != cUser) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
110 bool gotMulti = false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
111 {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
112 const char * cDataSubst = m_twoCharMappings.query(cData);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
113 if (cDataSubst != nullptr) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
114 PFC_ASSERT(strlen(cDataSubst) == 2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
115 if (matchOneChar(cUser, (uint32_t)cDataSubst[0])) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
116 auto walkUser2 = walkUser + dUser;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
117 uint32_t cUser2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
118 auto dUser2 = pfc::uni_decode_char(walkUser2, cUser2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
119 if (matchOneChar(cUser2, (uint32_t)cDataSubst[1])) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
120 gotMulti = true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
121 dUser += dUser2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
122 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
123 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
124 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
125 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
126 if (!gotMulti) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
127 if (!matchOneChar(cUser, cData)) return nullptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
128 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
129 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
130
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
131 walkData += dData;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
132 walkUser += dUser;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
133 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
134 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
135 template<typename char_t> bool SmartStrStr::equals_( const char_t * pString, const char_t * pUserString) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
136 auto p = this->matchHere_(pString, pUserString);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
137 if ( p == nullptr ) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
138 return *p == 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
139 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
140
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
141 template<typename char_t> const char_t * SmartStrStr::strStrEnd_(const char_t * pString, const char_t * pSubString, size_t * outFoundAt) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
142 size_t walk = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
143 for (;; ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
144 if (pString[walk] == 0) return nullptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
145 auto end = matchHere_(pString + walk, pSubString);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
146 if (end != nullptr) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
147 if (outFoundAt != nullptr) * outFoundAt = walk;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
148 return end;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
149 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
150
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
151 size_t delta = pfc::uni_char_length(pString + walk);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
152 if (delta == 0) return nullptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
153 walk += delta;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
154 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
155 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
156 // == END TEMPLATES ==
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
157
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
158 const char16_t * SmartStrStr::matchHere16(const char16_t * pString, const char16_t * pUserString) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
159 return this->matchHere_(pString, pUserString);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
160 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
161 const char * SmartStrStr::matchHere(const char * pString, const char * pUserString) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
162 return this->matchHere_(pString, pUserString);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
163 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
164 const wchar_t * SmartStrStr::matchHereW(const wchar_t * pString, const wchar_t * pUserString) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
165 return this->matchHere_(pString, pUserString);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
166 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
167
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
168 bool SmartStrStr::equals(const char * pString, const char * pUserString) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
169 return equals_(pString, pUserString);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
170 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
171 bool SmartStrStr::equals16(const char16_t* pString, const char16_t* pUserString) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
172 return equals_(pString, pUserString);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
173 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
174 bool SmartStrStr::equalsW( const wchar_t * pString, const wchar_t * pUserString) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
175 return equals_(pString, pUserString);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
176 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
177 const char * SmartStrStr::strStrEnd(const char * pString, const char * pSubString, size_t * outFoundAt) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
178 return strStrEnd_(pString, pSubString, outFoundAt);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
179 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
180
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
181 const char16_t * SmartStrStr::strStrEnd16(const char16_t * pString, const char16_t * pSubString, size_t * outFoundAt) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
182 return strStrEnd_(pString, pSubString, outFoundAt);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
183 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
184
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
185 const wchar_t * SmartStrStr::strStrEndW(const wchar_t * pString, const wchar_t * pSubString, size_t * outFoundAt) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
186 return strStrEnd_(pString, pSubString, outFoundAt);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
187 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
188
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
189 static bool wordBeginsHere(const char* base, size_t offset) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
190 if (offset == 0) return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
191 for (size_t len = 1; len <= offset && len <= 6; --len) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
192 unsigned c;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
193 if (pfc::utf8_decode_char(base + offset - len, c) == len) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
194 return !SmartStrStr::isWordChar(c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
195 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
196 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
197 return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
198 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
199
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
200 const char* SmartStrStr::strStrEndWord(const char* pString, const char* pSubString, size_t* outFoundAt) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
201 size_t walk = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
202 for (;;) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
203 size_t foundAt = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
204 auto end = strStrEnd(pString + walk, pSubString, &foundAt);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
205 if (end == nullptr) return nullptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
206 foundAt += walk;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
207 if (!isWordChar(end) && wordBeginsHere(pString, foundAt)) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
208 if (outFoundAt) *outFoundAt = foundAt;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
209 return end;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
210 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
211 walk = end - pString;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
212 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
213 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
214
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
215 bool SmartStrStr::matchOneChar(uint32_t cInput, uint32_t cData) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
216 if (cInput == cData) return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
217 auto v = m_substitutions.query_ptr(cData);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
218 if (v == nullptr) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
219 return v->count(cInput) > 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
220 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
221
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
222 pfc::string8 SmartStrStr::transformStr(const char* str) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
223 pfc::string8 ret; transformStrHere(ret, str); return ret;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
224 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
225
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
226 void SmartStrStr::transformStrHere(pfc::string8& out, const char* in) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
227 transformStrHere(out, in, strlen(in));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
228 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
229
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
230 void SmartStrStr::transformStrHere(pfc::string8& out, const char* in, size_t inLen) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
231 out.prealloc(inLen);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
232 out.clear();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
233 for (size_t walk = 0; walk < inLen; ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
234 unsigned c;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
235 size_t d = pfc::utf8_decode_char(in + walk, c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
236 if (d == 0 || walk+d>inLen) break;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
237 walk += d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
238 const char* alt = m_twoCharMappings.query(c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
239 if (alt != nullptr) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
240 out << alt; continue;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
241 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
242 unsigned alt2 = m_downconvert.query(c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
243 if (alt2 != 0) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
244 out.add_char(alt2); continue;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
245 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
246 out.add_char(c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
247 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
248 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
249
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
#if 0 // Windows specific code
// Disabled. Converts c through WideCharToMultiByte() with the ASCII code page and
// returns the resulting byte when the conversion succeeded without falling back to
// the default character and produced exactly one positive byte; otherwise returns c.
uint32_t SmartStrStr::Transform(uint32_t c) {
	wchar_t wide[2] = {};
	char out[4] = {};
	pfc::utf16_encode_char(c, wide);
	BOOL fail = FALSE;
	const bool converted = WideCharToMultiByte(pfc::stringcvt::codepage_ascii, 0, wide, 2, out, 4, "?", &fail) > 0;
	if (converted && !fail && out[0] > 0 && out[1] == 0) {
		c = out[0];
	}
	return c;
}
#endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
265
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
266 uint32_t SmartStrStr::ToLower(uint32_t c) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
267 return pfc::charLower(c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
268 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
269
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
270 void SmartStrStr::InitTwoCharMappings() {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
271 std::map<uint32_t, const char* > mappings;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
272 std::map<uint32_t, uint32_t> reverse;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
273 for (auto& walk : twoCharMappings) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
274 mappings[walk.from] = walk.to;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
275 uint32_t c1, c2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
276 const char * p = walk.to;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
277 size_t d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
278 d = pfc::utf8_decode_char(p, c1);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
279 if ( d > 0 ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
280 p += d;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
281 d = pfc::utf8_decode_char(p, c2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
282 if (d > 0) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
283 if (c1 < 0x10000 && c2 < 0x10000) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
284 reverse[c1 | (c2 << 16)] = walk.from;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
285 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
286 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
287 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
288 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
289 m_twoCharMappings.initialize(std::move(mappings));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
290 m_twoCharMappingsReverse.initialize(std::move(reverse));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
291 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
292 bool SmartStrStr::testSubString_prefix(const char* str, const char* sub, const char * prefix, size_t prefixLen) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
293
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
294 switch(prefixLen) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
295 case 0:
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
296 return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
297 case 1:
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
298 for(const char * walk = str;; ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
299 walk = strchr(walk, *prefix);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
300 if ( walk == nullptr ) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
301 ++walk;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
302 if (matchHere(walk, sub)) return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
303 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
304 default:
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
305 for(const char * walk = str;; ) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
306 walk = strstr(walk, prefix);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
307 if ( walk == nullptr ) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
308 walk += prefixLen;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
309 if (matchHere(walk, sub)) return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
310 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
311 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
312 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
313 bool SmartStrStr::testSubString_prefix(const char* str, const char* sub, uint32_t c) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
314 size_t tempLen;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
315 char temp[8];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
316 tempLen = pfc::utf8_encode_char(c, temp); temp[tempLen] = 0;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
317 return testSubString_prefix(str, sub, temp, tempLen);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
318 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
319 bool SmartStrStr::testSubString_prefix_subst(const char* str, const char* sub, uint32_t prefix) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
320 if ( testSubString_prefix(str, sub, prefix)) return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
321
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
322 auto alt = m_substitutionsReverse.query_ptr( prefix );
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
323 if (alt != nullptr) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
324 for (auto c : *alt) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
325 if (testSubString_prefix(str, sub, c)) return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
326 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
327 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
328
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
329 return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
330 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
331 bool SmartStrStr::testSubstring(const char* str, const char* sub) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
332 #if 1
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
333 // optimized version for UTF-8
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
334 unsigned prefix;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
335 const size_t skip = pfc::uni_decode_char(sub, prefix);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
336 if ( skip == 0 ) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
337 sub += skip;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
338
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
339 if (testSubString_prefix_subst(str, sub, prefix)) return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
340
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
341 unsigned prefix2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
342 const size_t skip2 = pfc::uni_decode_char(sub, prefix2);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
343 if (skip2 > 0 && prefix < 0x10000 && prefix2 < 0x10000) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
344 sub += skip2;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
345 auto alt = m_twoCharMappingsReverse.query(prefix | (prefix2 << 16));
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
346 if (alt != 0) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
347 if (testSubString_prefix_subst(str, sub, alt)) return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
348 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
349 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
350
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
351 return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
352 #else
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
353 return this->strStrEnd(str, sub) != nullptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
354 #endif
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
355 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
356 bool SmartStrStr::testSubstring16(const char16_t* str, const char16_t* sub) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
357 return this->strStrEnd16(str, sub) != nullptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
358 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
359 bool SmartStrStr::testSubstringW( const wchar_t * str, const wchar_t * sub ) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
360 return this->strStrEndW(str, sub) != nullptr;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
361 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
362
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
363 SmartStrStr& SmartStrStr::global() {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
364 static SmartStrStr g;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
365 return g;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
366 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
367
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
368
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
369 void SmartStrFilter::init(const char* ptr, size_t len) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
370 pfc::string_formatter current, temp;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
371 bool inQuotation = false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
372
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
373 auto addCurrent = [&] {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
374 if (!current.is_empty()) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
375 ++m_items[current.get_ptr()]; current.reset();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
376 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
377 };
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
378
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
379 for (t_size walk = 0; walk < len; ++walk) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
380 const char c = ptr[walk];
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
381 if (c == '\"') inQuotation = !inQuotation;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
382 else if (!inQuotation && is_spacing(c)) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
383 addCurrent();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
384 } else {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
385 current.add_byte(c);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
386 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
387 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
388 if (inQuotation) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
389 // Allow unbalanced quotes, take the whole string *with* quotation marks
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
390 m_items.clear();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
391 current.set_string_nc(ptr, len);
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
392 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
393
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
394 addCurrent();
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
395 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
396
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
397
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
398 bool SmartStrFilter::test_disregardCounts(const char* src) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
399 if (m_items.empty()) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
400
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
401 for (auto& walk : m_items) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
402 if (!dc->strStrEnd(src, walk.first.c_str())) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
403 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
404 return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
405 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
406
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
407 bool SmartStrFilter::testWords(const char* src) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
408 if (m_items.empty()) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
409
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
410 for (auto& walk : m_items) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
411 const auto count = walk.second;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
412 const auto& str = walk.first;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
413 const auto* strWalk = src;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
414 for (size_t i = 0; i < count; ++i) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
415 auto next = dc->strStrEndWord(strWalk, str.c_str());
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
416 if (next == nullptr) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
417 strWalk = next;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
418 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
419 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
420 return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
421 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
422
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
423 bool SmartStrFilter::test(const char* src) const {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
424
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
425 if (m_items.empty()) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
426
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
427 // Use the faster routine first, it can't be used to count occurances but nobody really knows about this feature
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
428 for (auto& walk : m_items) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
429 if (!dc->testSubstring(src, walk.first.c_str())) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
430 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
431 // Have any items where specific number of occurances is wanted?
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
432 for (auto & walk : m_items) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
433 const auto count = walk.second;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
434 if (count == 1) continue;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
435 const auto& str = walk.first;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
436 const auto* strWalk = src;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
437 for (size_t i = 0; i < count; ++i) {
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
438 auto next = dc->strStrEnd(strWalk, str.c_str());
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
439 if (next == nullptr) return false;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
440 strWalk = next;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
441 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
442 }
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
443 return true;
20d02a178406 *: check in everything else
Paper <paper@tflc.us>
parents:
diff changeset
444 }