|
1
|
1 #include "pfc-lite.h"
|
|
|
2
|
|
|
3 #include "string-compare.h"
|
|
|
4 #include "string_base.h"
|
|
|
5 #include "debug.h"
|
|
|
6 #include "bsearch_inline.h"
|
|
|
7 #include "sortstring.h"
|
|
|
8
|
|
|
9 namespace pfc {
|
|
|
10 unsigned charToANSI(unsigned GotChar, unsigned fallback) {
|
|
|
11 if (GotChar < 128) return GotChar;
|
|
|
12
|
|
|
13 static constexpr uint16_t from[] = {L'\u00C0', L'\u00C1', L'\u00C2', L'\u00C3', L'\u00C4', L'\u00C5', L'\u00C7', L'\u00C8', L'\u00C9', L'\u00CA', L'\u00CB', L'\u00CC', L'\u00CD', L'\u00CE', L'\u00CF', L'\u00D1', L'\u00D2', L'\u00D3', L'\u00D4', L'\u00D5', L'\u00D6', L'\u00D8', L'\u00D9', L'\u00DA', L'\u00DB', L'\u00DC', L'\u00DD', L'\u00E0', L'\u00E1', L'\u00E2', L'\u00E3', L'\u00E4', L'\u00E5', L'\u00E7', L'\u00E8', L'\u00E9', L'\u00EA', L'\u00EB', L'\u00EC', L'\u00ED', L'\u00EE', L'\u00EF', L'\u00F0', L'\u00F1', L'\u00F2', L'\u00F3', L'\u00F4', L'\u00F5', L'\u00F6', L'\u00F8', L'\u00F9', L'\u00FA', L'\u00FB', L'\u00FC', L'\u00FD', L'\u0100', L'\u0101', L'\u0102', L'\u0103', L'\u0104', L'\u0105', L'\u0106', L'\u0107', L'\u0108', L'\u0109', L'\u010A', L'\u010B', L'\u010C', L'\u010D', L'\u010E', L'\u010F', L'\u0110', L'\u0111', L'\u0112', L'\u0113', L'\u0114', L'\u0115', L'\u0116', L'\u0117', L'\u0118', L'\u0119', L'\u011A', L'\u011B', L'\u011C', L'\u011D', L'\u011E', L'\u011F', L'\u0120', L'\u0121', L'\u0122', L'\u0123', L'\u0128', L'\u0129', L'\u012A', L'\u012B', L'\u012C', L'\u012D', L'\u012E', L'\u012F', L'\u0130', L'\u0131', L'\u0134', L'\u0135', L'\u0136', L'\u0137', L'\u0139', L'\u013A', L'\u013B', L'\u013C', L'\u013D', L'\u013E', L'\u013F', L'\u0140', L'\u0141', L'\u0142', L'\u0143', L'\u0144', L'\u0145', L'\u0146', L'\u0147', L'\u0148', L'\u0149', L'\u014A', L'\u014B', L'\u014C', L'\u014D', L'\u014E', L'\u014F', L'\u0150', L'\u0151', L'\u0154', L'\u0155', L'\u0156', L'\u0157', L'\u0158', L'\u0159', L'\u015A', L'\u015B', L'\u015C', L'\u015D', L'\u015E', L'\u015F', L'\u0160', L'\u0161', L'\u0162', L'\u0163', L'\u0164', L'\u0165', L'\u0166', L'\u0167', L'\u0168', L'\u0169', L'\u016A', L'\u016B', L'\u016C', L'\u016D', L'\u016E', L'\u016F', L'\u0170', L'\u0171', L'\u0172', L'\u0173', L'\u0174', L'\u0175', L'\u0176', L'\u0177', L'\u0178', L'\u0179', L'\u017A', L'\u017B', L'\u017C', L'\u017D', L'\u017E'};
|
|
|
14 static constexpr uint16_t to[] = {L'\u0041', L'\u0041', L'\u0041', L'\u0041', L'\u0041', L'\u0041', L'\u0043', L'\u0045', L'\u0045', L'\u0045', L'\u0045', L'\u0049', L'\u0049', L'\u0049', L'\u0049', L'\u004E', L'\u004F', L'\u004F', L'\u004F', L'\u004F', L'\u004F', L'\u004F', L'\u0055', L'\u0055', L'\u0055', L'\u0055', L'\u0059', L'\u0061', L'\u0061', L'\u0061', L'\u0061', L'\u0061', L'\u0061', L'\u0063', L'\u0065', L'\u0065', L'\u0065', L'\u0065', L'\u0069', L'\u0069', L'\u0069', L'\u0069', L'\u006F', L'\u006E', L'\u006F', L'\u006F', L'\u006F', L'\u006F', L'\u006F', L'\u006F', L'\u0075', L'\u0075', L'\u0075', L'\u0075', L'\u0079', L'\u0041', L'\u0061', L'\u0041', L'\u0061', L'\u0041', L'\u0061', L'\u0043', L'\u0063', L'\u0043', L'\u0063', L'\u0043', L'\u0063', L'\u0043', L'\u0063', L'\u0044', L'\u0064', L'\u0044', L'\u0064', L'\u0045', L'\u0065', L'\u0045', L'\u0065', L'\u0045', L'\u0065', L'\u0045', L'\u0065', L'\u0045', L'\u0065', L'\u0047', L'\u0067', L'\u0047', L'\u0067', L'\u0047', L'\u0067', L'\u0047', L'\u0067', L'\u0049', L'\u0069', L'\u0049', L'\u0069', L'\u0049', L'\u0069', L'\u0049', L'\u0069', L'\u0049', L'\u0069', L'\u004A', L'\u006A', L'\u004B', L'\u006B', L'\u004C', L'\u006C', L'\u004C', L'\u006C', L'\u004C', L'\u006C', L'\u004C', L'\u006C', L'\u004C', L'\u006C', L'\u004E', L'\u006E', L'\u004E', L'\u006E', L'\u004E', L'\u006E', L'\u006E', L'\u004E', L'\u006E', L'\u004F', L'\u006F', L'\u004F', L'\u006F', L'\u004F', L'\u006F', L'\u0052', L'\u0072', L'\u0052', L'\u0072', L'\u0052', L'\u0072', L'\u0053', L'\u0073', L'\u0053', L'\u0073', L'\u0053', L'\u0073', L'\u0053', L'\u0073', L'\u0054', L'\u0074', L'\u0054', L'\u0074', L'\u0054', L'\u0074', L'\u0055', L'\u0075', L'\u0055', L'\u0075', L'\u0055', L'\u0075', L'\u0055', L'\u0075', L'\u0055', L'\u0075', L'\u0055', L'\u0075', L'\u0057', L'\u0077', L'\u0059', L'\u0079', L'\u0059', L'\u005A', L'\u007A', L'\u005A', L'\u007A', L'\u005A', L'\u007A'};
|
|
|
15 static_assert(std::size(from) == std::size(to));
|
|
|
16
|
|
|
17 size_t idx;
|
|
|
18 if (bsearch_simple_inline_t(from, std::size(from), GotChar, idx)) {
|
|
|
19 return to[idx];
|
|
|
20 }
|
|
|
21
|
|
|
22 return fallback;
|
|
|
23 }
|
|
|
24
|
|
|
25 int stricmp_ascii_partial(const char* str, const char* substr) throw() {
|
|
|
26 size_t walk = 0;
|
|
|
27 for (;;) {
|
|
|
28 char c1 = str[walk];
|
|
|
29 char c2 = substr[walk];
|
|
|
30 c1 = ascii_tolower(c1); c2 = ascii_tolower(c2);
|
|
|
31 if (c2 == 0) return 0; // substr terminated = ret0 regardless of str content
|
|
|
32 if (c1 < c2) return -1; // ret -1 early
|
|
|
33 else if (c1 > c2) return 1; // ret 1 early
|
|
|
34 // else c1 == c2 and c2 != 0 so c1 != 0 either
|
|
|
35 ++walk; // go on
|
|
|
36 }
|
|
|
37 }
|
|
|
38
|
|
|
39 bool stringEqualsI_ascii_ex(const char* s1, size_t len1, const char* s2, size_t len2) throw() {
|
|
|
40 t_size walk1 = 0, walk2 = 0;
|
|
|
41 for (;;) {
|
|
|
42 char c1 = (walk1 < len1) ? s1[walk1] : 0;
|
|
|
43 char c2 = (walk2 < len2) ? s2[walk2] : 0;
|
|
|
44 c1 = ascii_tolower(c1); c2 = ascii_tolower(c2);
|
|
|
45 if (c1 != c2) return false;
|
|
|
46 if (c1 == 0) return true;
|
|
|
47 walk1++;
|
|
|
48 walk2++;
|
|
|
49 }
|
|
|
50 }
|
|
|
51
|
|
|
52 int stricmp_ascii_ex(const char* const s1, t_size const len1, const char* const s2, t_size const len2) throw() {
|
|
|
53 t_size walk1 = 0, walk2 = 0;
|
|
|
54 for (;;) {
|
|
|
55 char c1 = (walk1 < len1) ? s1[walk1] : 0;
|
|
|
56 char c2 = (walk2 < len2) ? s2[walk2] : 0;
|
|
|
57 c1 = ascii_tolower(c1); c2 = ascii_tolower(c2);
|
|
|
58 if (c1 < c2) return -1;
|
|
|
59 else if (c1 > c2) return 1;
|
|
|
60 else if (c1 == 0) return 0;
|
|
|
61 walk1++;
|
|
|
62 walk2++;
|
|
|
63 }
|
|
|
64 }
|
|
|
65
|
|
|
66 int wstricmp_ascii(const wchar_t* s1, const wchar_t* s2) throw() {
|
|
|
67 for (;;) {
|
|
|
68 wchar_t c1 = *s1, c2 = *s2;
|
|
|
69
|
|
|
70 if (c1 > 0 && c2 > 0 && c1 < 128 && c2 < 128) {
|
|
|
71 c1 = ascii_tolower_lookup((char)c1);
|
|
|
72 c2 = ascii_tolower_lookup((char)c2);
|
|
|
73 } else {
|
|
|
74 if (c1 == 0 && c2 == 0) return 0;
|
|
|
75 }
|
|
|
76 if (c1 < c2) return -1;
|
|
|
77 else if (c1 > c2) return 1;
|
|
|
78 else if (c1 == 0) return 0;
|
|
|
79
|
|
|
80 s1++;
|
|
|
81 s2++;
|
|
|
82 }
|
|
|
83 }
|
|
|
84
|
|
|
85 int stricmp_ascii(const char* s1, const char* s2) throw() {
|
|
|
86 for (;;) {
|
|
|
87 char c1 = *s1, c2 = *s2;
|
|
|
88
|
|
|
89 if (c1 > 0 && c2 > 0) {
|
|
|
90 c1 = ascii_tolower_lookup(c1);
|
|
|
91 c2 = ascii_tolower_lookup(c2);
|
|
|
92 } else {
|
|
|
93 if (c1 == 0 && c2 == 0) return 0;
|
|
|
94 }
|
|
|
95 if (c1 < c2) return -1;
|
|
|
96 else if (c1 > c2) return 1;
|
|
|
97 else if (c1 == 0) return 0;
|
|
|
98
|
|
|
99 s1++;
|
|
|
100 s2++;
|
|
|
101 }
|
|
|
102 }
|
|
|
103
|
|
|
104 static int naturalSortCompareInternal(const char* s1, const char* s2, bool insensitive) throw() {
|
|
|
105 for (;; ) {
|
|
|
106 unsigned c1, c2;
|
|
|
107 size_t d1 = utf8_decode_char(s1, c1);
|
|
|
108 size_t d2 = utf8_decode_char(s2, c2);
|
|
|
109 if (d1 == 0 && d2 == 0) {
|
|
|
110 return 0;
|
|
|
111 }
|
|
|
112 if (char_is_numeric(c1) && char_is_numeric(c2)) {
|
|
|
113 // Numeric block in both strings, do natural sort magic here
|
|
|
114 size_t l1 = 1, l2 = 1;
|
|
|
115 while (char_is_numeric(s1[l1])) ++l1;
|
|
|
116 while (char_is_numeric(s2[l2])) ++l2;
|
|
|
117
|
|
|
118 size_t l = max_t(l1, l2);
|
|
|
119 for (int pass = 0; pass < 2; ++pass) {
|
|
|
120 const char filler = pass ? 'z' : '0';
|
|
|
121 for (size_t w = 0; w < l; ++w) {
|
|
|
122 char digit1 = filler, digit2 = filler;
|
|
|
123
|
|
|
124 t_ssize off;
|
|
|
125
|
|
|
126 off = w + l1 - l;
|
|
|
127 if (off >= 0) {
|
|
|
128 digit1 = s1[w - l + l1];
|
|
|
129 }
|
|
|
130 off = w + l2 - l;
|
|
|
131 if (off >= 0) {
|
|
|
132 digit2 = s2[w - l + l2];
|
|
|
133 }
|
|
|
134 if (digit1 < digit2) return -1;
|
|
|
135 if (digit1 > digit2) return 1;
|
|
|
136 }
|
|
|
137 }
|
|
|
138 s1 += l1; s2 += l2;
|
|
|
139 continue;
|
|
|
140 }
|
|
|
141
|
|
|
142 unsigned alt1 = charToANSI(c1, c1), alt2 = charToANSI(c2, c2);
|
|
|
143 if (alt1 != c1 || alt2 != c2) {
|
|
|
144 if (insensitive) {
|
|
|
145 alt1 = charLower(alt1);
|
|
|
146 alt2 = charLower(alt2);
|
|
|
147 }
|
|
|
148 if (alt1 < alt2) return -1;
|
|
|
149 if (alt1 > alt2) return 1;
|
|
|
150 }
|
|
|
151
|
|
|
152 if (insensitive) {
|
|
|
153 c1 = charLower(c1);
|
|
|
154 c2 = charLower(c2);
|
|
|
155 }
|
|
|
156 if (c1 < c2) return -1;
|
|
|
157 if (c1 > c2) return 1;
|
|
|
158
|
|
|
159 s1 += d1; s2 += d2;
|
|
|
160 }
|
|
|
161 }
|
|
|
162 int naturalSortCompare(const char* s1, const char* s2) throw() {
|
|
|
163 int v = naturalSortCompareInternal(s1, s2, true);
|
|
|
164 if (v) return v;
|
|
|
165 v = naturalSortCompareInternal(s1, s2, false);
|
|
|
166 if (v) return v;
|
|
|
167 return strcmp(s1, s2);
|
|
|
168 }
|
|
|
169
|
|
|
170 int naturalSortCompareI(const char* s1, const char* s2) throw() {
|
|
|
171 return naturalSortCompareInternal(s1, s2, true);
|
|
|
172 }
|
|
|
// Platform-specific natural-sort comparators. They are only declared here;
// sysNaturalSortCompare / sysNaturalSortCompareI call them on the matching platform.
#if defined(_WIN32)
int winNaturalSortCompare(const char* s1, const char* s2);
int winNaturalSortCompareI(const char* s1, const char* s2);
#endif // _WIN32
#if defined(__APPLE__)
int appleNaturalSortCompare(const char* s1, const char* s2);
int appleNaturalSortCompareI(const char* s1, const char* s2);
#endif // __APPLE__
|
|
|
181 int sysNaturalSortCompare(const char* s1, const char* s2) {
|
|
|
182 #ifdef _WIN32
|
|
|
183 return winNaturalSortCompare(s1, s2);
|
|
|
184 #elif defined(__APPLE__)
|
|
|
185 return appleNaturalSortCompare(s1, s2);
|
|
|
186 #else
|
|
|
187 return naturalSortCompare(s1, s2);
|
|
|
188 #endif
|
|
|
189 }
|
|
|
190 int sysNaturalSortCompareI(const char* s1, const char* s2) {
|
|
|
191 #ifdef _WIN32
|
|
|
192 return winNaturalSortCompareI(s1, s2);
|
|
|
193 #elif defined(__APPLE__)
|
|
|
194 return appleNaturalSortCompareI(s1, s2);
|
|
|
195 #else
|
|
|
196 return naturalSortCompareI(s1, s2);
|
|
|
197 #endif
|
|
|
198 }
|
|
|
199 const char* _stringComparatorCommon::myStringToPtr(string_part_ref) {
|
|
|
200 pfc::crash();
|
|
|
201 }
|
|
|
202
|
|
|
203 int stringCompareCaseInsensitiveEx(string_part_ref s1, string_part_ref s2) {
|
|
|
204 t_size w1 = 0, w2 = 0;
|
|
|
205 for (;;) {
|
|
|
206 unsigned c1, c2; t_size d1, d2;
|
|
|
207 d1 = utf8_decode_char(s1.m_ptr + w1, c1, s1.m_len - w1);
|
|
|
208 d2 = utf8_decode_char(s2.m_ptr + w2, c2, s2.m_len - w2);
|
|
|
209 if (d1 == 0 && d2 == 0) return 0;
|
|
|
210 else if (d1 == 0) return -1;
|
|
|
211 else if (d2 == 0) return 1;
|
|
|
212 else {
|
|
|
213 c1 = charLower(c1); c2 = charLower(c2);
|
|
|
214 if (c1 < c2) return -1;
|
|
|
215 else if (c1 > c2) return 1;
|
|
|
216 }
|
|
|
217 w1 += d1; w2 += d2;
|
|
|
218 }
|
|
|
219 }
|
|
|
220 int stringCompareCaseInsensitive(const char* s1, const char* s2) {
|
|
|
221 for (;;) {
|
|
|
222 unsigned c1, c2; t_size d1, d2;
|
|
|
223 d1 = utf8_decode_char(s1, c1);
|
|
|
224 d2 = utf8_decode_char(s2, c2);
|
|
|
225 if (d1 == 0 && d2 == 0) return 0;
|
|
|
226 else if (d1 == 0) return -1;
|
|
|
227 else if (d2 == 0) return 1;
|
|
|
228 else {
|
|
|
229 c1 = charLower(c1); c2 = charLower(c2);
|
|
|
230 if (c1 < c2) return -1;
|
|
|
231 else if (c1 > c2) return 1;
|
|
|
232 }
|
|
|
233 s1 += d1; s2 += d2;
|
|
|
234 }
|
|
|
235 }
|
|
|
#if defined(PFC_SORTSTRING_GENERIC)
// Generic sort-string comparators, compiled only when PFC_SORTSTRING_GENERIC is defined.
// Both forward directly to the natural-sort functions above.

// Forwards to naturalSortCompare.
int sortStringCompare(const char* str1, const char* str2) {
    const int order = naturalSortCompare(str1, str2);
    return order;
}

// Forwards to naturalSortCompareI (case-insensitive).
int sortStringCompareI(const char* str1, const char* str2) {
    const int order = naturalSortCompareI(str1, str2);
    return order;
}
#endif // PFC_SORTSTRING_GENERIC
|
|
|
244 }
|