Mercurial > minori
annotate src/core/strings.cc @ 259:0362f3c4534c
widgets/graph: improve drawing code
author | Paper <paper@paper.us.eu.org> |
---|---|
date | Mon, 01 Apr 2024 18:11:15 -0400 |
parents | 862d0d8619f6 |
children | dd211ff68b36 |
rev | line source |
---|---|
9 | 1 /** |
2 * strings.cc: Useful functions for manipulating strings | |
3 **/ | |
4 #include "core/strings.h" | |
187
9613d72b097e
*: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents:
120
diff
changeset
|
5 #include "core/session.h" // locale |
9613d72b097e
*: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents:
120
diff
changeset
|
6 |
64 | 7 #include <QByteArray> |
101
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
8 #include <QDebug> |
258 | 9 #include <QLocale> |
64 | 10 #include <QString> |
187
9613d72b097e
*: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents:
120
diff
changeset
|
11 |
15 | 12 #include <algorithm> |
13 #include <cctype> | |
62 | 14 #include <codecvt> |
258 | 15 #include <iostream> |
9 | 16 #include <locale> |
17 #include <string> | |
258 | 18 #include <unordered_map> |
9 | 19 #include <vector> |
20 | |
21 namespace Strings { | |
22 | |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
/* Joins the elements of `vector` into a single string, inserting
 * `delimiter` between each pair of neighbouring elements.
 *
 * Returns an empty string when `vector` has no elements.
 */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
	std::string joined;

	bool is_first = true;
	for (const std::string& piece : vector) {
		/* the delimiter goes *between* elements, so skip it before the first one */
		if (!is_first)
			joined.append(delimiter);
		joined.append(piece);
		is_first = false;
	}

	return joined;
}
38 | |
/* Joins the elements of `set` (in the set's iteration order) into a
 * single string, inserting `delimiter` between neighbouring elements.
 *
 * Returns an empty string when `set` has no elements.
 */
std::string Implode(const std::set<std::string>& set, const std::string& delimiter) {
	std::string joined;

	auto cursor = set.cbegin();
	const auto finish = set.cend();
	if (cursor == finish)
		return joined;

	/* first element has no delimiter in front of it; every later one does */
	joined.append(*cursor);
	for (++cursor; cursor != finish; ++cursor) {
		joined.append(delimiter);
		joined.append(*cursor);
	}

	return joined;
}
53 | |
/* Splits `text` at every occurrence of `delimiter` and returns the
 * pieces in order. Adjacent delimiters (and a trailing delimiter)
 * produce empty pieces.
 *
 * Returns an empty vector when `text` is empty. When `delimiter` is
 * empty there is nothing to split on, so the whole text is returned as
 * a single piece.
 */
std::vector<std::string> Split(const std::string& text, const std::string& delimiter) {
	if (text.empty())
		return {};

	/* std::string::find("") matches at `start` without ever advancing,
	 * so looping on an empty delimiter would never terminate */
	if (delimiter.empty())
		return {text};

	std::vector<std::string> tokens;

	std::size_t start = 0, end = 0;
	while ((end = text.find(delimiter, start)) != std::string::npos) {
		tokens.push_back(text.substr(start, end - start));
		start = end + delimiter.length();
	}
	/* whatever follows the last delimiter (possibly nothing) */
	tokens.push_back(text.substr(start));

	return tokens;
}
39521c47c7a3
*: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents:
116
diff
changeset
|
69 |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
/* Returns a copy of `string` in which every non-overlapping occurrence
 * of `find` has been replaced with `replace`. Text inserted by a
 * replacement is never re-scanned, so `replace` may contain `find`.
 *
 * An empty `find` pattern leaves the string untouched.
 *
 * Used in this file to clean HTML debris out of synopses.
 */
std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
	/* an empty pattern matches at every position without consuming
	 * anything, so the loop below would never finish */
	if (find.empty())
		return string;

	size_t pos = 0;
	while ((pos = string.find(find, pos)) != std::string::npos) {
		string.replace(pos, find.length(), replace);
		/* resume after the inserted text */
		pos += replace.length();
	}
	return string;
}
81 | |
82 std::string SanitizeLineEndings(const std::string& string) { | |
114
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
83 /* LOL */ |
258 | 84 return ReplaceAll(ReplaceAll(ReplaceAll(ReplaceAll(ReplaceAll(string, "\r\n", "\n"), "</p>", "\n"), "<br>", "\n"), |
85 "<br />", "\n"), | |
86 "\n\n\n", "\n\n"); | |
9 | 87 } |
88 | |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
/* Strips HTML tags from `string`: every span that starts at a '<' and
 * ends at the next '>' after it is removed.
 *
 * A '<' with no closing '>' after it is left in place (together with
 * everything that follows), and a '>' that appears before any '<' is
 * treated as ordinary text.
 */
std::string RemoveHtmlTags(std::string string) {
	std::size_t open = 0;
	while ((open = string.find('<', open)) != std::string::npos) {
		/* look for the terminator *after* the opener; an earlier '>' is
		 * plain text and must not be paired with this '<' */
		const std::size_t close = string.find('>', open + 1);
		if (close == std::string::npos)
			break; /* unterminated tag: nothing more can be stripped */

		string.erase(open, close - open + 1);
		/* `open` now indexes the text that followed the tag; keep scanning from there */
	}
	return string;
}
102 | |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
/* Decodes a small set of named HTML entities into UTF-8 text,
 * e.g. "&lt;" becomes "<" and "&amp;" becomes "&".
 *
 * The input is scanned once from left to right, so the output of one
 * replacement is never decoded a second time ("&amp;lt;" yields the
 * literal text "&lt;", not "<"). An '&' that does not start one of the
 * known entities is copied through unchanged.
 *
 * Replacement values are written as UTF-8 byte escapes so that they do
 * not depend on the encoding the source file is saved or served in.
 */
std::string ParseHtmlEntities(std::string string) {
	struct Entity {
		const char* name; /* entity as it appears in the input, including '&' and ';' */
		const char* utf8; /* UTF-8 bytes it decodes to */
	};

	static const Entity entities[] = {
		{"&lt;",    "<"           },
		{"&gt;",    ">"           },
		{"&nbsp;",  "\xC2\xA0"    }, /* U+00A0 NO-BREAK SPACE */
		{"&amp;",   "&"           },
		{"&quot;",  "\""          },
		{"&apos;",  "'"           },
		{"&cent;",  "\xC2\xA2"    }, /* U+00A2 */
		{"&pound;", "\xC2\xA3"    }, /* U+00A3 */
		{"&euro;",  "\xE2\x82\xAC"}, /* U+20AC */
		{"&yen;",   "\xC2\xA5"    }, /* U+00A5 */
		{"&copy;",  "\xC2\xA9"    }, /* U+00A9 */
		{"&reg;",   "\xC2\xAE"    }, /* U+00AE */
		{"&rsquo;", "\xE2\x80\x99"}, /* U+2019; Haibane Renmei, AniList */
	};

	std::string out;
	out.reserve(string.size());

	std::size_t pos = 0;
	while (pos < string.size()) {
		const std::size_t amp = string.find('&', pos);
		if (amp == std::string::npos) {
			/* no more candidates: copy the tail verbatim */
			out.append(string, pos, std::string::npos);
			break;
		}

		/* plain text in front of the '&' */
		out.append(string, pos, amp - pos);

		bool matched = false;
		for (const Entity& entity : entities) {
			const std::size_t len = std::char_traits<char>::length(entity.name);
			if (string.compare(amp, len, entity.name) == 0) {
				out.append(entity.utf8);
				pos = amp + len;
				matched = true;
				break;
			}
		}

		if (!matched) {
			/* not an entity we know: keep the ampersand as-is */
			out.push_back('&');
			pos = amp + 1;
		}
	}

	return out;
}
130 | |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
131 /* removes stupid HTML stuff */ |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
132 std::string TextifySynopsis(const std::string& string) { |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
133 return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string))); |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
134 } |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
135 |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
136 /* let Qt handle the heavy lifting of locale shit |
221
53211cb1e7f5
library: add initial library stuff
Paper <paper@paper.us.eu.org>
parents:
211
diff
changeset
|
137 * I don't want to deal with |
258 | 138 */ |
15 | 139 std::string ToUpper(const std::string& string) { |
187
9613d72b097e
*: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents:
120
diff
changeset
|
140 return ToUtf8String(session.config.locale.GetLocale().toUpper(ToQString(string))); |
15 | 141 } |
142 | |
143 std::string ToLower(const std::string& string) { | |
187
9613d72b097e
*: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents:
120
diff
changeset
|
144 return ToUtf8String(session.config.locale.GetLocale().toLower(ToQString(string))); |
15 | 145 } |
146 | |
/* Converts UTF-8 encoded `string` into a wide string.
 *
 * If the converter reports failure by throwing std::range_error, a
 * message is written to std::cerr and an empty wide string is
 * returned.
 */
std::wstring ToWstring(const std::string& string) {
	/* one converter shared by all calls; constructed with empty error strings */
	static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L"");

	try {
		return converter.from_bytes(string);
	} catch (const std::range_error&) {
		std::cerr << "Failed to convert UTF-8 to wide string!" << std::endl;
	}

	return std::wstring();
}
158 | |
64 | 159 std::wstring ToWstring(const QString& string) { |
160 std::wstring arr(string.size(), L'\0'); | |
161 string.toWCharArray(&arr.front()); | |
162 return arr; | |
163 } | |
164 | |
/* Converts the wide string `wstring` into a UTF-8 encoded std::string. */
std::string ToUtf8String(const std::wstring& wstring) {
	/* one converter shared by all calls; constructed with empty error strings */
	static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L"");

	std::string utf8 = converter.to_bytes(wstring);
	return utf8;
}
169 | |
64 | 170 std::string ToUtf8String(const QString& string) { |
187
9613d72b097e
*: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents:
120
diff
changeset
|
171 const QByteArray ba = string.toUtf8(); |
77 | 172 return std::string(ba.constData(), ba.size()); |
173 } | |
174 | |
175 std::string ToUtf8String(const QByteArray& ba) { | |
176 return std::string(ba.constData(), ba.size()); | |
64 | 177 } |
178 | |
179 QString ToQString(const std::string& string) { | |
180 return QString::fromUtf8(string.c_str(), string.length()); | |
181 } | |
182 | |
183 QString ToQString(const std::wstring& wstring) { | |
184 return QString::fromWCharArray(wstring.c_str(), wstring.length()); | |
185 } | |
186 | |
211
7cf53145de11
strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents:
187
diff
changeset
|
/* Spells out a boolean as the text "true" or "false". */
std::string ToUtf8String(const bool b) {
	if (b)
		return "true";

	return "false";
}
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
190 |
211
7cf53145de11
strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents:
187
diff
changeset
|
191 bool ToBool(const std::string& str, bool def) { |
7cf53145de11
strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents:
187
diff
changeset
|
192 std::istringstream s(Strings::ToLower(str)); |
7cf53145de11
strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents:
187
diff
changeset
|
193 s >> std::boolalpha >> def; |
116
254b1d2b7096
settings: add torrents page, make rss feed configurable
Paper <mrpapersonic@gmail.com>
parents:
114
diff
changeset
|
194 return def; |
254b1d2b7096
settings: add torrents page, make rss feed configurable
Paper <mrpapersonic@gmail.com>
parents:
114
diff
changeset
|
195 } |
254b1d2b7096
settings: add torrents page, make rss feed configurable
Paper <mrpapersonic@gmail.com>
parents:
114
diff
changeset
|
196 |
211
7cf53145de11
strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents:
187
diff
changeset
|
197 /* util funcs */ |
114
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
198 uint64_t HumanReadableSizeToBytes(const std::string& str) { |
187
9613d72b097e
*: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents:
120
diff
changeset
|
199 static const std::unordered_map<std::string, uint64_t> bytes_map = { |
258 | 200 {"KB", 1ull << 10}, |
201 {"MB", 1ull << 20}, | |
202 {"GB", 1ull << 30}, | |
203 {"TB", 1ull << 40}, | |
204 {"PB", 1ull << 50} /* surely we won't need more than this */ | |
114
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
205 }; |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
206 |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
207 for (const auto& suffix : bytes_map) { |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
208 if (str.find(suffix.first) != std::string::npos) { |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
209 try { |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
210 uint64_t size = std::stod(str) * suffix.second; |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
211 return size; |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
212 } catch (std::invalid_argument const& ex) { |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
213 continue; |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
214 } |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
215 } |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
216 } |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
217 |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
218 return ToInt(str, 0); |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
219 } |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
220 |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
/* Strips leading occurrences of `c` from `s`.
 *
 * At most size() - 1 characters are removed, so a string made up
 * entirely of `c` keeps its final character ("000" with '0' becomes
 * "0"). An empty string is returned unchanged.
 */
std::string RemoveLeadingChars(std::string s, const char c) {
	const std::size_t first_other = s.find_first_not_of(c);

	/* for an empty string this wraps around to the largest size_t,
	 * which erase() treats as "up to the end" */
	const std::size_t all_but_last = s.size() - 1;

	const std::size_t count = (all_but_last < first_other) ? all_but_last : first_other;
	s.erase(0, count);
	return s;
}
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
225 |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
/* Strips every trailing occurrence of `c` from `s`. A string made up
 * entirely of `c` becomes empty.
 */
std::string RemoveTrailingChars(std::string s, const char c) {
	const std::size_t last_other = s.find_last_not_of(c);

	/* when nothing but `c` is present, find_last_not_of() yields npos
	 * and npos + 1 wraps to 0, which empties the string */
	s.resize(last_other + 1);
	return s;
}
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
230 |
/* Compares `str` and `sub` over the length of the shorter of the two
 * and returns true when that common prefix is identical.
 *
 * Note that this is also true when `str` is shorter than `sub` (or
 * either is empty), as only the overlapping part is compared.
 */
bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {
	const std::size_t overlap = std::min(str.length(), sub.length());
	return str.compare(0, overlap, sub, 0, overlap) == 0;
}
238 | |
9 | 239 } // namespace Strings |