Mercurial > minori
annotate src/core/strings.cc @ 114:ab191e28e69d
*: add initial torrent stuff
WOAH!
these checkboxes are a pain in my fucking ass
author | Paper <mrpapersonic@gmail.com> |
---|---|
date | Tue, 07 Nov 2023 08:03:42 -0500 |
parents | b315f3759c56 |
children | 254b1d2b7096 |
rev | line source |
---|---|
9 | 1 /** |
2 * strings.cpp: Useful functions for manipulating strings | |
3 **/ | |
4 #include "core/strings.h" | |
64 | 5 #include <QByteArray> |
101
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
6 #include <QDebug> |
64 | 7 #include <QString> |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
8 #include <QLocale> |
15 | 9 #include <algorithm> |
10 #include <cctype> | |
62 | 11 #include <codecvt> |
9 | 12 #include <locale> |
13 #include <string> | |
14 #include <vector> | |
102 | 15 #include <unordered_map> |
9 | 16 |
17 namespace Strings { | |
18 | |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
19 /* ew */ |
9 | 20 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) { |
21 if (vector.size() < 1) | |
22 return "-"; | |
23 std::string out = ""; | |
24 for (unsigned long long i = 0; i < vector.size(); i++) { | |
25 out.append(vector.at(i)); | |
26 if (i < vector.size() - 1) | |
27 out.append(delimiter); | |
28 } | |
29 return out; | |
30 } | |
31 | |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
32 /* This function is really only used for cleaning up the synopsis of |
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
33 horrible HTML debris from AniList :) */ |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
34 std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) { |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
35 size_t pos = 0; |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
36 while ((pos = string.find(find, pos)) != std::string::npos) { |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
37 string.replace(pos, find.length(), replace); |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
38 pos += replace.length(); |
9 | 39 } |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
40 return string; |
9 | 41 } |
42 | |
43 std::string SanitizeLineEndings(const std::string& string) { | |
114
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
44 /* LOL */ |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
45 return |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
46 ReplaceAll( |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
47 ReplaceAll( |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
48 ReplaceAll( |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
49 ReplaceAll( |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
50 ReplaceAll(string, "\r\n", "\n"), |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
51 "</p>", "\n"), |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
52 "<br>", "\n"), |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
53 "<br />", "\n"), |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
54 "\n\n\n", "\n\n"); |
9 | 55 } |
56 | |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
57 /* removes dumb HTML tags because anilist is aids and |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
58 gives us HTML for synopses :/ */ |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
59 std::string RemoveHtmlTags(std::string string) { |
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
60 while (string.find("<") != std::string::npos) { |
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
61 auto startpos = string.find("<"); |
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
62 auto endpos = string.find(">") + 1; |
9 | 63 |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
64 if (endpos != std::string::npos) |
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
65 string.erase(startpos, endpos - startpos); |
9 | 66 } |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
67 return string; |
9 | 68 } |
69 | |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
70 /* e.g. "<" for "<" */ |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
71 std::string ParseHtmlEntities(std::string string) { |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
72 const std::unordered_map<std::string, std::string> map = { |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
73 /* The only one of these I can understand using are the first |
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
74 three. why do the rest of these exist? */ |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
75 {"<", "<"}, |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
76 {"&rt;", ">"}, |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
77 {" ", "\xA0"}, |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
78 {"&", "&"}, |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
79 {""", "\""}, |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
80 {"'", "'"}, |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
81 {"¢", "¢"}, |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
82 {"£", "£"}, |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
83 {"€", "€"}, |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
84 {"¥", "¥"}, |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
85 {"©", "©"}, |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
86 {"®", "®"}, |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
87 {"’", "’"} // Haibane Renmei, AniList |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
88 }; |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
89 |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
90 for (const auto& item : map) |
100
f5940a575d83
track/constants: add many more video formats
Paper <mrpapersonic@gmail.com>
parents:
99
diff
changeset
|
91 string = ReplaceAll(string, item.first, item.second); |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
92 return string; |
9 | 93 } |
94 | |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
95 /* removes stupid HTML stuff */ |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
96 std::string TextifySynopsis(const std::string& string) { |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
97 return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string))); |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
98 } |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
99 |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
100 /* let Qt handle the heavy lifting of locale shit |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
101 I don't want to deal with */ |
15 | 102 std::string ToUpper(const std::string& string) { |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
103 /* todo: this "locale" will have to be moved to session.h |
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
104 it also defaults to en-US, which sucks very much for |
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
105 anyone who doesn't speak american english... */ |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
106 QLocale locale; |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
107 return ToUtf8String(locale.toUpper(ToQString(string))); |
15 | 108 } |
109 | |
110 std::string ToLower(const std::string& string) { | |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
111 QLocale locale; |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
112 return ToUtf8String(locale.toLower(ToQString(string))); |
15 | 113 } |
114 | |
62 | 115 std::wstring ToWstring(const std::string& string) { |
116 std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter; | |
117 return converter.from_bytes(string); | |
118 } | |
119 | |
64 | 120 std::wstring ToWstring(const QString& string) { |
121 std::wstring arr(string.size(), L'\0'); | |
122 string.toWCharArray(&arr.front()); | |
123 return arr; | |
124 } | |
125 | |
62 | 126 std::string ToUtf8String(const std::wstring& wstring) { |
127 std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter; | |
128 return converter.to_bytes(wstring); | |
129 } | |
130 | |
64 | 131 std::string ToUtf8String(const QString& string) { |
132 QByteArray ba = string.toUtf8(); | |
77 | 133 return std::string(ba.constData(), ba.size()); |
134 } | |
135 | |
136 std::string ToUtf8String(const QByteArray& ba) { | |
137 return std::string(ba.constData(), ba.size()); | |
64 | 138 } |
139 | |
140 QString ToQString(const std::string& string) { | |
141 return QString::fromUtf8(string.c_str(), string.length()); | |
142 } | |
143 | |
144 QString ToQString(const std::wstring& wstring) { | |
145 return QString::fromWCharArray(wstring.c_str(), wstring.length()); | |
146 } | |
147 | |
101
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
148 int ToInt(const std::string& str, int def) { |
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
149 int tmp = 0; |
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
150 try { |
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
151 tmp = std::stoi(str); |
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
152 } catch (std::invalid_argument const& ex) { |
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
153 qDebug() << "Failed to parse int from std::string: no number found in " << ToQString(str) << " defaulting to " << def; |
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
154 tmp = def; |
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
155 } |
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
156 return tmp; |
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
157 } |
c537996cf67b
*: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents:
100
diff
changeset
|
158 |
114
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
159 uint64_t HumanReadableSizeToBytes(const std::string& str) { |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
160 const std::unordered_map<std::string, uint64_t> bytes_map = { |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
161 {"KB", 1 << 10}, |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
162 {"MB", 1 << 20}, |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
163 {"GB", 1 << 30}, |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
164 {"TB", 1 << 40}, |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
165 {"PB", 1 << 50} /* surely we won't need more than this */ |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
166 }; |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
167 |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
168 for (const auto& suffix : bytes_map) { |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
169 if (str.find(suffix.first) != std::string::npos) { |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
170 try { |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
171 uint64_t size = std::stod(str) * suffix.second; |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
172 return size; |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
173 } catch (std::invalid_argument const& ex) { |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
174 continue; |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
175 } |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
176 } |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
177 } |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
178 |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
179 return ToInt(str, 0); |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
180 } |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
181 |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
182 std::string RemoveLeadingChars(std::string s, const char c) { |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
183 s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1)); |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
184 return s; |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
185 } |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
186 |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
187 std::string RemoveTrailingChars(std::string s, const char c) { |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
188 s.erase(s.find_last_not_of(c) + 1, std::string::npos); |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
189 return s; |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
190 } |
ab191e28e69d
*: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents:
102
diff
changeset
|
191 |
102 | 192 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) { |
193 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++) | |
194 if (str[i] != sub[i]) | |
195 return false; | |
196 return true; | |
197 } | |
198 | |
9 | 199 } // namespace Strings |