Mercurial > minori
comparison src/core/strings.cc @ 369:47c9f8502269
*: clang-format all the things
I've edited the formatting a bit. Pointer asterisks (and reference
ampersands) now sit on the variable instead of the type, and the opening
brace of a function goes on its own line (but nothing else does).
author | Paper <paper@tflc.us> |
---|---|
date | Fri, 25 Jul 2025 10:16:02 -0400 |
parents | f81bed4e04ac |
children |
comparison
equal
deleted
inserted
replaced
368:6d37a998cf91 | 369:47c9f8502269 |
---|---|
3 **/ | 3 **/ |
4 #include "core/strings.h" | 4 #include "core/strings.h" |
5 #include "core/session.h" // locale | 5 #include "core/session.h" // locale |
6 | 6 |
7 #include <QByteArray> | 7 #include <QByteArray> |
8 #include <QCoreApplication> | |
8 #include <QDebug> | 9 #include <QDebug> |
9 #include <QLocale> | 10 #include <QLocale> |
10 #include <QString> | 11 #include <QString> |
11 #include <QTextDocument> | 12 #include <QTextDocument> |
12 #include <QCoreApplication> | |
13 | 13 |
14 #include <algorithm> | 14 #include <algorithm> |
15 #include <cctype> | 15 #include <cctype> |
16 #include <codecvt> | 16 #include <codecvt> |
17 #include <iomanip> | |
17 #include <iostream> | 18 #include <iostream> |
18 #include <iomanip> | |
19 #include <locale> | 19 #include <locale> |
20 #include <string> | 20 #include <string> |
21 #include <unordered_map> | 21 #include <unordered_map> |
22 #include <vector> | 22 #include <vector> |
23 | 23 |
24 #include "utf8proc.h" | 24 #include "utf8proc.h" |
25 | 25 |
26 namespace Strings { | 26 namespace Strings { |
27 | 27 |
28 /* ew */ | 28 /* ew */ |
29 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) { | 29 std::string Implode(const std::vector<std::string> &vector, const std::string &delimiter) |
30 { | |
30 if (vector.size() < 1) | 31 if (vector.size() < 1) |
31 return ""; | 32 return ""; |
32 | 33 |
33 std::string out; | 34 std::string out; |
34 | 35 |
39 } | 40 } |
40 | 41 |
41 return out; | 42 return out; |
42 } | 43 } |
43 | 44 |
44 std::vector<std::string> Split(const std::string& text, const std::string& delimiter) { | 45 std::vector<std::string> Split(const std::string &text, const std::string &delimiter) |
46 { | |
45 if (text.length() < 1) | 47 if (text.length() < 1) |
46 return {}; | 48 return {}; |
47 | 49 |
48 std::vector<std::string> tokens; | 50 std::vector<std::string> tokens; |
49 | 51 |
58 } | 60 } |
59 | 61 |
60 /* This function is really only used for cleaning up the synopsis of | 62 /* This function is really only used for cleaning up the synopsis of |
61 * horrible HTML debris from AniList :) | 63 * horrible HTML debris from AniList :) |
62 */ | 64 */ |
63 void ReplaceAll(std::string& string, std::string_view find, std::string_view replace) { | 65 void ReplaceAll(std::string &string, std::string_view find, std::string_view replace) |
66 { | |
64 size_t pos = 0; | 67 size_t pos = 0; |
65 while ((pos = string.find(find, pos)) != std::string::npos) { | 68 while ((pos = string.find(find, pos)) != std::string::npos) { |
66 string.replace(pos, find.length(), replace); | 69 string.replace(pos, find.length(), replace); |
67 pos += replace.length(); | 70 pos += replace.length(); |
68 } | 71 } |
69 } | 72 } |
70 | 73 |
71 void ConvertRomanNumerals(std::string& string) { | 74 void ConvertRomanNumerals(std::string &string) |
75 { | |
72 static const std::vector<std::pair<std::string_view, std::string_view>> vec = { | 76 static const std::vector<std::pair<std::string_view, std::string_view>> vec = { |
73 {"2", "II"}, {"3", "III"}, {"4", "IV"}, {"5", "V"}, {"6", "VI"}, | 77 {"2", "II" }, |
74 {"7", "VII"}, {"8", "VIII"}, {"9", "IX"}, {"11", "XI"}, {"12", "XII"}, | 78 {"3", "III" }, |
75 {"13", "XIII"} | 79 {"4", "IV" }, |
76 }; | 80 {"5", "V" }, |
77 | 81 {"6", "VI" }, |
78 for (const auto& item : vec) | 82 {"7", "VII" }, |
83 {"8", "VIII"}, | |
84 {"9", "IX" }, | |
85 {"11", "XI" }, | |
86 {"12", "XII" }, | |
87 {"13", "XIII"} | |
88 }; | |
89 | |
90 for (const auto &item : vec) | |
79 ReplaceAll(string, item.second, item.first); | 91 ReplaceAll(string, item.second, item.first); |
80 } | 92 } |
81 | 93 |
82 /* this also performs case folding, so our string is lowercase after this */ | 94 /* this also performs case folding, so our string is lowercase after this */ |
83 void NormalizeUnicode(std::string& string) { | 95 void NormalizeUnicode(std::string &string) |
96 { | |
84 static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>( | 97 static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>( |
85 UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE | | 98 UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK | |
86 UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK | | 99 UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS); |
87 UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS | |
88 ); | |
89 | 100 |
90 /* ack */ | 101 /* ack */ |
91 utf8proc_uint8_t* buf = nullptr; | 102 utf8proc_uint8_t *buf = nullptr; |
92 | 103 |
93 const utf8proc_ssize_t size = utf8proc_map( | 104 const utf8proc_ssize_t size = |
94 reinterpret_cast<const utf8proc_uint8_t*>(string.data()), | 105 utf8proc_map(reinterpret_cast<const utf8proc_uint8_t *>(string.data()), string.size(), &buf, options); |
95 string.size(), | |
96 &buf, | |
97 options | |
98 ); | |
99 | 106 |
100 if (buf) { | 107 if (buf) { |
101 if (size) | 108 if (size) |
102 string.assign(reinterpret_cast<const char*>(buf), size); | 109 string.assign(reinterpret_cast<const char *>(buf), size); |
103 | 110 |
104 std::free(buf); | 111 std::free(buf); |
105 } | 112 } |
106 } | 113 } |
107 | 114 |
108 void NormalizeAnimeTitle(std::string& string) { | 115 void NormalizeAnimeTitle(std::string &string) |
116 { | |
109 ConvertRomanNumerals(string); | 117 ConvertRomanNumerals(string); |
110 NormalizeUnicode(string); | 118 NormalizeUnicode(string); |
111 RemoveLeadingChars(string, ' '); | 119 RemoveLeadingChars(string, ' '); |
112 RemoveTrailingChars(string, ' '); | 120 RemoveTrailingChars(string, ' '); |
113 } | 121 } |
114 | 122 |
115 void TextifySynopsis(std::string& string) { | 123 void TextifySynopsis(std::string &string) |
124 { | |
116 /* Just let Qt deal with it. */ | 125 /* Just let Qt deal with it. */ |
117 QTextDocument text; | 126 QTextDocument text; |
118 text.setHtml(Strings::ToQString(string)); | 127 text.setHtml(Strings::ToQString(string)); |
119 string = Strings::ToUtf8String(text.toPlainText()); | 128 string = Strings::ToUtf8String(text.toPlainText()); |
120 } | 129 } |
121 | 130 |
122 /* let Qt handle the heavy lifting of locale shit | 131 /* let Qt handle the heavy lifting of locale shit |
123 * I don't want to deal with | 132 * I don't want to deal with |
124 */ | 133 */ |
125 std::string ToUpper(const std::string& string) { | 134 std::string ToUpper(const std::string &string) |
135 { | |
126 return ToUtf8String(session.config.locale.GetLocale().toUpper(ToQString(string))); | 136 return ToUtf8String(session.config.locale.GetLocale().toUpper(ToQString(string))); |
127 } | 137 } |
128 | 138 |
129 std::string ToLower(const std::string& string) { | 139 std::string ToLower(const std::string &string) |
140 { | |
130 return ToUtf8String(session.config.locale.GetLocale().toLower(ToQString(string))); | 141 return ToUtf8String(session.config.locale.GetLocale().toLower(ToQString(string))); |
131 } | 142 } |
132 | 143 |
133 std::wstring ToWstring(const std::string& string) { | 144 std::wstring ToWstring(const std::string &string) |
145 { | |
134 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L""); | 146 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L""); |
135 | 147 |
136 std::wstring wstr; | 148 std::wstring wstr; |
137 try { | 149 try { |
138 wstr = converter.from_bytes(string); | 150 wstr = converter.from_bytes(string); |
139 } catch (std::range_error const& ex) { | 151 } catch (std::range_error const &ex) { |
140 /* XXX how? */ | 152 /* XXX how? */ |
141 std::cerr << "Failed to convert UTF-8 to wide string!" << std::endl; | 153 std::cerr << "Failed to convert UTF-8 to wide string!" << std::endl; |
142 } | 154 } |
143 return wstr; | 155 return wstr; |
144 } | 156 } |
145 | 157 |
146 std::wstring ToWstring(const QString& string) { | 158 std::wstring ToWstring(const QString &string) |
159 { | |
147 std::wstring arr(string.size(), L'\0'); | 160 std::wstring arr(string.size(), L'\0'); |
148 string.toWCharArray(&arr.front()); | 161 string.toWCharArray(&arr.front()); |
149 return arr; | 162 return arr; |
150 } | 163 } |
151 | 164 |
152 std::string ToUtf8String(const std::wstring& wstring) { | 165 std::string ToUtf8String(const std::wstring &wstring) |
166 { | |
153 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L""); | 167 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L""); |
154 return converter.to_bytes(wstring); | 168 return converter.to_bytes(wstring); |
155 } | 169 } |
156 | 170 |
157 std::string ToUtf8String(const std::u32string& u32string) { | 171 std::string ToUtf8String(const std::u32string &u32string) |
172 { | |
158 static std::wstring_convert<std::codecvt_utf8_utf16<char32_t>, char32_t> converter; | 173 static std::wstring_convert<std::codecvt_utf8_utf16<char32_t>, char32_t> converter; |
159 return converter.to_bytes(u32string); | 174 return converter.to_bytes(u32string); |
160 } | 175 } |
161 | 176 |
162 std::u32string ToUcs4String(const std::string& string) { | 177 std::u32string ToUcs4String(const std::string &string) |
178 { | |
163 static std::wstring_convert<std::codecvt_utf8_utf16<char32_t>, char32_t> converter; | 179 static std::wstring_convert<std::codecvt_utf8_utf16<char32_t>, char32_t> converter; |
164 return converter.from_bytes(string); | 180 return converter.from_bytes(string); |
165 } | 181 } |
166 | 182 |
167 std::string ToUtf8String(const QString& string) { | 183 std::string ToUtf8String(const QString &string) |
184 { | |
168 const QByteArray ba = string.toUtf8(); | 185 const QByteArray ba = string.toUtf8(); |
169 return std::string(ba.constData(), ba.size()); | 186 return std::string(ba.constData(), ba.size()); |
170 } | 187 } |
171 | 188 |
172 std::string ToUtf8String(const QByteArray& ba) { | 189 std::string ToUtf8String(const QByteArray &ba) |
190 { | |
173 return std::string(ba.constData(), ba.size()); | 191 return std::string(ba.constData(), ba.size()); |
174 } | 192 } |
175 | 193 |
176 QString ToQString(const std::string& string) { | 194 QString ToQString(const std::string &string) |
195 { | |
177 return QString::fromUtf8(string.c_str(), string.length()); | 196 return QString::fromUtf8(string.c_str(), string.length()); |
178 } | 197 } |
179 | 198 |
180 QString ToQString(const std::wstring& wstring) { | 199 QString ToQString(const std::wstring &wstring) |
200 { | |
181 return QString::fromWCharArray(wstring.c_str(), wstring.length()); | 201 return QString::fromWCharArray(wstring.c_str(), wstring.length()); |
182 } | 202 } |
183 | 203 |
184 std::string ToUtf8String(const bool b) { | 204 std::string ToUtf8String(const bool b) |
205 { | |
185 return b ? "true" : "false"; // lol | 206 return b ? "true" : "false"; // lol |
186 } | 207 } |
187 | 208 |
188 bool ToBool(const std::string& str, bool def) { | 209 bool ToBool(const std::string &str, bool def) |
210 { | |
189 std::istringstream s(Strings::ToLower(str)); | 211 std::istringstream s(Strings::ToLower(str)); |
190 s >> std::boolalpha >> def; | 212 s >> std::boolalpha >> def; |
191 return def; | 213 return def; |
192 } | 214 } |
193 | 215 |
194 template<typename T> | 216 template<typename T> |
195 constexpr T ipow(T num, unsigned int pow) { | 217 constexpr T ipow(T num, unsigned int pow) |
196 return (pow >= sizeof(unsigned int)*8) ? 0 : | 218 { |
197 pow == 0 ? 1 : num * ipow(num, pow-1); | 219 return (pow >= sizeof(unsigned int) * 8) ? 0 : pow == 0 ? 1 : num * ipow(num, pow - 1); |
198 } | 220 } |
199 | 221 |
200 /* util funcs */ | 222 /* util funcs */ |
201 uint64_t HumanReadableSizeToBytes(const std::string& str) { | 223 uint64_t HumanReadableSizeToBytes(const std::string &str) |
224 { | |
202 static const std::unordered_map<std::string, uint64_t> bytes_map = { | 225 static const std::unordered_map<std::string, uint64_t> bytes_map = { |
203 {"KB", 1e3}, | 226 {"KB", 1e3 }, |
204 {"MB", 1e6}, | 227 {"MB", 1e6 }, |
205 {"GB", 1e9}, | 228 {"GB", 1e9 }, |
206 {"TB", 1e12}, | 229 {"TB", 1e12 }, |
207 {"PB", 1e15}, | 230 {"PB", 1e15 }, |
208 {"KiB", 1ull << 10}, | 231 {"KiB", 1ull << 10}, |
209 {"MiB", 1ull << 20}, | 232 {"MiB", 1ull << 20}, |
210 {"GiB", 1ull << 30}, | 233 {"GiB", 1ull << 30}, |
211 {"TiB", 1ull << 40}, | 234 {"TiB", 1ull << 40}, |
212 {"PiB", 1ull << 50} /* surely we won't need more than this */ | 235 {"PiB", 1ull << 50} /* surely we won't need more than this */ |
213 }; | 236 }; |
214 | 237 |
215 for (const auto& suffix : bytes_map) { | 238 for (const auto &suffix : bytes_map) { |
216 if (str.find(suffix.first) != std::string::npos) { | 239 if (str.find(suffix.first) != std::string::npos) { |
217 try { | 240 try { |
218 uint64_t size = std::stod(str) * suffix.second; | 241 uint64_t size = std::stod(str) * suffix.second; |
219 return size; | 242 return size; |
220 } catch (std::invalid_argument const& ex) { | 243 } catch (std::invalid_argument const &ex) { |
221 continue; | 244 continue; |
222 } | 245 } |
223 } | 246 } |
224 } | 247 } |
225 | 248 |
226 return ToInt(str, 0); | 249 return ToInt(str, 0); |
227 } | 250 } |
228 | 251 |
229 std::string BytesToHumanReadableSize(uint64_t bytes, int precision) { | 252 std::string BytesToHumanReadableSize(uint64_t bytes, int precision) |
253 { | |
230 #if QT_VERSION >= QT_VERSION_CHECK(5, 10, 0) | 254 #if QT_VERSION >= QT_VERSION_CHECK(5, 10, 0) |
231 /* QLocale in Qt >= 5.10.0 has a function for this */ | 255 /* QLocale in Qt >= 5.10.0 has a function for this */ |
232 return Strings::ToUtf8String(session.config.locale.GetLocale().formattedDataSize(bytes, precision)); | 256 return Strings::ToUtf8String(session.config.locale.GetLocale().formattedDataSize(bytes, precision)); |
233 #else | 257 #else |
234 static const std::unordered_map<uint64_t, std::string> map = { | 258 static const std::unordered_map<uint64_t, std::string> map = { |
235 {1ull << 10, "KiB"}, | 259 {1ull << 10, "KiB"}, |
236 {1ull << 20, "MiB"}, | 260 {1ull << 20, "MiB"}, |
237 {1ull << 30, "GiB"}, | 261 {1ull << 30, "GiB"}, |
238 {1ull << 40, "TiB"}, | 262 {1ull << 40, "TiB"}, |
239 {1ull << 50, "PiB"} | 263 {1ull << 50, "PiB"} |
240 }; | 264 }; |
241 | 265 |
242 for (const auto& suffix : map) { | 266 for (const auto &suffix : map) { |
243 if (bytes / suffix.first < 1) | 267 if (bytes / suffix.first < 1) |
244 continue; | 268 continue; |
245 | 269 |
246 std::stringstream ss; | 270 std::stringstream ss; |
247 ss << std::setprecision(precision) | 271 ss << std::setprecision(precision) << (static_cast<double>(bytes) / suffix.first) << " " << suffix.second; |
248 << (static_cast<double>(bytes) / suffix.first) << " " | |
249 << suffix.second; | |
250 return ss.str(); | 272 return ss.str(); |
251 } | 273 } |
252 | 274 |
253 /* better luck next time */ | 275 /* better luck next time */ |
254 return "0 bytes"; | 276 return "0 bytes"; |
255 #endif | 277 #endif |
256 } | 278 } |
257 | 279 |
258 void RemoveLeadingChars(std::string& s, const char c) { | 280 void RemoveLeadingChars(std::string &s, const char c) |
281 { | |
259 s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1)); | 282 s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1)); |
260 } | 283 } |
261 | 284 |
262 void RemoveTrailingChars(std::string& s, const char c) { | 285 void RemoveTrailingChars(std::string &s, const char c) |
286 { | |
263 s.erase(s.find_last_not_of(c) + 1, std::string::npos); | 287 s.erase(s.find_last_not_of(c) + 1, std::string::npos); |
264 } | 288 } |
265 | 289 |
266 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) { | 290 bool BeginningMatchesSubstring(const std::string &str, const std::string &sub) |
291 { | |
267 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++) | 292 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++) |
268 if (str[i] != sub[i]) | 293 if (str[i] != sub[i]) |
269 return false; | 294 return false; |
270 | 295 |
271 return true; | 296 return true; |
272 } | 297 } |
273 | 298 |
274 std::string Translate(const char* str) { | 299 std::string Translate(const char *str) |
300 { | |
275 return Strings::ToUtf8String(QCoreApplication::tr(str)); | 301 return Strings::ToUtf8String(QCoreApplication::tr(str)); |
276 } | 302 } |
277 | 303 |
278 } // namespace Strings | 304 } // namespace Strings |