comparison src/core/strings.cc @ 369:47c9f8502269

*: clang-format all the things
I've edited the formatting a bit. Pointer asterisks (and reference ampersands) are now attached to the variable instead of the type, and function braces (but nothing else) now go on their own line.
author Paper <paper@tflc.us>
date Fri, 25 Jul 2025 10:16:02 -0400
parents f81bed4e04ac
children
comparison
equal deleted inserted replaced
368:6d37a998cf91 369:47c9f8502269
3 **/ 3 **/
4 #include "core/strings.h" 4 #include "core/strings.h"
5 #include "core/session.h" // locale 5 #include "core/session.h" // locale
6 6
7 #include <QByteArray> 7 #include <QByteArray>
8 #include <QCoreApplication>
8 #include <QDebug> 9 #include <QDebug>
9 #include <QLocale> 10 #include <QLocale>
10 #include <QString> 11 #include <QString>
11 #include <QTextDocument> 12 #include <QTextDocument>
12 #include <QCoreApplication>
13 13
14 #include <algorithm> 14 #include <algorithm>
15 #include <cctype> 15 #include <cctype>
16 #include <codecvt> 16 #include <codecvt>
17 #include <iomanip>
17 #include <iostream> 18 #include <iostream>
18 #include <iomanip>
19 #include <locale> 19 #include <locale>
20 #include <string> 20 #include <string>
21 #include <unordered_map> 21 #include <unordered_map>
22 #include <vector> 22 #include <vector>
23 23
24 #include "utf8proc.h" 24 #include "utf8proc.h"
25 25
26 namespace Strings { 26 namespace Strings {
27 27
28 /* ew */ 28 /* ew */
29 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) { 29 std::string Implode(const std::vector<std::string> &vector, const std::string &delimiter)
30 {
30 if (vector.size() < 1) 31 if (vector.size() < 1)
31 return ""; 32 return "";
32 33
33 std::string out; 34 std::string out;
34 35
39 } 40 }
40 41
41 return out; 42 return out;
42 } 43 }
43 44
44 std::vector<std::string> Split(const std::string& text, const std::string& delimiter) { 45 std::vector<std::string> Split(const std::string &text, const std::string &delimiter)
46 {
45 if (text.length() < 1) 47 if (text.length() < 1)
46 return {}; 48 return {};
47 49
48 std::vector<std::string> tokens; 50 std::vector<std::string> tokens;
49 51
58 } 60 }
59 61
60 /* This function is really only used for cleaning up the synopsis of 62 /* This function is really only used for cleaning up the synopsis of
61 * horrible HTML debris from AniList :) 63 * horrible HTML debris from AniList :)
62 */ 64 */
63 void ReplaceAll(std::string& string, std::string_view find, std::string_view replace) { 65 void ReplaceAll(std::string &string, std::string_view find, std::string_view replace)
66 {
64 size_t pos = 0; 67 size_t pos = 0;
65 while ((pos = string.find(find, pos)) != std::string::npos) { 68 while ((pos = string.find(find, pos)) != std::string::npos) {
66 string.replace(pos, find.length(), replace); 69 string.replace(pos, find.length(), replace);
67 pos += replace.length(); 70 pos += replace.length();
68 } 71 }
69 } 72 }
70 73
71 void ConvertRomanNumerals(std::string& string) { 74 void ConvertRomanNumerals(std::string &string)
75 {
72 static const std::vector<std::pair<std::string_view, std::string_view>> vec = { 76 static const std::vector<std::pair<std::string_view, std::string_view>> vec = {
73 {"2", "II"}, {"3", "III"}, {"4", "IV"}, {"5", "V"}, {"6", "VI"}, 77 {"2", "II" },
74 {"7", "VII"}, {"8", "VIII"}, {"9", "IX"}, {"11", "XI"}, {"12", "XII"}, 78 {"3", "III" },
75 {"13", "XIII"} 79 {"4", "IV" },
76 }; 80 {"5", "V" },
77 81 {"6", "VI" },
78 for (const auto& item : vec) 82 {"7", "VII" },
83 {"8", "VIII"},
84 {"9", "IX" },
85 {"11", "XI" },
86 {"12", "XII" },
87 {"13", "XIII"}
88 };
89
90 for (const auto &item : vec)
79 ReplaceAll(string, item.second, item.first); 91 ReplaceAll(string, item.second, item.first);
80 } 92 }
81 93
82 /* this also performs case folding, so our string is lowercase after this */ 94 /* this also performs case folding, so our string is lowercase after this */
83 void NormalizeUnicode(std::string& string) { 95 void NormalizeUnicode(std::string &string)
96 {
84 static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>( 97 static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>(
85 UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE | 98 UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK |
86 UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK | 99 UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS);
87 UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS
88 );
89 100
90 /* ack */ 101 /* ack */
91 utf8proc_uint8_t* buf = nullptr; 102 utf8proc_uint8_t *buf = nullptr;
92 103
93 const utf8proc_ssize_t size = utf8proc_map( 104 const utf8proc_ssize_t size =
94 reinterpret_cast<const utf8proc_uint8_t*>(string.data()), 105 utf8proc_map(reinterpret_cast<const utf8proc_uint8_t *>(string.data()), string.size(), &buf, options);
95 string.size(),
96 &buf,
97 options
98 );
99 106
100 if (buf) { 107 if (buf) {
101 if (size) 108 if (size)
102 string.assign(reinterpret_cast<const char*>(buf), size); 109 string.assign(reinterpret_cast<const char *>(buf), size);
103 110
104 std::free(buf); 111 std::free(buf);
105 } 112 }
106 } 113 }
107 114
108 void NormalizeAnimeTitle(std::string& string) { 115 void NormalizeAnimeTitle(std::string &string)
116 {
109 ConvertRomanNumerals(string); 117 ConvertRomanNumerals(string);
110 NormalizeUnicode(string); 118 NormalizeUnicode(string);
111 RemoveLeadingChars(string, ' '); 119 RemoveLeadingChars(string, ' ');
112 RemoveTrailingChars(string, ' '); 120 RemoveTrailingChars(string, ' ');
113 } 121 }
114 122
115 void TextifySynopsis(std::string& string) { 123 void TextifySynopsis(std::string &string)
124 {
116 /* Just let Qt deal with it. */ 125 /* Just let Qt deal with it. */
117 QTextDocument text; 126 QTextDocument text;
118 text.setHtml(Strings::ToQString(string)); 127 text.setHtml(Strings::ToQString(string));
119 string = Strings::ToUtf8String(text.toPlainText()); 128 string = Strings::ToUtf8String(text.toPlainText());
120 } 129 }
121 130
122 /* let Qt handle the heavy lifting of locale shit 131 /* let Qt handle the heavy lifting of locale shit
123 * I don't want to deal with 132 * I don't want to deal with
124 */ 133 */
125 std::string ToUpper(const std::string& string) { 134 std::string ToUpper(const std::string &string)
135 {
126 return ToUtf8String(session.config.locale.GetLocale().toUpper(ToQString(string))); 136 return ToUtf8String(session.config.locale.GetLocale().toUpper(ToQString(string)));
127 } 137 }
128 138
129 std::string ToLower(const std::string& string) { 139 std::string ToLower(const std::string &string)
140 {
130 return ToUtf8String(session.config.locale.GetLocale().toLower(ToQString(string))); 141 return ToUtf8String(session.config.locale.GetLocale().toLower(ToQString(string)));
131 } 142 }
132 143
133 std::wstring ToWstring(const std::string& string) { 144 std::wstring ToWstring(const std::string &string)
145 {
134 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L""); 146 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L"");
135 147
136 std::wstring wstr; 148 std::wstring wstr;
137 try { 149 try {
138 wstr = converter.from_bytes(string); 150 wstr = converter.from_bytes(string);
139 } catch (std::range_error const& ex) { 151 } catch (std::range_error const &ex) {
140 /* XXX how? */ 152 /* XXX how? */
141 std::cerr << "Failed to convert UTF-8 to wide string!" << std::endl; 153 std::cerr << "Failed to convert UTF-8 to wide string!" << std::endl;
142 } 154 }
143 return wstr; 155 return wstr;
144 } 156 }
145 157
146 std::wstring ToWstring(const QString& string) { 158 std::wstring ToWstring(const QString &string)
159 {
147 std::wstring arr(string.size(), L'\0'); 160 std::wstring arr(string.size(), L'\0');
148 string.toWCharArray(&arr.front()); 161 string.toWCharArray(&arr.front());
149 return arr; 162 return arr;
150 } 163 }
151 164
152 std::string ToUtf8String(const std::wstring& wstring) { 165 std::string ToUtf8String(const std::wstring &wstring)
166 {
153 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L""); 167 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L"");
154 return converter.to_bytes(wstring); 168 return converter.to_bytes(wstring);
155 } 169 }
156 170
157 std::string ToUtf8String(const std::u32string& u32string) { 171 std::string ToUtf8String(const std::u32string &u32string)
172 {
158 static std::wstring_convert<std::codecvt_utf8_utf16<char32_t>, char32_t> converter; 173 static std::wstring_convert<std::codecvt_utf8_utf16<char32_t>, char32_t> converter;
159 return converter.to_bytes(u32string); 174 return converter.to_bytes(u32string);
160 } 175 }
161 176
162 std::u32string ToUcs4String(const std::string& string) { 177 std::u32string ToUcs4String(const std::string &string)
178 {
163 static std::wstring_convert<std::codecvt_utf8_utf16<char32_t>, char32_t> converter; 179 static std::wstring_convert<std::codecvt_utf8_utf16<char32_t>, char32_t> converter;
164 return converter.from_bytes(string); 180 return converter.from_bytes(string);
165 } 181 }
166 182
167 std::string ToUtf8String(const QString& string) { 183 std::string ToUtf8String(const QString &string)
184 {
168 const QByteArray ba = string.toUtf8(); 185 const QByteArray ba = string.toUtf8();
169 return std::string(ba.constData(), ba.size()); 186 return std::string(ba.constData(), ba.size());
170 } 187 }
171 188
172 std::string ToUtf8String(const QByteArray& ba) { 189 std::string ToUtf8String(const QByteArray &ba)
190 {
173 return std::string(ba.constData(), ba.size()); 191 return std::string(ba.constData(), ba.size());
174 } 192 }
175 193
176 QString ToQString(const std::string& string) { 194 QString ToQString(const std::string &string)
195 {
177 return QString::fromUtf8(string.c_str(), string.length()); 196 return QString::fromUtf8(string.c_str(), string.length());
178 } 197 }
179 198
180 QString ToQString(const std::wstring& wstring) { 199 QString ToQString(const std::wstring &wstring)
200 {
181 return QString::fromWCharArray(wstring.c_str(), wstring.length()); 201 return QString::fromWCharArray(wstring.c_str(), wstring.length());
182 } 202 }
183 203
184 std::string ToUtf8String(const bool b) { 204 std::string ToUtf8String(const bool b)
205 {
185 return b ? "true" : "false"; // lol 206 return b ? "true" : "false"; // lol
186 } 207 }
187 208
188 bool ToBool(const std::string& str, bool def) { 209 bool ToBool(const std::string &str, bool def)
210 {
189 std::istringstream s(Strings::ToLower(str)); 211 std::istringstream s(Strings::ToLower(str));
190 s >> std::boolalpha >> def; 212 s >> std::boolalpha >> def;
191 return def; 213 return def;
192 } 214 }
193 215
194 template<typename T> 216 template<typename T>
195 constexpr T ipow(T num, unsigned int pow) { 217 constexpr T ipow(T num, unsigned int pow)
196 return (pow >= sizeof(unsigned int)*8) ? 0 : 218 {
197 pow == 0 ? 1 : num * ipow(num, pow-1); 219 return (pow >= sizeof(unsigned int) * 8) ? 0 : pow == 0 ? 1 : num * ipow(num, pow - 1);
198 } 220 }
199 221
200 /* util funcs */ 222 /* util funcs */
201 uint64_t HumanReadableSizeToBytes(const std::string& str) { 223 uint64_t HumanReadableSizeToBytes(const std::string &str)
224 {
202 static const std::unordered_map<std::string, uint64_t> bytes_map = { 225 static const std::unordered_map<std::string, uint64_t> bytes_map = {
203 {"KB", 1e3}, 226 {"KB", 1e3 },
204 {"MB", 1e6}, 227 {"MB", 1e6 },
205 {"GB", 1e9}, 228 {"GB", 1e9 },
206 {"TB", 1e12}, 229 {"TB", 1e12 },
207 {"PB", 1e15}, 230 {"PB", 1e15 },
208 {"KiB", 1ull << 10}, 231 {"KiB", 1ull << 10},
209 {"MiB", 1ull << 20}, 232 {"MiB", 1ull << 20},
210 {"GiB", 1ull << 30}, 233 {"GiB", 1ull << 30},
211 {"TiB", 1ull << 40}, 234 {"TiB", 1ull << 40},
212 {"PiB", 1ull << 50} /* surely we won't need more than this */ 235 {"PiB", 1ull << 50} /* surely we won't need more than this */
213 }; 236 };
214 237
215 for (const auto& suffix : bytes_map) { 238 for (const auto &suffix : bytes_map) {
216 if (str.find(suffix.first) != std::string::npos) { 239 if (str.find(suffix.first) != std::string::npos) {
217 try { 240 try {
218 uint64_t size = std::stod(str) * suffix.second; 241 uint64_t size = std::stod(str) * suffix.second;
219 return size; 242 return size;
220 } catch (std::invalid_argument const& ex) { 243 } catch (std::invalid_argument const &ex) {
221 continue; 244 continue;
222 } 245 }
223 } 246 }
224 } 247 }
225 248
226 return ToInt(str, 0); 249 return ToInt(str, 0);
227 } 250 }
228 251
229 std::string BytesToHumanReadableSize(uint64_t bytes, int precision) { 252 std::string BytesToHumanReadableSize(uint64_t bytes, int precision)
253 {
230 #if QT_VERSION >= QT_VERSION_CHECK(5, 10, 0) 254 #if QT_VERSION >= QT_VERSION_CHECK(5, 10, 0)
231 /* QLocale in Qt >= 5.10.0 has a function for this */ 255 /* QLocale in Qt >= 5.10.0 has a function for this */
232 return Strings::ToUtf8String(session.config.locale.GetLocale().formattedDataSize(bytes, precision)); 256 return Strings::ToUtf8String(session.config.locale.GetLocale().formattedDataSize(bytes, precision));
233 #else 257 #else
234 static const std::unordered_map<uint64_t, std::string> map = { 258 static const std::unordered_map<uint64_t, std::string> map = {
235 {1ull << 10, "KiB"}, 259 {1ull << 10, "KiB"},
236 {1ull << 20, "MiB"}, 260 {1ull << 20, "MiB"},
237 {1ull << 30, "GiB"}, 261 {1ull << 30, "GiB"},
238 {1ull << 40, "TiB"}, 262 {1ull << 40, "TiB"},
239 {1ull << 50, "PiB"} 263 {1ull << 50, "PiB"}
240 }; 264 };
241 265
242 for (const auto& suffix : map) { 266 for (const auto &suffix : map) {
243 if (bytes / suffix.first < 1) 267 if (bytes / suffix.first < 1)
244 continue; 268 continue;
245 269
246 std::stringstream ss; 270 std::stringstream ss;
247 ss << std::setprecision(precision) 271 ss << std::setprecision(precision) << (static_cast<double>(bytes) / suffix.first) << " " << suffix.second;
248 << (static_cast<double>(bytes) / suffix.first) << " "
249 << suffix.second;
250 return ss.str(); 272 return ss.str();
251 } 273 }
252 274
253 /* better luck next time */ 275 /* better luck next time */
254 return "0 bytes"; 276 return "0 bytes";
255 #endif 277 #endif
256 } 278 }
257 279
258 void RemoveLeadingChars(std::string& s, const char c) { 280 void RemoveLeadingChars(std::string &s, const char c)
281 {
259 s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1)); 282 s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1));
260 } 283 }
261 284
262 void RemoveTrailingChars(std::string& s, const char c) { 285 void RemoveTrailingChars(std::string &s, const char c)
286 {
263 s.erase(s.find_last_not_of(c) + 1, std::string::npos); 287 s.erase(s.find_last_not_of(c) + 1, std::string::npos);
264 } 288 }
265 289
266 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) { 290 bool BeginningMatchesSubstring(const std::string &str, const std::string &sub)
291 {
267 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++) 292 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++)
268 if (str[i] != sub[i]) 293 if (str[i] != sub[i])
269 return false; 294 return false;
270 295
271 return true; 296 return true;
272 } 297 }
273 298
274 std::string Translate(const char* str) { 299 std::string Translate(const char *str)
300 {
275 return Strings::ToUtf8String(QCoreApplication::tr(str)); 301 return Strings::ToUtf8String(QCoreApplication::tr(str));
276 } 302 }
277 303
278 } // namespace Strings 304 } // namespace Strings