Mercurial > minori
comparison src/core/strings.cc @ 369:47c9f8502269
*: clang-format all the things
I've edited the formatting a bit. Pointer asterisks (and reference
ampersands) now attach to the variable instead of the type, and opening
braces go on their own line for functions (but for nothing else).
| author | Paper <paper@tflc.us> |
|---|---|
| date | Fri, 25 Jul 2025 10:16:02 -0400 |
| parents | f81bed4e04ac |
| children |
comparison
equal
deleted
inserted
replaced
| 368:6d37a998cf91 | 369:47c9f8502269 |
|---|---|
| 3 **/ | 3 **/ |
| 4 #include "core/strings.h" | 4 #include "core/strings.h" |
| 5 #include "core/session.h" // locale | 5 #include "core/session.h" // locale |
| 6 | 6 |
| 7 #include <QByteArray> | 7 #include <QByteArray> |
| | 8 #include <QCoreApplication> |
| 8 #include <QDebug> | 9 #include <QDebug> |
| 9 #include <QLocale> | 10 #include <QLocale> |
| 10 #include <QString> | 11 #include <QString> |
| 11 #include <QTextDocument> | 12 #include <QTextDocument> |
| 12 #include <QCoreApplication> | |
| 13 | 13 |
| 14 #include <algorithm> | 14 #include <algorithm> |
| 15 #include <cctype> | 15 #include <cctype> |
| 16 #include <codecvt> | 16 #include <codecvt> |
| | 17 #include <iomanip> |
| 17 #include <iostream> | 18 #include <iostream> |
| 18 #include <iomanip> | |
| 19 #include <locale> | 19 #include <locale> |
| 20 #include <string> | 20 #include <string> |
| 21 #include <unordered_map> | 21 #include <unordered_map> |
| 22 #include <vector> | 22 #include <vector> |
| 23 | 23 |
| 24 #include "utf8proc.h" | 24 #include "utf8proc.h" |
| 25 | 25 |
| 26 namespace Strings { | 26 namespace Strings { |
| 27 | 27 |
| 28 /* ew */ | 28 /* ew */ |
| 29 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) { | 29 std::string Implode(const std::vector<std::string> &vector, const std::string &delimiter) |
| | 30 { |
| 30 if (vector.size() < 1) | 31 if (vector.size() < 1) |
| 31 return ""; | 32 return ""; |
| 32 | 33 |
| 33 std::string out; | 34 std::string out; |
| 34 | 35 |
| 39 } | 40 } |
| 40 | 41 |
| 41 return out; | 42 return out; |
| 42 } | 43 } |
| 43 | 44 |
| 44 std::vector<std::string> Split(const std::string& text, const std::string& delimiter) { | 45 std::vector<std::string> Split(const std::string &text, const std::string &delimiter) |
| | 46 { |
| 45 if (text.length() < 1) | 47 if (text.length() < 1) |
| 46 return {}; | 48 return {}; |
| 47 | 49 |
| 48 std::vector<std::string> tokens; | 50 std::vector<std::string> tokens; |
| 49 | 51 |
| 58 } | 60 } |
| 59 | 61 |
| 60 /* This function is really only used for cleaning up the synopsis of | 62 /* This function is really only used for cleaning up the synopsis of |
| 61 * horrible HTML debris from AniList :) | 63 * horrible HTML debris from AniList :) |
| 62 */ | 64 */ |
| 63 void ReplaceAll(std::string& string, std::string_view find, std::string_view replace) { | 65 void ReplaceAll(std::string &string, std::string_view find, std::string_view replace) |
| | 66 { |
| 64 size_t pos = 0; | 67 size_t pos = 0; |
| 65 while ((pos = string.find(find, pos)) != std::string::npos) { | 68 while ((pos = string.find(find, pos)) != std::string::npos) { |
| 66 string.replace(pos, find.length(), replace); | 69 string.replace(pos, find.length(), replace); |
| 67 pos += replace.length(); | 70 pos += replace.length(); |
| 68 } | 71 } |
| 69 } | 72 } |
| 70 | 73 |
| 71 void ConvertRomanNumerals(std::string& string) { | 74 void ConvertRomanNumerals(std::string &string) |
| | 75 { |
| 72 static const std::vector<std::pair<std::string_view, std::string_view>> vec = { | 76 static const std::vector<std::pair<std::string_view, std::string_view>> vec = { |
| 73 {"2", "II"}, {"3", "III"}, {"4", "IV"}, {"5", "V"}, {"6", "VI"}, | 77 {"2", "II" }, |
| 74 {"7", "VII"}, {"8", "VIII"}, {"9", "IX"}, {"11", "XI"}, {"12", "XII"}, | 78 {"3", "III" }, |
| 75 {"13", "XIII"} | 79 {"4", "IV" }, |
| 76 }; | 80 {"5", "V" }, |
| 77 | 81 {"6", "VI" }, |
| 78 for (const auto& item : vec) | 82 {"7", "VII" }, |
| | 83 {"8", "VIII"}, |
| | 84 {"9", "IX" }, |
| | 85 {"11", "XI" }, |
| | 86 {"12", "XII" }, |
| | 87 {"13", "XIII"} |
| | 88 }; |
| | 89 |
| | 90 for (const auto &item : vec) |
| 79 ReplaceAll(string, item.second, item.first); | 91 ReplaceAll(string, item.second, item.first); |
| 80 } | 92 } |
| 81 | 93 |
| 82 /* this also performs case folding, so our string is lowercase after this */ | 94 /* this also performs case folding, so our string is lowercase after this */ |
| 83 void NormalizeUnicode(std::string& string) { | 95 void NormalizeUnicode(std::string &string) |
| | 96 { |
| 84 static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>( | 97 static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>( |
| 85 UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE | | 98 UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE | UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK | |
| 86 UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK | | 99 UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS); |
| 87 UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS | |
| 88 ); | |
| 89 | 100 |
| 90 /* ack */ | 101 /* ack */ |
| 91 utf8proc_uint8_t* buf = nullptr; | 102 utf8proc_uint8_t *buf = nullptr; |
| 92 | 103 |
| 93 const utf8proc_ssize_t size = utf8proc_map( | 104 const utf8proc_ssize_t size = |
| 94 reinterpret_cast<const utf8proc_uint8_t*>(string.data()), | 105 utf8proc_map(reinterpret_cast<const utf8proc_uint8_t *>(string.data()), string.size(), &buf, options); |
| 95 string.size(), | |
| 96 &buf, | |
| 97 options | |
| 98 ); | |
| 99 | 106 |
| 100 if (buf) { | 107 if (buf) { |
| 101 if (size) | 108 if (size) |
| 102 string.assign(reinterpret_cast<const char*>(buf), size); | 109 string.assign(reinterpret_cast<const char *>(buf), size); |
| 103 | 110 |
| 104 std::free(buf); | 111 std::free(buf); |
| 105 } | 112 } |
| 106 } | 113 } |
| 107 | 114 |
| 108 void NormalizeAnimeTitle(std::string& string) { | 115 void NormalizeAnimeTitle(std::string &string) |
| | 116 { |
| 109 ConvertRomanNumerals(string); | 117 ConvertRomanNumerals(string); |
| 110 NormalizeUnicode(string); | 118 NormalizeUnicode(string); |
| 111 RemoveLeadingChars(string, ' '); | 119 RemoveLeadingChars(string, ' '); |
| 112 RemoveTrailingChars(string, ' '); | 120 RemoveTrailingChars(string, ' '); |
| 113 } | 121 } |
| 114 | 122 |
| 115 void TextifySynopsis(std::string& string) { | 123 void TextifySynopsis(std::string &string) |
| | 124 { |
| 116 /* Just let Qt deal with it. */ | 125 /* Just let Qt deal with it. */ |
| 117 QTextDocument text; | 126 QTextDocument text; |
| 118 text.setHtml(Strings::ToQString(string)); | 127 text.setHtml(Strings::ToQString(string)); |
| 119 string = Strings::ToUtf8String(text.toPlainText()); | 128 string = Strings::ToUtf8String(text.toPlainText()); |
| 120 } | 129 } |
| 121 | 130 |
| 122 /* let Qt handle the heavy lifting of locale shit | 131 /* let Qt handle the heavy lifting of locale shit |
| 123 * I don't want to deal with | 132 * I don't want to deal with |
| 124 */ | 133 */ |
| 125 std::string ToUpper(const std::string& string) { | 134 std::string ToUpper(const std::string &string) |
| | 135 { |
| 126 return ToUtf8String(session.config.locale.GetLocale().toUpper(ToQString(string))); | 136 return ToUtf8String(session.config.locale.GetLocale().toUpper(ToQString(string))); |
| 127 } | 137 } |
| 128 | 138 |
| 129 std::string ToLower(const std::string& string) { | 139 std::string ToLower(const std::string &string) |
| | 140 { |
| 130 return ToUtf8String(session.config.locale.GetLocale().toLower(ToQString(string))); | 141 return ToUtf8String(session.config.locale.GetLocale().toLower(ToQString(string))); |
| 131 } | 142 } |
| 132 | 143 |
| 133 std::wstring ToWstring(const std::string& string) { | 144 std::wstring ToWstring(const std::string &string) |
| | 145 { |
| 134 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L""); | 146 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L""); |
| 135 | 147 |
| 136 std::wstring wstr; | 148 std::wstring wstr; |
| 137 try { | 149 try { |
| 138 wstr = converter.from_bytes(string); | 150 wstr = converter.from_bytes(string); |
| 139 } catch (std::range_error const& ex) { | 151 } catch (std::range_error const &ex) { |
| 140 /* XXX how? */ | 152 /* XXX how? */ |
| 141 std::cerr << "Failed to convert UTF-8 to wide string!" << std::endl; | 153 std::cerr << "Failed to convert UTF-8 to wide string!" << std::endl; |
| 142 } | 154 } |
| 143 return wstr; | 155 return wstr; |
| 144 } | 156 } |
| 145 | 157 |
| 146 std::wstring ToWstring(const QString& string) { | 158 std::wstring ToWstring(const QString &string) |
| | 159 { |
| 147 std::wstring arr(string.size(), L'\0'); | 160 std::wstring arr(string.size(), L'\0'); |
| 148 string.toWCharArray(&arr.front()); | 161 string.toWCharArray(&arr.front()); |
| 149 return arr; | 162 return arr; |
| 150 } | 163 } |
| 151 | 164 |
| 152 std::string ToUtf8String(const std::wstring& wstring) { | 165 std::string ToUtf8String(const std::wstring &wstring) |
| | 166 { |
| 153 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L""); | 167 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L""); |
| 154 return converter.to_bytes(wstring); | 168 return converter.to_bytes(wstring); |
| 155 } | 169 } |
| 156 | 170 |
| 157 std::string ToUtf8String(const std::u32string& u32string) { | 171 std::string ToUtf8String(const std::u32string &u32string) |
| | 172 { |
| 158 static std::wstring_convert<std::codecvt_utf8_utf16<char32_t>, char32_t> converter; | 173 static std::wstring_convert<std::codecvt_utf8_utf16<char32_t>, char32_t> converter; |
| 159 return converter.to_bytes(u32string); | 174 return converter.to_bytes(u32string); |
| 160 } | 175 } |
| 161 | 176 |
| 162 std::u32string ToUcs4String(const std::string& string) { | 177 std::u32string ToUcs4String(const std::string &string) |
| | 178 { |
| 163 static std::wstring_convert<std::codecvt_utf8_utf16<char32_t>, char32_t> converter; | 179 static std::wstring_convert<std::codecvt_utf8_utf16<char32_t>, char32_t> converter; |
| 164 return converter.from_bytes(string); | 180 return converter.from_bytes(string); |
| 165 } | 181 } |
| 166 | 182 |
| 167 std::string ToUtf8String(const QString& string) { | 183 std::string ToUtf8String(const QString &string) |
| | 184 { |
| 168 const QByteArray ba = string.toUtf8(); | 185 const QByteArray ba = string.toUtf8(); |
| 169 return std::string(ba.constData(), ba.size()); | 186 return std::string(ba.constData(), ba.size()); |
| 170 } | 187 } |
| 171 | 188 |
| 172 std::string ToUtf8String(const QByteArray& ba) { | 189 std::string ToUtf8String(const QByteArray &ba) |
| | 190 { |
| 173 return std::string(ba.constData(), ba.size()); | 191 return std::string(ba.constData(), ba.size()); |
| 174 } | 192 } |
| 175 | 193 |
| 176 QString ToQString(const std::string& string) { | 194 QString ToQString(const std::string &string) |
| | 195 { |
| 177 return QString::fromUtf8(string.c_str(), string.length()); | 196 return QString::fromUtf8(string.c_str(), string.length()); |
| 178 } | 197 } |
| 179 | 198 |
| 180 QString ToQString(const std::wstring& wstring) { | 199 QString ToQString(const std::wstring &wstring) |
| | 200 { |
| 181 return QString::fromWCharArray(wstring.c_str(), wstring.length()); | 201 return QString::fromWCharArray(wstring.c_str(), wstring.length()); |
| 182 } | 202 } |
| 183 | 203 |
| 184 std::string ToUtf8String(const bool b) { | 204 std::string ToUtf8String(const bool b) |
| | 205 { |
| 185 return b ? "true" : "false"; // lol | 206 return b ? "true" : "false"; // lol |
| 186 } | 207 } |
| 187 | 208 |
| 188 bool ToBool(const std::string& str, bool def) { | 209 bool ToBool(const std::string &str, bool def) |
| | 210 { |
| 189 std::istringstream s(Strings::ToLower(str)); | 211 std::istringstream s(Strings::ToLower(str)); |
| 190 s >> std::boolalpha >> def; | 212 s >> std::boolalpha >> def; |
| 191 return def; | 213 return def; |
| 192 } | 214 } |
| 193 | 215 |
| 194 template<typename T> | 216 template<typename T> |
| 195 constexpr T ipow(T num, unsigned int pow) { | 217 constexpr T ipow(T num, unsigned int pow) |
| 196 return (pow >= sizeof(unsigned int)*8) ? 0 : | 218 { |
| 197 pow == 0 ? 1 : num * ipow(num, pow-1); | 219 return (pow >= sizeof(unsigned int) * 8) ? 0 : pow == 0 ? 1 : num * ipow(num, pow - 1); |
| 198 } | 220 } |
| 199 | 221 |
| 200 /* util funcs */ | 222 /* util funcs */ |
| 201 uint64_t HumanReadableSizeToBytes(const std::string& str) { | 223 uint64_t HumanReadableSizeToBytes(const std::string &str) |
| | 224 { |
| 202 static const std::unordered_map<std::string, uint64_t> bytes_map = { | 225 static const std::unordered_map<std::string, uint64_t> bytes_map = { |
| 203 {"KB", 1e3}, | 226 {"KB", 1e3 }, |
| 204 {"MB", 1e6}, | 227 {"MB", 1e6 }, |
| 205 {"GB", 1e9}, | 228 {"GB", 1e9 }, |
| 206 {"TB", 1e12}, | 229 {"TB", 1e12 }, |
| 207 {"PB", 1e15}, | 230 {"PB", 1e15 }, |
| 208 {"KiB", 1ull << 10}, | 231 {"KiB", 1ull << 10}, |
| 209 {"MiB", 1ull << 20}, | 232 {"MiB", 1ull << 20}, |
| 210 {"GiB", 1ull << 30}, | 233 {"GiB", 1ull << 30}, |
| 211 {"TiB", 1ull << 40}, | 234 {"TiB", 1ull << 40}, |
| 212 {"PiB", 1ull << 50} /* surely we won't need more than this */ | 235 {"PiB", 1ull << 50} /* surely we won't need more than this */ |
| 213 }; | 236 }; |
| 214 | 237 |
| 215 for (const auto& suffix : bytes_map) { | 238 for (const auto &suffix : bytes_map) { |
| 216 if (str.find(suffix.first) != std::string::npos) { | 239 if (str.find(suffix.first) != std::string::npos) { |
| 217 try { | 240 try { |
| 218 uint64_t size = std::stod(str) * suffix.second; | 241 uint64_t size = std::stod(str) * suffix.second; |
| 219 return size; | 242 return size; |
| 220 } catch (std::invalid_argument const& ex) { | 243 } catch (std::invalid_argument const &ex) { |
| 221 continue; | 244 continue; |
| 222 } | 245 } |
| 223 } | 246 } |
| 224 } | 247 } |
| 225 | 248 |
| 226 return ToInt(str, 0); | 249 return ToInt(str, 0); |
| 227 } | 250 } |
| 228 | 251 |
| 229 std::string BytesToHumanReadableSize(uint64_t bytes, int precision) { | 252 std::string BytesToHumanReadableSize(uint64_t bytes, int precision) |
| | 253 { |
| 230 #if QT_VERSION >= QT_VERSION_CHECK(5, 10, 0) | 254 #if QT_VERSION >= QT_VERSION_CHECK(5, 10, 0) |
| 231 /* QLocale in Qt >= 5.10.0 has a function for this */ | 255 /* QLocale in Qt >= 5.10.0 has a function for this */ |
| 232 return Strings::ToUtf8String(session.config.locale.GetLocale().formattedDataSize(bytes, precision)); | 256 return Strings::ToUtf8String(session.config.locale.GetLocale().formattedDataSize(bytes, precision)); |
| 233 #else | 257 #else |
| 234 static const std::unordered_map<uint64_t, std::string> map = { | 258 static const std::unordered_map<uint64_t, std::string> map = { |
| 235 {1ull << 10, "KiB"}, | 259 {1ull << 10, "KiB"}, |
| 236 {1ull << 20, "MiB"}, | 260 {1ull << 20, "MiB"}, |
| 237 {1ull << 30, "GiB"}, | 261 {1ull << 30, "GiB"}, |
| 238 {1ull << 40, "TiB"}, | 262 {1ull << 40, "TiB"}, |
| 239 {1ull << 50, "PiB"} | 263 {1ull << 50, "PiB"} |
| 240 }; | 264 }; |
| 241 | 265 |
| 242 for (const auto& suffix : map) { | 266 for (const auto &suffix : map) { |
| 243 if (bytes / suffix.first < 1) | 267 if (bytes / suffix.first < 1) |
| 244 continue; | 268 continue; |
| 245 | 269 |
| 246 std::stringstream ss; | 270 std::stringstream ss; |
| 247 ss << std::setprecision(precision) | 271 ss << std::setprecision(precision) << (static_cast<double>(bytes) / suffix.first) << " " << suffix.second; |
| 248 << (static_cast<double>(bytes) / suffix.first) << " " | |
| 249 << suffix.second; | |
| 250 return ss.str(); | 272 return ss.str(); |
| 251 } | 273 } |
| 252 | 274 |
| 253 /* better luck next time */ | 275 /* better luck next time */ |
| 254 return "0 bytes"; | 276 return "0 bytes"; |
| 255 #endif | 277 #endif |
| 256 } | 278 } |
| 257 | 279 |
| 258 void RemoveLeadingChars(std::string& s, const char c) { | 280 void RemoveLeadingChars(std::string &s, const char c) |
| | 281 { |
| 259 s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1)); | 282 s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1)); |
| 260 } | 283 } |
| 261 | 284 |
| 262 void RemoveTrailingChars(std::string& s, const char c) { | 285 void RemoveTrailingChars(std::string &s, const char c) |
| | 286 { |
| 263 s.erase(s.find_last_not_of(c) + 1, std::string::npos); | 287 s.erase(s.find_last_not_of(c) + 1, std::string::npos); |
| 264 } | 288 } |
| 265 | 289 |
| 266 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) { | 290 bool BeginningMatchesSubstring(const std::string &str, const std::string &sub) |
| | 291 { |
| 267 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++) | 292 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++) |
| 268 if (str[i] != sub[i]) | 293 if (str[i] != sub[i]) |
| 269 return false; | 294 return false; |
| 270 | 295 |
| 271 return true; | 296 return true; |
| 272 } | 297 } |
| 273 | 298 |
| 274 std::string Translate(const char* str) { | 299 std::string Translate(const char *str) |
| | 300 { |
| 275 return Strings::ToUtf8String(QCoreApplication::tr(str)); | 301 return Strings::ToUtf8String(QCoreApplication::tr(str)); |
| 276 } | 302 } |
| 277 | 303 |
| 278 } // namespace Strings | 304 } // namespace Strings |
