Mercurial > minori
view src/core/strings.cc @ 123:a45edd073f9e
deps/pugixml: update to v1.14.0
author | Paper <mrpapersonic@gmail.com> |
---|---|
date | Wed, 08 Nov 2023 21:40:02 -0500 |
parents | 275da698697d |
children | 9613d72b097e |
line wrap: on
line source
/**
 * strings.cc: Useful functions for manipulating strings
 **/
#include "core/strings.h"
#include <QByteArray>
#include <QDebug>
#include <QLocale>
#include <QString>

#include <algorithm>
#include <cctype>
#include <codecvt>
#include <cstdint>
#include <iterator>
#include <locale>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

namespace Strings {

/* Joins every element of `vector` into one string, separated by `delimiter`.
   An empty vector yields "-" (not an empty string). */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
	if (vector.empty())
		return "-";

	std::string out;
	for (std::size_t i = 0; i < vector.size(); i++) {
		if (i > 0)
			out.append(delimiter);
		out.append(vector[i]);
	}

	return out;
}

/* Splits `text` on every occurrence of `delimiter`. The result always has at
   least one element; empty fields between adjacent delimiters are kept.
   An empty delimiter returns `text` as the only token. */
std::vector<std::string> Split(const std::string& text, const std::string& delimiter) {
	std::vector<std::string> tokens;

	/* find("") matches at every position without ever advancing, so an empty
	   delimiter would never terminate. */
	if (delimiter.empty()) {
		tokens.push_back(text);
		return tokens;
	}

	std::size_t start = 0;
	std::size_t end = 0;
	while ((end = text.find(delimiter, start)) != std::string::npos) {
		tokens.push_back(text.substr(start, end - start));
		start = end + delimiter.length();
	}
	tokens.push_back(text.substr(start));

	return tokens;
}

/* Replaces every non-overlapping occurrence of `find` in `string` with
   `replace`, scanning left to right. Replaced text is never rescanned.
   An empty `find` returns `string` unchanged.

   This function is really only used for cleaning up the synopsis of
   horrible HTML debris from AniList :) */
std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
	/* an empty needle matches everywhere and would loop forever */
	if (find.empty())
		return string;

	std::size_t pos = 0;
	while ((pos = string.find(find, pos)) != std::string::npos) {
		string.replace(pos, find.length(), replace);
		pos += replace.length();
	}

	return string;
}

/* Normalizes line breaks: "\r\n", "</p>", "<br>" and "<br />" all become "\n",
   and any run of three or more newlines is collapsed down to exactly two. */
std::string SanitizeLineEndings(const std::string& string) {
	std::string text = ReplaceAll(string, "\r\n", "\n");
	text = ReplaceAll(std::move(text), "</p>", "\n");
	text = ReplaceAll(std::move(text), "<br>", "\n");
	text = ReplaceAll(std::move(text), "<br />", "\n");

	/* A single "\n\n\n" -> "\n\n" replacement pass leaves three newlines
	   behind whenever there were four or more, so count runs instead. */
	std::string out;
	out.reserve(text.size());
	std::size_t run = 0;
	for (const char c : text) {
		run = (c == '\n') ? run + 1 : 0;
		if (run <= 2)
			out.push_back(c);
	}

	return out;
}

/* Removes everything between a '<' and the next '>' that follows it
   (inclusive). A '<' with no closing '>' is left in place, along with the
   rest of the text.

   removes dumb HTML tags because anilist gives us HTML for synopses :/ */
std::string RemoveHtmlTags(std::string string) {
	std::size_t open = 0;
	while ((open = string.find('<', open)) != std::string::npos) {
		/* Look for the closing bracket *after* the opening one; a stray '>'
		   earlier in the text must not be paired with this '<'. */
		const std::size_t close = string.find('>', open + 1);
		if (close == std::string::npos)
			break;

		string.erase(open, close - open + 1);
	}

	return string;
}

/* Decodes a small set of named HTML entities, e.g. "&lt;" for "<".
   The input is scanned once from left to right and decoded text is never
   rescanned, so "&amp;lt;" becomes "&lt;" rather than "<". Unknown entities
   and lone ampersands are copied through unchanged. Replacement text is
   UTF-8. */
std::string ParseHtmlEntities(std::string string) {
	/* Values are spelled out as UTF-8 byte escapes so they do not depend on
	   the encoding the compiler assumes for this source file. */
	static const std::pair<std::string, std::string> entities[] = {
	    {"&lt;",    "<"           },
	    {"&gt;",    ">"           },
	    {"&nbsp;",  "\xC2\xA0"    }, /* U+00A0 NO-BREAK SPACE */
	    {"&amp;",   "&"           },
	    {"&quot;",  "\""          },
	    {"&apos;",  "'"           },
	    {"&cent;",  "\xC2\xA2"    }, /* U+00A2 */
	    {"&pound;", "\xC2\xA3"    }, /* U+00A3 */
	    {"&euro;",  "\xE2\x82\xAC"}, /* U+20AC */
	    {"&yen;",   "\xC2\xA5"    }, /* U+00A5 */
	    {"&copy;",  "\xC2\xA9"    }, /* U+00A9 */
	    {"&reg;",   "\xC2\xAE"    }, /* U+00AE */
	    {"&rsquo;", "\xE2\x80\x99"}  /* U+2019; Haibane Renmei, AniList */
	};

	std::string out;
	out.reserve(string.size());

	std::size_t pos = 0;
	while (pos < string.size()) {
		const std::size_t amp = string.find('&', pos);
		if (amp == std::string::npos) {
			out.append(string, pos, std::string::npos);
			break;
		}

		/* copy the plain text preceding the ampersand */
		out.append(string, pos, amp - pos);

		const auto match = std::find_if(std::begin(entities), std::end(entities),
		    [&string, amp](const std::pair<std::string, std::string>& entity) {
			    return string.compare(amp, entity.first.size(), entity.first) == 0;
		    });

		if (match != std::end(entities)) {
			out.append(match->second);
			pos = amp + match->first.size();
		} else {
			/* not an entity we know about; keep the '&' literally */
			out.push_back('&');
			pos = amp + 1;
		}
	}

	return out;
}

/* Turns an HTML-ish synopsis into plain text: line breaks are normalized,
   tags are stripped, then entities are decoded (in that order, so that a
   decoded "&lt;" is never mistaken for the start of a tag). */
std::string TextifySynopsis(const std::string& string) {
	return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string)));
}

/* Uppercases a UTF-8 string using a default-constructed QLocale.
   let Qt handle the heavy lifting of locale stuff I don't want to deal with */
std::string ToUpper(const std::string& string) {
	/* todo: this "locale" will have to be moved to session.h
	   it also defaults to en-US, which sucks very much for
	   anyone who doesn't speak american english... */
	QLocale locale;
	return ToUtf8String(locale.toUpper(ToQString(string)));
}

/* Lowercases a UTF-8 string using a default-constructed QLocale. */
std::string ToLower(const std::string& string) {
	QLocale locale;
	return ToUtf8String(locale.toLower(ToQString(string)));
}

/* Converts a UTF-8 string to a wide string.
   std::wstring_convert throws std::range_error if the input cannot be
   converted. */
std::wstring ToWstring(const std::string& string) {
	std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
	return converter.from_bytes(string);
}

/* Converts a QString to a wide string. */
std::wstring ToWstring(const QString& string) {
	/* toStdWString() sizes the result from the number of wchar_t units that
	   were actually written, and is safe for an empty QString. */
	return string.toStdWString();
}

/* Converts a wide string to UTF-8.
   std::wstring_convert throws std::range_error if the input cannot be
   converted. */
std::string ToUtf8String(const std::wstring& wstring) {
	std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
	return converter.to_bytes(wstring);
}

/* Converts a QString to a UTF-8 encoded std::string. */
std::string ToUtf8String(const QString& string) {
	const QByteArray ba = string.toUtf8();
	return std::string(ba.constData(), ba.size());
}

/* Copies the raw bytes of a QByteArray into a std::string (no transcoding). */
std::string ToUtf8String(const QByteArray& ba) {
	return std::string(ba.constData(), ba.size());
}

/* Converts a UTF-8 encoded std::string to a QString. */
QString ToQString(const std::string& string) {
	return QString::fromStdString(string);
}

/* Converts a wide string to a QString. */
QString ToQString(const std::wstring& wstring) {
	return QString::fromStdWString(wstring);
}

/* Parses a leading integer out of `str` (std::stoi rules: leading whitespace
   is skipped and trailing garbage is ignored). Returns `def` when there is
   no number at all or when the number does not fit in an int.

   not really an "int"... but who cares? */
int ToInt(const std::string& str, int def) {
	try {
		return std::stoi(str);
	} catch (const std::invalid_argument&) {
		qDebug() << "Failed to parse int from std::string: no number found in " << ToQString(str)
		         << " defaulting to " << def;
	} catch (const std::out_of_range&) {
		qDebug() << "Failed to parse int from std::string: number out of range in " << ToQString(str)
		         << " defaulting to " << def;
	}

	return def;
}

/* Interprets `s` as a boolean, case-insensitively. Returns `def` for
   anything shorter than four characters or anything that starts with
   neither "true" nor "false". Trailing characters are ignored.
   NOTE: because of how BeginningMatchesSubstring compares, the
   four-character input "fals" is also accepted as false. */
bool ToBool(const std::string& s, const bool def) {
	if (s.length() < 4)
		return def;

	const std::string lowered = Strings::ToLower(s);
	if (Strings::BeginningMatchesSubstring(lowered, "true"))
		return true;
	if (Strings::BeginningMatchesSubstring(lowered, "false"))
		return false;

	return def;
}

/* Returns "true" or "false". */
std::string ToUtf8String(const bool b) {
	return b ? "true" : "false";
}

/* Converts a size such as "1.5 GB" into a byte count. The number is read
   from the start of the string and multiplied by the first binary suffix
   (KB = 2^10 ... PB = 2^50) found anywhere in it. Without a suffix, the
   leading integer is taken as a plain byte count. Unparseable, negative or
   NaN input yields 0; results too large for 64 bits saturate. */
uint64_t HumanReadableSizeToBytes(const std::string& str) {
	static const std::pair<const char*, uint64_t> suffixes[] = {
	    {"KB", 1ull << 10},
	    {"MB", 1ull << 20},
	    {"GB", 1ull << 30},
	    {"TB", 1ull << 40},
	    {"PB", 1ull << 50}  /* surely we won't need more than this */
	};

	for (const auto& suffix : suffixes) {
		if (str.find(suffix.first) == std::string::npos)
			continue;

		try {
			const double bytes = std::stod(str) * static_cast<double>(suffix.second);

			/* Converting a negative, NaN or too-large double to an unsigned
			   integer is undefined behaviour, so clamp first. */
			if (!(bytes > 0.0))
				return 0;
			if (bytes >= 18446744073709551616.0) /* 2^64 */
				return UINT64_MAX;
			return static_cast<uint64_t>(bytes);
		} catch (const std::logic_error&) {
			/* invalid_argument or out_of_range: the number does not depend
			   on the suffix, so stop looking and use the fallback below */
			break;
		}
	}

	/* No usable suffix: treat the string as a plain number of bytes. */
	try {
		const long long bytes = std::stoll(str);
		return (bytes < 0) ? 0 : static_cast<uint64_t>(bytes);
	} catch (const std::logic_error&) {
		qDebug() << "Failed to parse size from std::string:" << ToQString(str) << "defaulting to 0";
		return 0;
	}
}

/* Strips leading occurrences of `c` from `s`, but never the final character
   of the string: "0012" -> "12", while "000" -> "0" and "0" -> "0". */
std::string RemoveLeadingChars(std::string s, const char c) {
	if (s.empty())
		return s;

	const std::size_t first_kept = std::min(s.find_first_not_of(c), s.size() - 1);
	s.erase(0, first_kept);
	return s;
}

/* Strips all trailing occurrences of `c` from `s`. Unlike
   RemoveLeadingChars, a string made up solely of `c` becomes empty. */
std::string RemoveTrailingChars(std::string s, const char c) {
	/* find_last_not_of returns npos when every character is `c`; npos + 1
	   wraps around to 0, which erases the whole string. */
	s.erase(s.find_last_not_of(c) + 1, std::string::npos);
	return s;
}

/* Returns true if `str` and `sub` agree on every character up to the length
   of the shorter one. This means it is also true when `str` is shorter than
   `sub` and is a prefix of it, and whenever either string is empty. */
bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {
	const std::size_t common = std::min(str.length(), sub.length());
	return str.compare(0, common, sub, 0, common) == 0;
}

} // namespace Strings