Mercurial > minori
view src/core/strings.cc @ 98:582b2fca1561
strings: parse HTML entities when reading synopsis, make the
toupper and tolower functions more sane
author | Paper <mrpapersonic@gmail.com> |
---|---|
date | Thu, 02 Nov 2023 15:22:02 -0400 |
parents | 9b2b41f83a5e |
children | 503bc1547d49 |
line wrap: on
line source
/**
 * strings.cpp: Useful functions for manipulating strings
 **/
#include "core/strings.h"
#include <QByteArray>
#include <QLocale>
#include <QString>
#include <algorithm>
#include <cctype>
#include <codecvt>
#include <cstddef>
#include <locale>
#include <string>
#include <unordered_map>
#include <vector>

namespace Strings {

/* Joins every element of `vector` with `delimiter` between consecutive
   elements. An empty vector yields "-" rather than an empty string. */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
	if (vector.empty())
		return "-";

	std::string out;
	for (std::size_t i = 0; i < vector.size(); i++) {
		if (i > 0)
			out.append(delimiter);
		out.append(vector[i]);
	}
	return out;
}

/* Returns a copy of `string` in which every non-overlapping occurrence of
   `find` is replaced with `replace`. Text inserted by a replacement is never
   rescanned. An empty `find` returns the input unchanged. */
std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
	/* std::string::find("") matches at every position, so without this guard
	   the loop below would never terminate */
	if (find.empty())
		return string;

	std::size_t pos = 0;
	while ((pos = string.find(find, pos)) != std::string::npos) {
		string.replace(pos, find.length(), replace);
		pos += replace.length(); /* skip past what was just inserted */
	}
	return string;
}

/* Normalizes line breaks: "\r\n" and the literal tag "<br>" both become "\n",
   then any run of three or more newlines is collapsed to exactly two (at most
   one blank line between paragraphs). */
std::string SanitizeLineEndings(const std::string& string) {
	const std::string text = ReplaceAll(ReplaceAll(string, "\r\n", "\n"), "<br>", "\n");

	std::string out;
	out.reserve(text.size());

	std::size_t newlines = 0; /* length of the current run of '\n' */
	for (const char c : text) {
		if (c == '\n') {
			/* a single "\n\n\n" -> "\n\n" replacement pass leaves longer runs
			   partially intact, so count the run and drop the excess instead */
			if (++newlines > 2)
				continue;
		} else {
			newlines = 0;
		}
		out.push_back(c);
	}
	return out;
}

/* Strips everything between a '<' and the next '>' (inclusive); used because
   synopses arrive as HTML. A '<' with no closing '>' is not treated as a tag:
   it and the text following it are kept as-is. */
std::string RemoveHtmlTags(const std::string& string) {
	std::string out;
	out.reserve(string.size());

	std::size_t pos = 0;
	while (pos < string.size()) {
		const std::size_t open = string.find('<', pos);
		if (open == std::string::npos) {
			out.append(string, pos, std::string::npos);
			break;
		}
		out.append(string, pos, open - pos);

		/* look for the terminator *after* the opening bracket; a '>' earlier in
		   the text must not be paired with this '<' */
		const std::size_t close = string.find('>', open + 1);
		if (close == std::string::npos) {
			out.append(string, open, std::string::npos);
			break;
		}
		pos = close + 1;
	}
	return out;
}

/* Decodes the named HTML entities listed below into UTF-8, e.g. "&lt;" becomes
   "<". Entities that are not in the table are left untouched. The input is
   decoded in a single left-to-right pass, so decoded output is never decoded
   a second time ("&amp;lt;" yields "&lt;", not "<"). */
std::string ParseHtmlEntities(const std::string& string) {
	/* keys are the full entity text including '&' and ';';
	   values are spelled as UTF-8 byte sequences */
	static const std::unordered_map<std::string, std::string> entities = {
	    {"&lt;",    "<"           },
	    {"&gt;",    ">"           },
	    {"&nbsp;",  "\xC2\xA0"    }, /* U+00A0 */
	    {"&amp;",   "&"           },
	    {"&quot;",  "\""          },
	    {"&apos;",  "'"           },
	    {"&cent;",  "\xC2\xA2"    }, /* U+00A2 */
	    {"&pound;", "\xC2\xA3"    }, /* U+00A3 */
	    {"&euro;",  "\xE2\x82\xAC"}, /* U+20AC */
	    {"&yen;",   "\xC2\xA5"    }, /* U+00A5 */
	    {"&copy;",  "\xC2\xA9"    }, /* U+00A9 */
	    {"&reg;",   "\xC2\xAE"    }, /* U+00AE */
	    {"&rsquo;", "\xE2\x80\x99"}  /* U+2019; Haibane Renmei, AniList */
	};

	/* length of the longest key, so the search for ';' can be bounded */
	static const std::size_t longest = [] {
		std::size_t len = 0;
		for (const auto& entity : entities)
			len = std::max(len, entity.first.size());
		return len;
	}();

	std::string out;
	out.reserve(string.size());

	std::size_t pos = 0;
	while (pos < string.size()) {
		const std::size_t amp = string.find('&', pos);
		if (amp == std::string::npos) {
			out.append(string, pos, std::string::npos);
			break;
		}
		out.append(string, pos, amp - pos);

		/* only the next `longest` characters can possibly form a known entity */
		const std::string window = string.substr(amp, longest);
		const std::size_t semicolon = window.find(';');
		if (semicolon != std::string::npos) {
			const auto it = entities.find(window.substr(0, semicolon + 1));
			if (it != entities.end()) {
				out.append(it->second);
				pos = amp + semicolon + 1;
				continue;
			}
		}

		/* not a recognized entity: keep the ampersand literally */
		out.push_back('&');
		pos = amp + 1;
	}
	return out;
}

/* Converts an HTML synopsis to plain text: normalizes line endings, strips
   tags, then decodes entities. Entities are decoded last so that an escaped
   "&lt;" is not mistaken for the start of a tag. */
std::string TextifySynopsis(const std::string& string) {
	return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string)));
}

/* Upper-cases a UTF-8 string using a default-constructed QLocale, leaving the
   locale-specific case mapping to Qt. */
std::string ToUpper(const std::string& string) {
	/* todo: this "locale" will have to be moved to session.h */
	QLocale locale;
	return ToUtf8String(locale.toUpper(ToQString(string)));
}

/* Lower-cases a UTF-8 string using a default-constructed QLocale. */
std::string ToLower(const std::string& string) {
	QLocale locale;
	return ToUtf8String(locale.toLower(ToQString(string)));
}

/* Converts UTF-8 to a wide string via std::wstring_convert. No error string is
   supplied to the converter, so a failed conversion is reported by
   from_bytes() throwing std::range_error. */
std::wstring ToWstring(const std::string& string) {
	std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
	return converter.from_bytes(string);
}

/* Converts a QString to a wide string. QString::toStdWString() handles the
   empty string and sizes the result to the number of wchar_t units actually
   written, which can be fewer than QString::size() where wchar_t is 4 bytes
   wide and the text contains surrogate pairs. */
std::wstring ToWstring(const QString& string) {
	return string.toStdWString();
}

/* Converts a wide string to UTF-8 via std::wstring_convert; to_bytes() throws
   std::range_error if the conversion fails. */
std::string ToUtf8String(const std::wstring& wstring) {
	std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
	return converter.to_bytes(wstring);
}

/* Converts a QString to a UTF-8 encoded std::string. */
std::string ToUtf8String(const QString& string) {
	const QByteArray ba = string.toUtf8();
	return std::string(ba.constData(), ba.size());
}

/* Copies the bytes of a QByteArray into a std::string without any
   re-encoding; embedded NUL bytes are preserved because the size is passed
   explicitly. */
std::string ToUtf8String(const QByteArray& ba) {
	return std::string(ba.constData(), ba.size());
}

/* Builds a QString from a UTF-8 encoded std::string. */
QString ToQString(const std::string& string) {
	return QString::fromUtf8(string.c_str(), string.length());
}

/* Builds a QString from a wide string. */
QString ToQString(const std::wstring& wstring) {
	return QString::fromWCharArray(wstring.c_str(), wstring.length());
}

} // namespace Strings