Mercurial > minori
diff src/core/strings.cc @ 98:582b2fca1561
strings: parse HTML entities when reading synopsis, make the
toupper and tolower functions more sane
author | Paper <mrpapersonic@gmail.com> |
---|---|
date | Thu, 02 Nov 2023 15:22:02 -0400 (14 months ago) |
parents | 9b2b41f83a5e |
children | 503bc1547d49 |
line wrap: on
line diff
--- a/src/core/strings.cc Thu Nov 02 13:14:15 2023 -0400 +++ b/src/core/strings.cc Thu Nov 02 15:22:02 2023 -0400 @@ -4,6 +4,7 @@ #include "core/strings.h" #include <QByteArray> #include <QString> +#include <QLocale> #include <algorithm> #include <cctype> #include <codecvt> @@ -25,27 +26,22 @@ return out; } -std::string ReplaceAll(const std::string& string, const std::string& find, const std::string& replace) { - std::string result; - size_t pos, find_len = find.size(), from = 0; - while ((pos = string.find(find, from)) != std::string::npos) { - result.append(string, from, pos - from); - result.append(replace); - from = pos + find_len; +std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) { + size_t pos = 0; + while ((pos = string.find(find, pos)) != std::string::npos) { + string.replace(pos, find.length(), replace); + pos += replace.length(); } - result.append(string, from, std::string::npos); - return result; + return string; } -/* this function probably fucks your RAM but whatevs */ +/* :) */ std::string SanitizeLineEndings(const std::string& string) { - std::string result(string); - result = ReplaceAll(result, "\r\n", "\n"); - result = ReplaceAll(result, "<br>", "\n"); - result = ReplaceAll(result, "\n\n\n", "\n\n"); - return result; + return ReplaceAll(ReplaceAll(ReplaceAll(string, "\r\n", "\n"), "<br>", "\n"), "\n\n\n", "\n\n"); } +/* removes dumb HTML tags because anilist is aids and + gives us HTML for synopses :/ */ std::string RemoveHtmlTags(const std::string& string) { std::string html(string); while (html.find("<") != std::string::npos) { @@ -59,23 +55,46 @@ return html; } -std::string TextifySynopsis(const std::string& string) { - return RemoveHtmlTags(SanitizeLineEndings(string)); +/* e.g. 
"&lt;" for "<" */ +std::string ParseHtmlEntities(const std::string& string) { + const std::unordered_map<std::string, std::string> map = { + {"&lt;", "<"}, + {"&rt;", ">"}, + {"&nbsp;", "\xA0"}, + {"&amp;", "&"}, + {"&quot;", "\""}, + {"&apos;", "'"}, + {"&cent;", "¢"}, + {"&pound;", "£"}, + {"&euro;", "€"}, + {"&yen;", "¥"}, + {"&copy;", "©"}, + {"&reg;", "®"}, + {"&rsquo;", "’"} // Haibane Renmei, AniList + }; + + std::string ret = string; + for (const auto& item : map) + ret = ReplaceAll(ret, item.first, item.second); + return ret; } -/* these functions suck for i18n!... - but we only use them with JSON - stuff anyway */ +/* */ +std::string TextifySynopsis(const std::string& string) { + return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string))); +} + +/* let Qt handle the heavy lifting of locale shit + I don't want to deal with */ std::string ToUpper(const std::string& string) { - std::string result(string); - std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::toupper(c); }); - return result; + /* todo: this "locale" will have to be moved to session.h */ + QLocale locale; + return ToUtf8String(locale.toUpper(ToQString(string))); } std::string ToLower(const std::string& string) { - std::string result(string); - std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::tolower(c); }); - return result; + QLocale locale; + return ToUtf8String(locale.toLower(ToQString(string))); } std::wstring ToWstring(const std::string& string) {