Mercurial > minori
changeset 98:582b2fca1561
strings: parse HTML entities when reading synopsis, make the
toupper and tolower functions more sane
author | Paper <mrpapersonic@gmail.com> |
---|---|
date | Thu, 02 Nov 2023 15:22:02 -0400 |
parents | 18979b066284 |
children | 503bc1547d49 |
files | dep/animia/src/main.cpp include/core/strings.h src/core/strings.cc src/main.cc |
diffstat | 4 files changed, 53 insertions(+), 27 deletions(-) [+] |
line wrap: on
line diff
--- a/dep/animia/src/main.cpp Thu Nov 02 13:14:15 2023 -0400 +++ b/dep/animia/src/main.cpp Thu Nov 02 15:22:02 2023 -0400 @@ -2,6 +2,7 @@ #include "os.h" #include "linux.h" #include "win32.h" +#include "animia.h" #include <string> #include <unordered_map> #include <vector>
--- a/include/core/strings.h Thu Nov 02 13:14:15 2023 -0400 +++ b/include/core/strings.h Thu Nov 02 15:22:02 2023 -0400 @@ -13,9 +13,10 @@ std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter); /* Substring removal functions */ -std::string ReplaceAll(const std::string& string, const std::string& find, const std::string& replace); +std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace); std::string SanitizeLineEndings(const std::string& string); std::string RemoveHtmlTags(const std::string& string); +std::string ParseHtmlEntities(const std::string& string); /* stupid HTML bullshit */ std::string TextifySynopsis(const std::string& string); @@ -23,6 +24,8 @@ std::string ToUpper(const std::string& string); std::string ToLower(const std::string& string); +/* functions that make the way we convert from and to + different string formats universal */ std::wstring ToWstring(const std::string& string); std::wstring ToWstring(const QString& string); std::string ToUtf8String(const std::wstring& wstring);
--- a/src/core/strings.cc Thu Nov 02 13:14:15 2023 -0400 +++ b/src/core/strings.cc Thu Nov 02 15:22:02 2023 -0400 @@ -4,6 +4,7 @@ #include "core/strings.h" #include <QByteArray> #include <QString> +#include <QLocale> #include <algorithm> #include <cctype> #include <codecvt> @@ -25,27 +26,22 @@ return out; } -std::string ReplaceAll(const std::string& string, const std::string& find, const std::string& replace) { - std::string result; - size_t pos, find_len = find.size(), from = 0; - while ((pos = string.find(find, from)) != std::string::npos) { - result.append(string, from, pos - from); - result.append(replace); - from = pos + find_len; +std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) { + size_t pos = 0; + while ((pos = string.find(find, pos)) != std::string::npos) { + string.replace(pos, find.length(), replace); + pos += replace.length(); } - result.append(string, from, std::string::npos); - return result; + return string; } -/* this function probably fucks your RAM but whatevs */ +/* :) */ std::string SanitizeLineEndings(const std::string& string) { - std::string result(string); - result = ReplaceAll(result, "\r\n", "\n"); - result = ReplaceAll(result, "<br>", "\n"); - result = ReplaceAll(result, "\n\n\n", "\n\n"); - return result; + return ReplaceAll(ReplaceAll(ReplaceAll(string, "\r\n", "\n"), "<br>", "\n"), "\n\n\n", "\n\n"); } +/* removes dumb HTML tags because anilist is aids and + gives us HTML for synopses :/ */ std::string RemoveHtmlTags(const std::string& string) { std::string html(string); while (html.find("<") != std::string::npos) { @@ -59,23 +55,46 @@ return html; } -std::string TextifySynopsis(const std::string& string) { - return RemoveHtmlTags(SanitizeLineEndings(string)); +/* e.g. 
"&lt;" for "<" */ +std::string ParseHtmlEntities(const std::string& string) { + const std::unordered_map<std::string, std::string> map = { + {"&lt;", "<"}, + {"&rt;", ">"}, + {"&nbsp;", "\xA0"}, + {"&amp;", "&"}, + {"&quot;", "\""}, + {"&apos;", "'"}, + {"&cent;", "¢"}, + {"&pound;", "£"}, + {"&euro;", "€"}, + {"&yen;", "¥"}, + {"&copy;", "©"}, + {"&reg;", "®"}, + {"&rsquo;", "’"} // Haibane Renmei, AniList + }; + + std::string ret = string; + for (const auto& item : map) + ret = ReplaceAll(ret, item.first, item.second); + return ret; } -/* these functions suck for i18n!... - but we only use them with JSON - stuff anyway */ +/* */ +std::string TextifySynopsis(const std::string& string) { + return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string))); +} + +/* let Qt handle the heavy lifting of locale shit + I don't want to deal with */ std::string ToUpper(const std::string& string) { - std::string result(string); - std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::toupper(c); }); - return result; + /* todo: this "locale" will have to be moved to session.h */ + QLocale locale; + return ToUtf8String(locale.toUpper(ToQString(string))); } std::string ToLower(const std::string& string) { - std::string result(string); - std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::tolower(c); }); - return result; + QLocale locale; + return ToUtf8String(locale.toLower(ToQString(string))); } std::wstring ToWstring(const std::string& string) {
--- a/src/main.cc Thu Nov 02 13:14:15 2023 -0400 +++ b/src/main.cc Thu Nov 02 15:22:02 2023 -0400 @@ -2,11 +2,14 @@ #include "gui/window.h" #include <QApplication> #include <QStyleFactory> +#include <QLocale> Session session; int main(int argc, char** argv) { QApplication app(argc, argv); + /* this is a reasonable default, I presume */ + QLocale::setDefault(QLocale(QLocale::English, QLocale::UnitedStates)); session.config.Load();