changeset 98:582b2fca1561

strings: parse HTML entities when reading synopsis, make the toupper and tolower functions more sane
author Paper <mrpapersonic@gmail.com>
date Thu, 02 Nov 2023 15:22:02 -0400
parents 18979b066284
children 503bc1547d49
files dep/animia/src/main.cpp include/core/strings.h src/core/strings.cc src/main.cc
diffstat 4 files changed, 53 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/dep/animia/src/main.cpp	Thu Nov 02 13:14:15 2023 -0400
+++ b/dep/animia/src/main.cpp	Thu Nov 02 15:22:02 2023 -0400
@@ -2,6 +2,7 @@
 #include "os.h"
 #include "linux.h"
 #include "win32.h"
+#include "animia.h"
 #include <string>
 #include <unordered_map>
 #include <vector>
--- a/include/core/strings.h	Thu Nov 02 13:14:15 2023 -0400
+++ b/include/core/strings.h	Thu Nov 02 15:22:02 2023 -0400
@@ -13,9 +13,10 @@
 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter);
 
 /* Substring removal functions */
-std::string ReplaceAll(const std::string& string, const std::string& find, const std::string& replace);
+std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace);
 std::string SanitizeLineEndings(const std::string& string);
 std::string RemoveHtmlTags(const std::string& string);
+std::string ParseHtmlEntities(const std::string& string);
 
 /* stupid HTML bullshit */
 std::string TextifySynopsis(const std::string& string);
@@ -23,6 +24,8 @@
 std::string ToUpper(const std::string& string);
 std::string ToLower(const std::string& string);
 
+/* functions that make the way we convert from and to
+   different string formats universal */
 std::wstring ToWstring(const std::string& string);
 std::wstring ToWstring(const QString& string);
 std::string ToUtf8String(const std::wstring& wstring);
--- a/src/core/strings.cc	Thu Nov 02 13:14:15 2023 -0400
+++ b/src/core/strings.cc	Thu Nov 02 15:22:02 2023 -0400
@@ -4,6 +4,7 @@
 #include "core/strings.h"
 #include <QByteArray>
 #include <QString>
+#include <QLocale>
 #include <algorithm>
 #include <cctype>
 #include <codecvt>
@@ -25,27 +26,22 @@
 	return out;
 }
 
-std::string ReplaceAll(const std::string& string, const std::string& find, const std::string& replace) {
-	std::string result;
-	size_t pos, find_len = find.size(), from = 0;
-	while ((pos = string.find(find, from)) != std::string::npos) {
-		result.append(string, from, pos - from);
-		result.append(replace);
-		from = pos + find_len;
+std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
+	size_t pos = 0; /* replaces every `find`, left to right; resumes after the inserted text so it is never rescanned */
+	while (!find.empty() && (pos = string.find(find, pos)) != std::string::npos) { /* an empty `find` matches everywhere and would loop forever */
+		string.replace(pos, find.length(), replace);
+		pos += replace.length();
 	}
-	result.append(string, from, std::string::npos);
-	return result;
+	return string;
 }
 
-/* this function probably fucks your RAM but whatevs */
+/* "\r\n" and "<br>" become "\n", then one pass turns each "\n\n\n" into "\n\n" */
 std::string SanitizeLineEndings(const std::string& string) {
-	std::string result(string);
-	result = ReplaceAll(result, "\r\n", "\n");
-	result = ReplaceAll(result, "<br>", "\n");
-	result = ReplaceAll(result, "\n\n\n", "\n\n");
-	return result;
+	return ReplaceAll(ReplaceAll(ReplaceAll(string, "\r\n", "\n"), "<br>", "\n"), "\n\n\n", "\n\n");
 }
 
+/* removes dumb HTML tags because anilist is aids and
+   gives us HTML for synopses :/ */
 std::string RemoveHtmlTags(const std::string& string) {
 	std::string html(string);
 	while (html.find("<") != std::string::npos) {
@@ -59,23 +55,46 @@
 	return html;
 }
 
-std::string TextifySynopsis(const std::string& string) {
-	return RemoveHtmlTags(SanitizeLineEndings(string));
+/* e.g. "&lt;" for "<"; "&amp;" is decoded last so "&amp;lt;" yields "&lt;", not "<" */
+std::string ParseHtmlEntities(const std::string& string) {
+	static const char* const map[][2] = {
+		{"&lt;", "<"},
+		{"&gt;", ">"},
+		{"&nbsp;", "\xC2\xA0"}, // U+00A0 as UTF-8; a lone 0xA0 byte is not valid UTF-8
+		{"&quot;", "\""},
+		{"&apos;", "'"},
+		{"&cent;", "¢"},
+		{"&pound;", "£"},
+		{"&euro;", "€"},
+		{"&yen;", "¥"},
+		{"&copy;", "©"},
+		{"&reg;", "®"},
+		{"&rsquo;", "’"}, // Haibane Renmei, AniList
+		{"&amp;", "&"} // keep last: its output could otherwise start another entity
+	};
+
+	std::string ret = string;
+	for (const auto& item : map)
+		ret = ReplaceAll(ret, item[0], item[1]);
+	return ret;
 }
 
-/* these functions suck for i18n!...
-    but we only use them with JSON
-    stuff anyway */
+/* HTML synopsis to plain text: fix line endings, strip tags, then decode entities */
+std::string TextifySynopsis(const std::string& string) {
+	return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string)));
+}
+
+/* upper-cases through QLocale::toUpper instead of per-byte std::toupper;
+   the text is converted with ToQString and back with ToUtf8String */
 std::string ToUpper(const std::string& string) {
-	std::string result(string);
-	std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::toupper(c); });
-	return result;
+	/* todo: this "locale" will have to be moved to session.h */
+	QLocale locale;
+	return ToUtf8String(locale.toUpper(ToQString(string)));
 }
 
 std::string ToLower(const std::string& string) {
-	std::string result(string);
-	std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::tolower(c); });
-	return result;
+	QLocale locale; /* default-constructed; lower-casing is delegated to QLocale::toLower */
+	return ToUtf8String(locale.toLower(ToQString(string)));
 }
 
 std::wstring ToWstring(const std::string& string) {
--- a/src/main.cc	Thu Nov 02 13:14:15 2023 -0400
+++ b/src/main.cc	Thu Nov 02 15:22:02 2023 -0400
@@ -2,11 +2,14 @@
 #include "gui/window.h"
 #include <QApplication>
 #include <QStyleFactory>
+#include <QLocale>
 
 Session session;
 
 int main(int argc, char** argv) {
 	QApplication app(argc, argv);
+	/* this is a reasonable default, I presume */
+	QLocale::setDefault(QLocale(QLocale::English, QLocale::UnitedStates));
 
 	session.config.Load();