diff src/core/strings.cc @ 98:582b2fca1561

strings: parse HTML entities when reading synopsis, make the toupper and tolower functions more sane
author Paper <mrpapersonic@gmail.com>
date Thu, 02 Nov 2023 15:22:02 -0400 (14 months ago)
parents 9b2b41f83a5e
children 503bc1547d49
line wrap: on
line diff
--- a/src/core/strings.cc	Thu Nov 02 13:14:15 2023 -0400
+++ b/src/core/strings.cc	Thu Nov 02 15:22:02 2023 -0400
@@ -4,6 +4,7 @@
 #include "core/strings.h"
 #include <QByteArray>
 #include <QString>
+#include <QLocale>
 #include <algorithm>
 #include <cctype>
 #include <codecvt>
@@ -25,27 +26,22 @@
 	return out;
 }
 
-std::string ReplaceAll(const std::string& string, const std::string& find, const std::string& replace) {
-	std::string result;
-	size_t pos, find_len = find.size(), from = 0;
-	while ((pos = string.find(find, from)) != std::string::npos) {
-		result.append(string, from, pos - from);
-		result.append(replace);
-		from = pos + find_len;
+std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
+	/* Replaces each occurrence of find, left to right, without rescanning inserted text; an empty find would match forever, so it leaves the input unchanged. */
+	size_t pos = find.empty() ? std::string::npos : string.find(find);
+	for (; pos != std::string::npos; pos = string.find(find, pos + replace.length())) {
+		string.replace(pos, find.length(), replace);
 	}
-	result.append(string, from, std::string::npos);
-	return result;
+	return string;
 }
 
-/* this function probably fucks your RAM but whatevs */
+/* Normalizes line breaks: "\r\n" and "<br>" become "\n", then each "\n\n\n" becomes "\n\n" in one pass (runs longer than three newlines are shortened, not fully collapsed). */
 std::string SanitizeLineEndings(const std::string& string) {
-	std::string result(string);
-	result = ReplaceAll(result, "\r\n", "\n");
-	result = ReplaceAll(result, "<br>", "\n");
-	result = ReplaceAll(result, "\n\n\n", "\n\n");
-	return result;
+	return ReplaceAll(ReplaceAll(ReplaceAll(string, "\r\n", "\n"), "<br>", "\n"), "\n\n\n", "\n\n");
 }
 
+/* removes HTML tags from the text; AniList returns
+   synopses as HTML rather than plain text */
 std::string RemoveHtmlTags(const std::string& string) {
 	std::string html(string);
 	while (html.find("<") != std::string::npos) {
@@ -59,23 +55,46 @@
 	return html;
 }
 
-std::string TextifySynopsis(const std::string& string) {
-	return RemoveHtmlTags(SanitizeLineEndings(string));
+/* Decodes a fixed set of named HTML entities into UTF-8 text, e.g. "&lt;" becomes "<"; anything not listed is left as-is. */
+std::string ParseHtmlEntities(const std::string& string) {
+	static const struct { const char* entity; const char* text; } table[] = {
+		{"&lt;", "<"},
+		{"&gt;", ">"},
+		{"&nbsp;", "\xC2\xA0"}, /* U+00A0 encoded as UTF-8 */
+		{"&quot;", "\""},
+		{"&apos;", "'"},
+		{"&cent;", "¢"},
+		{"&pound;", "£"},
+		{"&euro;", "€"},
+		{"&yen;", "¥"},
+		{"&copy;", "©"},
+		{"&reg;", "®"},
+		{"&rsquo;", "’"}, // Haibane Renmei, AniList
+		{"&amp;", "&"} /* must stay last: decoding it earlier would turn "&amp;lt;" into "<" */
+	};
+
+	std::string ret = string;
+	for (const auto& item : table)
+		ret = ReplaceAll(ret, item.entity, item.text);
+	return ret;
 }
 
-/* these functions suck for i18n!...
-    but we only use them with JSON
-    stuff anyway */
+/* Turns a synopsis into plain text: normalizes line endings, strips HTML tags, then decodes entities (last, so a decoded "<" is not mistaken for a tag). */
+std::string TextifySynopsis(const std::string& string) {
+	return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string)));
+}
+
+/* Case conversion is delegated to Qt: the string is converted with ToQString, mapped by QLocale, and converted back with ToUtf8String.
+   NOTE(review): presumably the result follows the default locale's casing rules (e.g. Turkish dotted/dotless i) — confirm callers comparing keys are fine with that */
 std::string ToUpper(const std::string& string) {
-	std::string result(string);
-	std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::toupper(c); });
-	return result;
+	/* TODO: move this locale to session.h; until then a default-constructed QLocale is used */
+	QLocale locale;
+	return ToUtf8String(locale.toUpper(ToQString(string)));
 }
 
 std::string ToLower(const std::string& string) {
-	std::string result(string);
-	std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::tolower(c); });
-	return result;
+	QLocale locale; /* default-constructed; lowercasing is done by Qt on the QString form of the input */
+	return ToUtf8String(locale.toLower(ToQString(string)));
 }
 
 std::wstring ToWstring(const std::string& string) {