view src/core/strings.cc @ 98:582b2fca1561

strings: parse HTML entities when reading synopsis, make the toupper and tolower functions more sane
author Paper <mrpapersonic@gmail.com>
date Thu, 02 Nov 2023 15:22:02 -0400
parents 9b2b41f83a5e
children 503bc1547d49
line wrap: on
line source

/**
 * strings.cc: Useful functions for manipulating strings
 **/
#include "core/strings.h"
#include <QByteArray>
#include <QString>
#include <QLocale>
#include <algorithm>
#include <cctype>
#include <codecvt>
#include <locale>
#include <string>
#include <vector>

namespace Strings {

/* Joins the elements of `vector` into a single string, placing
   `delimiter` between consecutive elements.
   An empty vector yields the placeholder "-". */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
	if (vector.empty())
		return "-";

	/* seed with the first element so the delimiter only ever
	   goes in front of the remaining ones */
	auto it = vector.begin();
	std::string joined = *it;
	for (++it; it != vector.end(); ++it) {
		joined += delimiter;
		joined += *it;
	}
	return joined;
}

/* Returns a copy of `string` in which every non-overlapping occurrence
   of `find` has been replaced with `replace`.
   Text inserted by a replacement is never rescanned, so a `replace`
   that contains `find` is safe.
   An empty `find` returns `string` unchanged. */
std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
	/* an empty needle matches at every position, which would make
	   the loop below spin (or grow the string) forever */
	if (find.empty())
		return string;

	std::string::size_type pos = 0;
	while ((pos = string.find(find, pos)) != std::string::npos) {
		string.replace(pos, find.length(), replace);
		/* continue after the inserted text */
		pos += replace.length();
	}
	return string;
}

/* Normalizes line breaks in `string`:
     - "\r\n" and "<br>" each become a single "\n"
     - any run of three or more consecutive line breaks is
       collapsed to exactly two (i.e. at most one blank line)
   Everything else, including a lone "\r", is copied through
   unchanged. */
std::string SanitizeLineEndings(const std::string& string) {
	std::string out;
	out.reserve(string.size());

	/* number of consecutive line breaks seen so far */
	std::string::size_type newline_run = 0;

	std::string::size_type i = 0;
	while (i < string.size()) {
		/* how many input characters the line break at `i` spans,
		   or 0 if there is no line break here */
		std::string::size_type consumed = 0;
		if (string.compare(i, 2, "\r\n") == 0)
			consumed = 2;
		else if (string.compare(i, 4, "<br>") == 0)
			consumed = 4;
		else if (string[i] == '\n')
			consumed = 1;

		if (consumed > 0) {
			/* only the first two breaks of a run are emitted */
			if (++newline_run <= 2)
				out.push_back('\n');
			i += consumed;
		} else {
			newline_run = 0;
			out.push_back(string[i]);
			++i;
		}
	}
	return out;
}

/* Strips HTML tags from `string`, because AniList hands us
   synopses as HTML.
   A "tag" is everything from a '<' up to and including the next
   '>' that follows it. A '<' with no closing '>' after it is left
   in place, as is any '>' that does not close a tag. */
std::string RemoveHtmlTags(const std::string& string) {
	std::string html(string);

	std::string::size_type open = 0;
	while ((open = html.find('<', open)) != std::string::npos) {
		/* search from the '<' so an earlier stray '>' is never
		   mistaken for the end of this tag */
		const std::string::size_type close = html.find('>', open);
		if (close == std::string::npos)
			break; /* unterminated tag: nothing more to strip */

		html.erase(open, close - open + 1);
		/* `open` now indexes the text that followed the tag,
		   so the next search resumes from the right place */
	}
	return html;
}

/* Decodes a small set of named HTML entities, e.g. "&lt;" for "<".
   The input is scanned once from left to right and decoded text is
   never rescanned, so "&amp;lt;" becomes "&lt;" rather than "<".
   An '&' that does not start a known entity is copied through
   unchanged. Replacement text is UTF-8 encoded. */
std::string ParseHtmlEntities(const std::string& string) {
	struct Entity {
		const char* name;
		const char* text;
	};
	static const Entity entities[] = {
		{"&lt;", "<"},
		{"&gt;", ">"},
		{"&nbsp;", "\xC2\xA0"}, /* U+00A0 encoded as UTF-8 */
		{"&amp;", "&"},
		{"&quot;", "\""},
		{"&apos;", "'"},
		{"&cent;", "¢"},
		{"&pound;", "£"},
		{"&euro;", "€"},
		{"&yen;", "¥"},
		{"&copy;", "©"},
		{"&reg;", "®"},
		{"&rsquo;", "’"} // Haibane Renmei, AniList
	};

	std::string ret;
	ret.reserve(string.size());

	std::string::size_type pos = 0;
	while (pos < string.size()) {
		const std::string::size_type amp = string.find('&', pos);
		if (amp == std::string::npos) {
			/* no more candidates; copy the tail verbatim */
			ret.append(string, pos, std::string::npos);
			break;
		}
		ret.append(string, pos, amp - pos);

		bool matched = false;
		for (const Entity& entity : entities) {
			const std::string::size_type len = std::char_traits<char>::length(entity.name);
			if (string.compare(amp, len, entity.name) == 0) {
				ret.append(entity.text);
				pos = amp + len;
				matched = true;
				break;
			}
		}

		if (!matched) {
			/* not an entity we know about; keep the '&' as-is */
			ret.push_back('&');
			pos = amp + 1;
		}
	}
	return ret;
}

/* Turns an HTML synopsis into plain text by running it through
   SanitizeLineEndings, then RemoveHtmlTags, then ParseHtmlEntities,
   in that order. */
std::string TextifySynopsis(const std::string& string) {
	const std::string normalized = SanitizeLineEndings(string);
	const std::string untagged = RemoveHtmlTags(normalized);
	return ParseHtmlEntities(untagged);
}

/* let Qt handle the heavy lifting of locale shit
   I don't want to deal with */
std::string ToUpper(const std::string& string) {
	/* todo: this "locale" will have to be moved to session.h */
	QLocale locale;
	return ToUtf8String(locale.toUpper(ToQString(string)));
}

std::string ToLower(const std::string& string) {
	QLocale locale;
	return ToUtf8String(locale.toLower(ToQString(string)));
}

/* Converts a UTF-8 encoded std::string into a std::wstring using
   std::wstring_convert with std::codecvt_utf8<wchar_t>. */
std::wstring ToWstring(const std::string& string) {
	using Utf8Codec = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;

	Utf8Codec codec;
	std::wstring wide = codec.from_bytes(string);
	return wide;
}

std::wstring ToWstring(const QString& string) {
	std::wstring arr(string.size(), L'\0');
	string.toWCharArray(&arr.front());
	return arr;
}

/* Converts a std::wstring into a UTF-8 encoded std::string using
   std::wstring_convert with std::codecvt_utf8<wchar_t>. */
std::string ToUtf8String(const std::wstring& wstring) {
	using Utf8Codec = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;

	Utf8Codec codec;
	std::string narrow = codec.to_bytes(wstring);
	return narrow;
}

std::string ToUtf8String(const QString& string) {
	QByteArray ba = string.toUtf8();
	return std::string(ba.constData(), ba.size());
}

/* Copies the raw bytes of a QByteArray into a std::string.
   No transcoding is performed; the byte count is passed explicitly. */
std::string ToUtf8String(const QByteArray& ba) {
	const char* const bytes = ba.constData();
	const auto count = ba.size();
	return std::string(bytes, count);
}

/* Builds a QString from a std::string by handing its bytes and
   length to QString::fromUtf8. */
QString ToQString(const std::string& string) {
	const char* const bytes = string.c_str();
	const auto count = string.length();
	return QString::fromUtf8(bytes, count);
}

/* Builds a QString from a std::wstring by handing its wide
   characters and length to QString::fromWCharArray. */
QString ToQString(const std::wstring& wstring) {
	const wchar_t* const chars = wstring.c_str();
	const auto count = wstring.length();
	return QString::fromWCharArray(chars, count);
}

} // namespace Strings