view src/core/strings.cc @ 258:862d0d8619f6

*: HUUUGE changes. animia has been renamed to animone, so instead of thinking of a health condition, you think of a beautiful flower :) I've also edited some of the code for animone, but I have no idea if it even works or not because I don't have a Mac or Windows machine lying around. whoops! ... anyway, all of the changes divergent from Anisthesia are now licensed under BSD. it's possible that I could even rewrite most of the code to where I don't even have to keep the MIT license, but that's thinking too far into the future. I've been slacking off on implementing the anime seasons page, mostly out of laziness. I think I'd have to create another db file specifically for the seasons. anyway, this code is being pushed *primarily* because the hard drive it's on is failing! yay :)
author Paper <paper@paper.us.eu.org>
date Mon, 01 Apr 2024 02:43:44 -0400
parents c130f47f6f48
children dd211ff68b36
line wrap: on
line source

/**
 * strings.cc: Useful functions for manipulating strings
 **/
#include "core/strings.h"
#include "core/session.h" // locale

#include <QByteArray>
#include <QDebug>
#include <QLocale>
#include <QString>

#include <algorithm>
#include <cctype>
#include <codecvt>
#include <iostream>
#include <locale>
#include <string>
#include <unordered_map>
#include <vector>

namespace Strings {

/* Joins every element of `vector` into a single string, inserting
 * `delimiter` between consecutive elements. An empty vector produces
 * an empty string.
 */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
	std::string joined;

	bool is_first = true;
	for (const std::string& piece : vector) {
		/* the delimiter goes *between* elements, never before the first one */
		if (!is_first)
			joined.append(delimiter);

		joined.append(piece);
		is_first = false;
	}

	return joined;
}

/* Same as the vector overload, but for a std::set: the elements are
 * visited in the set's iteration order and separated by `delimiter`.
 * An empty set produces an empty string.
 */
std::string Implode(const std::set<std::string>& set, const std::string& delimiter) {
	std::string joined;

	auto current = set.cbegin();
	if (current == set.cend())
		return joined;

	/* first element stands alone; every later one is preceded by the delimiter */
	joined.append(*current);
	for (++current; current != set.cend(); ++current) {
		joined.append(delimiter);
		joined.append(*current);
	}

	return joined;
}

/* Splits `text` at every occurrence of `delimiter`.
 *
 * Returns the pieces in order; empty pieces (e.g. from two adjacent
 * delimiters or a trailing delimiter) are kept. An empty `text` yields
 * an empty vector. An empty `delimiter` cannot split anything, so the
 * whole text is returned as the single token.
 */
std::vector<std::string> Split(const std::string& text, const std::string& delimiter) {
	if (text.empty())
		return {};

	/* find("") matches at every position without ever advancing, which
	 * would make the loop below push tokens forever. */
	if (delimiter.empty())
		return {text};

	std::vector<std::string> tokens;

	std::size_t start = 0, end = 0;
	while ((end = text.find(delimiter, start)) != std::string::npos) {
		tokens.push_back(text.substr(start, end - start));
		start = end + delimiter.length();
	}
	/* whatever follows the last delimiter (possibly nothing) */
	tokens.push_back(text.substr(start));

	return tokens;
}

/* Replaces every non-overlapping occurrence of `find` in `string` with
 * `replace` and returns the result. Text inserted by a replacement is
 * not scanned again.
 *
 * This function is really only used for cleaning up the synopsis of
 * HTML debris from AniList :)
 */
std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
	/* an empty needle matches at every position and would never
	 * terminate; there is nothing meaningful to replace anyway */
	if (find.empty())
		return string;

	std::size_t pos = 0;
	while ((pos = string.find(find, pos)) != std::string::npos) {
		string.replace(pos, find.length(), replace);
		/* skip past the inserted text so it is not matched again */
		pos += replace.length();
	}
	return string;
}

/* Normalizes line breaks in `string`: CRLF pairs and the HTML markers
 * "</p>", "<br>" and "<br />" each become a single "\n", after which one
 * pass turns "\n\n\n" into "\n\n".
 */
std::string SanitizeLineEndings(const std::string& string) {
	std::string text = ReplaceAll(string, "\r\n", "\n");
	text = ReplaceAll(text, "</p>", "\n");
	text = ReplaceAll(text, "<br>", "\n");
	text = ReplaceAll(text, "<br />", "\n");
	/* the substitutions above can stack up blank lines; squeeze triples */
	return ReplaceAll(text, "\n\n\n", "\n\n");
}

/* Strips HTML tags from `string`, because AniList hands us HTML for
 * synopses :/
 *
 * Everything from a '<' up to and including the next '>' *after it* is
 * dropped. A '>' that appears before any '<' is ordinary text and is
 * kept. If a '<' has no closing '>', it and the rest of the text are
 * left untouched.
 */
std::string RemoveHtmlTags(std::string string) {
	std::string out;
	out.reserve(string.size());

	std::size_t pos = 0;
	for (;;) {
		const std::size_t open = string.find('<', pos);
		if (open == std::string::npos)
			break;

		/* only a '>' located after this '<' can close the tag */
		const std::size_t close = string.find('>', open + 1);
		if (close == std::string::npos)
			break;

		/* keep the text in front of the tag, skip the tag itself */
		out.append(string, pos, open - pos);
		pos = close + 1;
	}

	/* remaining text after the last complete tag */
	out.append(string, pos, std::string::npos);
	return out;
}

/* Decodes a small set of named HTML entities, e.g. "&lt;" for "<".
 *
 * The input is scanned once from left to right, so every entity is
 * decoded exactly one time: "&amp;lt;" becomes "&lt;", not "<".
 * Anything starting with '&' that is not in the table is copied through
 * unchanged. Replacement text is UTF-8.
 */
std::string ParseHtmlEntities(std::string string) {
	/* Replacements are written as explicit UTF-8 byte sequences so the
	 * result does not depend on the encoding the compiler assumes for
	 * this source file. */
	static const std::unordered_map<std::string, std::string> entities = {
	    {"&lt;",    "<"           },
	    {"&gt;",    ">"           },
	    {"&nbsp;",  "\xC2\xA0"    }, /* U+00A0 no-break space */
	    {"&amp;",   "&"           },
	    {"&quot;",  "\""          },
	    {"&apos;",  "'"           },
	    {"&cent;",  "\xC2\xA2"    }, /* U+00A2 cent sign */
	    {"&pound;", "\xC2\xA3"    }, /* U+00A3 pound sign */
	    {"&euro;",  "\xE2\x82\xAC"}, /* U+20AC euro sign */
	    {"&yen;",   "\xC2\xA5"    }, /* U+00A5 yen sign */
	    {"&copy;",  "\xC2\xA9"    }, /* U+00A9 copyright sign */
	    {"&reg;",   "\xC2\xAE"    }, /* U+00AE registered sign */
	    {"&rsquo;", "\xE2\x80\x99"}  /* U+2019; Haibane Renmei, AniList */
	};

	std::string out;
	out.reserve(string.size());

	std::size_t pos = 0;
	while (pos < string.size()) {
		const std::size_t amp = string.find('&', pos);
		if (amp == std::string::npos)
			break;

		/* literal text in front of the '&' */
		out.append(string, pos, amp - pos);

		/* Every key ends in ';', so no key is a prefix of another and
		 * at most one of them can match at this position. */
		bool matched = false;
		for (const auto& entity : entities) {
			if (string.compare(amp, entity.first.size(), entity.first) == 0) {
				out.append(entity.second);
				pos = amp + entity.first.size();
				matched = true;
				break;
			}
		}

		if (!matched) {
			/* unknown entity or bare ampersand: keep it verbatim */
			out.push_back('&');
			pos = amp + 1;
		}
	}

	out.append(string, pos, std::string::npos);
	return out;
}

/* Turns an HTML-ish synopsis into plain text by running it through
 * SanitizeLineEndings, RemoveHtmlTags and ParseHtmlEntities, in that order.
 */
std::string TextifySynopsis(const std::string& string) {
	const std::string normalized = SanitizeLineEndings(string);
	const std::string without_tags = RemoveHtmlTags(normalized);
	return ParseHtmlEntities(without_tags);
}

/* Case conversion is delegated to Qt so that the locale rules are
 * somebody else's problem.
 *
 * ToUpper converts `string` to a QString, uppercases it with the locale
 * returned by session.config.locale.GetLocale(), and converts the
 * result back with ToUtf8String.
 */
std::string ToUpper(const std::string& string) {
	const auto uppercased = session.config.locale.GetLocale().toUpper(ToQString(string));
	return ToUtf8String(uppercased);
}

/* Lowercases `string` using the locale returned by
 * session.config.locale.GetLocale(); the string goes through
 * ToQString on the way in and ToUtf8String on the way out.
 */
std::string ToLower(const std::string& string) {
	const auto lowercased = session.config.locale.GetLocale().toLower(ToQString(string));
	return ToUtf8String(lowercased);
}

/* Converts `string` to a wide string through a function-local static
 * std::wstring_convert<std::codecvt_utf8<wchar_t>>, constructed with
 * empty error strings.
 *
 * If the conversion throws std::range_error, a message is written to
 * std::cerr and an empty wide string is returned.
 */
std::wstring ToWstring(const std::string& string) {
	static std::wstring_convert<std::codecvt_utf8<wchar_t>> utf8_codec("", L"");

	try {
		return utf8_codec.from_bytes(string);
	} catch (const std::range_error&) {
		std::cerr << "Failed to convert UTF-8 to wide string!" << std::endl;
	}

	return std::wstring();
}

/* Converts a QString to a std::wstring via QString::toWCharArray.
 *
 * The buffer is allocated with one wchar_t per QString code unit, which
 * is always enough room, and is then shrunk to the length that
 * toWCharArray reports having written. Without that step the result
 * would keep trailing L'\0' padding whenever fewer wide characters are
 * produced than the QString has code units.
 */
std::wstring ToWstring(const QString& string) {
	std::wstring arr(string.size(), L'\0');

	/* nothing to convert, and no valid element to take the address of */
	if (arr.empty())
		return arr;

	arr.resize(static_cast<std::wstring::size_type>(string.toWCharArray(&arr[0])));
	return arr;
}

/* Converts a wide string to a byte string through a function-local
 * static std::wstring_convert<std::codecvt_utf8<wchar_t>>, constructed
 * with empty error strings.
 */
std::string ToUtf8String(const std::wstring& wstring) {
	static std::wstring_convert<std::codecvt_utf8<wchar_t>> utf8_codec("", L"");

	std::string encoded = utf8_codec.to_bytes(wstring);
	return encoded;
}

std::string ToUtf8String(const QString& string) {
	const QByteArray ba = string.toUtf8();
	return std::string(ba.constData(), ba.size());
}

/* Copies the raw bytes of `ba` into a std::string. The explicit length
 * keeps the full array, including any embedded NUL bytes.
 */
std::string ToUtf8String(const QByteArray& ba) {
	std::string result;
	result.assign(ba.constData(), ba.size());
	return result;
}

/* Builds a QString from `string` with QString::fromUtf8, passing the
 * explicit length rather than relying on a NUL terminator.
 */
QString ToQString(const std::string& string) {
	const char* const bytes = string.c_str();
	return QString::fromUtf8(bytes, string.length());
}

/* Builds a QString from a wide string with QString::fromWCharArray,
 * passing the explicit length rather than relying on a NUL terminator.
 */
QString ToQString(const std::wstring& wstring) {
	const wchar_t* const chars = wstring.c_str();
	return QString::fromWCharArray(chars, wstring.length());
}

/* Spells out a boolean as "true" or "false". */
std::string ToUtf8String(const bool b) {
	if (b)
		return "true";

	return "false";
}

/* Parses `str` as a boolean: the lowercased text is read with
 * std::boolalpha, so "true" and "false" (in any letter case) are
 * recognized.
 *
 * Returns the parsed value, or `def` when `str` cannot be parsed.
 */
bool ToBool(const std::string& str, bool def) {
	std::istringstream s(Strings::ToLower(str));

	/* Extract into a scratch variable: since C++11 a failed extraction
	 * stores false into its target, which would silently discard the
	 * caller's default if we read straight into `def`. */
	bool value = def;
	if (s >> std::boolalpha >> value)
		return value;

	return def;
}

/* util funcs */
/* Converts a human-readable size such as "1.5 GB" into a byte count.
 *
 * If `str` contains one of the suffixes KB/MB/GB/TB/PB, the leading
 * number is parsed with std::stod and multiplied by the matching power
 * of 1024. If no suffix is present, or the number cannot be parsed or
 * does not fit into a uint64_t, the result is whatever ToInt(str, 0)
 * returns.
 */
uint64_t HumanReadableSizeToBytes(const std::string& str) {
	static const std::unordered_map<std::string, uint64_t> bytes_map = {
	    {"KB", 1ull << 10},
	    {"MB", 1ull << 20},
	    {"GB", 1ull << 30},
	    {"TB", 1ull << 40},
	    {"PB", 1ull << 50}  /* surely we won't need more than this */
	};

	/* 2^64, the smallest double that no longer fits into a uint64_t */
	static const double uint64_limit = 18446744073709551616.0;

	for (const auto& suffix : bytes_map) {
		if (str.find(suffix.first) == std::string::npos)
			continue;

		try {
			const double size = std::stod(str) * static_cast<double>(suffix.second);

			/* Converting a negative, NaN or too-large double to an
			 * unsigned integer is undefined behaviour, so only convert
			 * values that are known to fit. */
			if (size >= 0.0 && size < uint64_limit)
				return static_cast<uint64_t>(size);
		} catch (const std::invalid_argument&) {
			/* no number at the start of the string */
			continue;
		} catch (const std::out_of_range&) {
			/* number too large for a double */
			continue;
		}
	}

	return ToInt(str, 0);
}

/* Strips leading occurrences of `c` from `s`.
 *
 * The final character of the string is never removed, so a string made
 * up entirely of `c` is reduced to a single `c` (e.g. "000" -> "0").
 */
std::string RemoveLeadingChars(std::string s, const char c) {
	if (s.empty())
		return s;

	/* count the leading run of `c`, but stop short of the last character */
	std::size_t strip = 0;
	while (strip < s.size() - 1 && s[strip] == c)
		++strip;

	s.erase(0, strip);
	return s;
}

/* Strips every trailing occurrence of `c` from `s`. A string made up
 * entirely of `c` becomes empty.
 */
std::string RemoveTrailingChars(std::string s, const char c) {
	/* walk back from the end while the last kept character is still `c` */
	std::size_t keep = s.size();
	while (keep > 0 && s[keep - 1] == c)
		--keep;

	s.resize(keep);
	return s;
}

/* Compares `str` and `sub` over the length of the shorter of the two
 * and reports whether those leading characters are identical.
 *
 * Only the common-length prefix is examined, so the result is also true
 * when `str` is shorter than `sub` (including when either is empty).
 */
bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {
	const std::size_t common = std::min(str.length(), sub.length());
	return str.compare(0, common, sub, 0, common) == 0;
}

} // namespace Strings