view src/core/strings.cc @ 137:69db40272acd

dep/animia: [WIP] huge refactor this WILL NOT compile, because lots of code has been changed and every API in the original codebase has been removed. note that this api setup is not exactly permanent...
author Paper <mrpapersonic@gmail.com>
date Fri, 10 Nov 2023 13:52:47 -0500
parents 275da698697d
children 9613d72b097e
line wrap: on
line source

/**
 * strings.cc: Useful functions for manipulating strings
 **/
#include "core/strings.h"
#include <QByteArray>
#include <QDebug>
#include <QLocale>
#include <QString>
#include <algorithm>
#include <cctype>
#include <codecvt>
#include <locale>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

namespace Strings {

/* Joins the elements of `vector` into a single string, inserting
   `delimiter` between consecutive elements. An empty vector yields
   the placeholder "-" rather than an empty string. */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
	if (vector.empty())
		return "-";

	auto it = vector.begin();
	std::string joined = *it;
	for (++it; it != vector.end(); ++it) {
		joined += delimiter;
		joined += *it;
	}
	return joined;
}

/* Splits `text` at every occurrence of `delimiter` and returns the
   pieces in order. Empty pieces are kept, so the result always holds
   at least one element (the whole text when the delimiter never
   occurs). An empty delimiter cannot split anything and also yields
   the whole text as the only element. */
std::vector<std::string> Split(const std::string &text, const std::string& delimiter) {
	std::vector<std::string> tokens;

	/* find("") matches at `start` without ever advancing, which would
	   loop forever; treat it as "no delimiter present". */
	if (delimiter.empty()) {
		tokens.push_back(text);
		return tokens;
	}

	std::size_t start = 0, end = 0;
	while ((end = text.find(delimiter, start)) != std::string::npos) {
		tokens.push_back(text.substr(start, end - start));
		start = end + delimiter.length();
	}
	/* whatever follows the last delimiter (possibly empty) */
	tokens.push_back(text.substr(start));

	return tokens;
}

/* Replaces every non-overlapping occurrence of `find` in `string`
   with `replace` and returns the result. Text inserted by a
   replacement is never rescanned. An empty `find` leaves the string
   unchanged.

   This is mainly used for cleaning HTML debris out of the synopses
   that come from AniList. */
std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
	/* an empty needle matches at every position and would never let
	   the loop below terminate */
	if (find.empty())
		return string;

	size_t pos = 0;
	while ((pos = string.find(find, pos)) != std::string::npos) {
		string.replace(pos, find.length(), replace);
		/* skip past the inserted text so it is not matched again */
		pos += replace.length();
	}
	return string;
}

/* Normalizes line breaks in a synopsis. The substitutions run in
   this order:
     1. "\r\n"   -> "\n"
     2. "</p>"   -> "\n"
     3. "<br>"   -> "\n"
     4. "<br />" -> "\n"
     5. "\n\n\n" -> "\n\n"  (a single pass over the text)  */
std::string SanitizeLineEndings(const std::string& string) {
	std::string cleaned = ReplaceAll(string, "\r\n", "\n");
	cleaned = ReplaceAll(cleaned, "</p>", "\n");
	cleaned = ReplaceAll(cleaned, "<br>", "\n");
	cleaned = ReplaceAll(cleaned, "<br />", "\n");
	cleaned = ReplaceAll(cleaned, "\n\n\n", "\n\n");
	return cleaned;
}

/* Strips HTML tags from `string`: every span that starts at a '<'
   and ends at the next '>' after it is erased. AniList delivers
   synopses as HTML, so this is needed before showing them as plain
   text.

   A '<' that has no closing '>' after it is left in the text as-is,
   and a '>' that appears before any '<' is ordinary text. */
std::string RemoveHtmlTags(std::string string) {
	std::size_t open = 0;
	while ((open = string.find('<', open)) != std::string::npos) {
		/* look for the closing bracket *after* the opening one; an
		   earlier '>' does not belong to this tag */
		const std::size_t close = string.find('>', open);
		if (close == std::string::npos)
			break; /* unterminated '<': nothing more to strip */

		string.erase(open, close - open + 1);
		/* keep `open` where it is: the text that slid into this
		   position may itself start with another tag */
	}
	return string;
}

/* Decodes the named HTML entities listed below into their UTF-8
   text, e.g. "&lt;" becomes "<".

   The input is decoded in a single left-to-right pass, so text that
   was produced by decoding is never decoded a second time
   ("&amp;lt;" becomes "&lt;", not "<"). Anything starting with '&'
   that is not in the table is copied through unchanged. */
std::string ParseHtmlEntities(std::string string) {
	struct Entity {
		std::string name; /* entity as written, including '&' and ';' */
		std::string text; /* UTF-8 replacement */
	};
	static const Entity entities[] = {
		{"&lt;", "<"},
		{"&gt;", ">"},
		{"&nbsp;", "\xC2\xA0"}, /* U+00A0 encoded as UTF-8 */
		{"&amp;", "&"},
		{"&quot;", "\""},
		{"&apos;", "'"},
		{"&cent;", "¢"},
		{"&pound;", "£"},
		{"&euro;", "€"},
		{"&yen;", "¥"},
		{"&copy;", "©"},
		{"&reg;", "®"},
		{"&rsquo;", "’"} // Haibane Renmei, AniList
	};

	std::string decoded;
	decoded.reserve(string.size());

	std::size_t pos = 0;
	while (pos < string.size()) {
		const std::size_t amp = string.find('&', pos);
		if (amp == std::string::npos) {
			/* no more candidates: copy the tail and finish */
			decoded.append(string, pos, std::string::npos);
			break;
		}
		/* copy the plain text in front of the '&' */
		decoded.append(string, pos, amp - pos);

		const Entity* match = nullptr;
		for (const Entity& entity : entities) {
			if (string.compare(amp, entity.name.size(), entity.name) == 0) {
				match = &entity;
				break;
			}
		}

		if (match) {
			decoded += match->text;
			pos = amp + match->name.size();
		} else {
			/* not a known entity: keep the '&' literally */
			decoded += '&';
			pos = amp + 1;
		}
	}
	return decoded;
}

/* Turns an HTML synopsis into plain text. Runs, in order:
   SanitizeLineEndings, RemoveHtmlTags, ParseHtmlEntities. */
std::string TextifySynopsis(const std::string& string) {
	const std::string normalized = SanitizeLineEndings(string);
	const std::string untagged = RemoveHtmlTags(normalized);
	return ParseHtmlEntities(untagged);
}

/* let Qt handle the heavy lifting of locale shit
   I don't want to deal with */
std::string ToUpper(const std::string& string) {
	/* todo: this "locale" will have to be moved to session.h
	   it also defaults to en-US, which sucks very much for
	   anyone who doesn't speak american english... */
	QLocale locale;
	return ToUtf8String(locale.toUpper(ToQString(string)));
}

std::string ToLower(const std::string& string) {
	QLocale locale;
	return ToUtf8String(locale.toLower(ToQString(string)));
}

/* Converts a UTF-8 encoded std::string into a std::wstring using
   std::wstring_convert with std::codecvt_utf8<wchar_t>. */
std::wstring ToWstring(const std::string& string) {
	using Utf8Codec = std::codecvt_utf8<wchar_t>;
	std::wstring_convert<Utf8Codec, wchar_t> utf8_to_wide;
	return utf8_to_wide.from_bytes(string);
}

std::wstring ToWstring(const QString& string) {
	std::wstring arr(string.size(), L'\0');
	string.toWCharArray(&arr.front());
	return arr;
}

/* Converts a std::wstring into a UTF-8 encoded std::string using
   std::wstring_convert with std::codecvt_utf8<wchar_t>. */
std::string ToUtf8String(const std::wstring& wstring) {
	using Utf8Codec = std::codecvt_utf8<wchar_t>;
	std::wstring_convert<Utf8Codec, wchar_t> wide_to_utf8;
	return wide_to_utf8.to_bytes(wstring);
}

std::string ToUtf8String(const QString& string) {
	QByteArray ba = string.toUtf8();
	return std::string(ba.constData(), ba.size());
}

/* Copies the bytes of a QByteArray into a std::string. The bytes are
   copied verbatim (all ba.size() of them); no re-encoding is done. */
std::string ToUtf8String(const QByteArray& ba) {
	const char* const bytes = ba.constData();
	return std::string(bytes, ba.size());
}

/* Builds a QString from a std::string, decoding its bytes as UTF-8
   (QString::fromUtf8). The length is passed explicitly. */
QString ToQString(const std::string& string) {
	const char* const utf8 = string.data();
	return QString::fromUtf8(utf8, string.size());
}

/* Builds a QString from a std::wstring via QString::fromWCharArray.
   The length is passed explicitly. */
QString ToQString(const std::wstring& wstring) {
	const wchar_t* const wide = wstring.data();
	return QString::fromWCharArray(wide, wstring.size());
}

/* Parses the leading integer of `str` with std::stoi.

   Returns the parsed value, or `def` when `str` does not start with
   a number or when the number does not fit into an int. Both
   failures are reported through qDebug(); neither propagates an
   exception to the caller. */
int ToInt(const std::string& str, int def) {
	try {
		return std::stoi(str);
	} catch (std::invalid_argument const&) {
		qDebug() << "Failed to parse int from std::string: no number found in " << ToQString(str) << " defaulting to " << def;
	} catch (std::out_of_range const&) {
		/* std::stoi throws this for values outside the range of int */
		qDebug() << "Failed to parse int from std::string: number out of range in " << ToQString(str) << " defaulting to " << def;
	}
	return def;
}

/* Interprets `s` as a boolean. Strings shorter than four characters
   yield `def`. Otherwise the string is lower-cased with ToLower and
   checked with BeginningMatchesSubstring against "true" (-> true)
   and then "false" (-> false); if neither check succeeds, `def` is
   returned. */
bool ToBool(const std::string& s, const bool def) {
	/* "true" is the shortest spelling that is accepted */
	if (s.length() >= 4) {
		const std::string lowered = Strings::ToLower(s);
		if (Strings::BeginningMatchesSubstring(lowered, "true"))
			return true;
		if (Strings::BeginningMatchesSubstring(lowered, "false"))
			return false;
	}
	return def;
}

/* Spells a boolean out as "true" or "false". */
std::string ToUtf8String(const bool b) {
	if (b)
		return "true";
	return "false";
}

/* Converts a human-readable size such as "1.5 MB" into a byte count.

   If `str` contains one of the suffixes KB, MB, GB, TB or PB, the
   leading number (parsed with std::stod) is multiplied by the
   matching power of 1024. Without a suffix, the leading integer is
   taken as a plain byte count.

   Returns 0 when no number can be parsed or the value is negative;
   a scaled value too large for 64 bits saturates at the maximum.
   Never throws on malformed input. */
uint64_t HumanReadableSizeToBytes(const std::string& str) {
	struct Unit {
		const char* suffix;
		uint64_t multiplier;
	};
	static const Unit units[] = {
		{"KB", 1ull << 10},
		{"MB", 1ull << 20},
		{"GB", 1ull << 30},
		{"TB", 1ull << 40},
		{"PB", 1ull << 50} /* surely we won't need more than this */
	};
	/* 2^64: the first double that no longer fits into a uint64_t */
	const double limit = 18446744073709551616.0;

	for (const Unit& unit : units) {
		if (str.find(unit.suffix) == std::string::npos)
			continue;

		try {
			const double bytes = std::stod(str) * static_cast<double>(unit.multiplier);
			/* converting a negative, NaN or too-large double to an
			   unsigned integer is undefined, so clamp first */
			if (!(bytes >= 0.0))
				return 0;
			if (bytes >= limit)
				return ~static_cast<uint64_t>(0);
			return static_cast<uint64_t>(bytes);
		} catch (std::invalid_argument const&) {
			/* no leading number: the integer fallback could not
			   parse this string either */
			return 0;
		} catch (std::out_of_range const&) {
			return 0;
		}
	}

	/* no suffix: plain byte count, parsed as 64 bits so that sizes of
	   2 GiB and above survive */
	try {
		const long long bytes = std::stoll(str);
		return bytes > 0 ? static_cast<uint64_t>(bytes) : 0;
	} catch (std::invalid_argument const&) {
		return 0;
	} catch (std::out_of_range const&) {
		return 0;
	}
}

/* Removes leading occurrences of `c` from `s`. A string consisting
   only of `c` is reduced to a single `c` instead of becoming empty
   (e.g. "000" with '0' gives "0"). An empty string stays empty. */
std::string RemoveLeadingChars(std::string s, const char c) {
	if (s.empty())
		return s;

	std::size_t strip = s.find_first_not_of(c);
	if (strip == std::string::npos)
		strip = s.size() - 1; /* all `c`: keep the last one */
	s.erase(0, strip);
	return s;
}

/* Removes every trailing occurrence of `c` from `s`. A string that
   consists only of `c` becomes empty. */
std::string RemoveTrailingChars(std::string s, const char c) {
	const std::size_t last_kept = s.find_last_not_of(c);
	if (last_kept == std::string::npos)
		s.clear();
	else
		s.resize(last_kept + 1);
	return s;
}

/* Returns true when `str` begins with the whole of `sub`.

   A `str` that is shorter than `sub` can never begin with it, so
   that case is false (an empty `str` only matches an empty `sub`).
   An empty `sub` matches every string. */
bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {
	if (str.size() < sub.size())
		return false;
	return str.compare(0, sub.size(), sub) == 0;
}

} // namespace Strings