view src/core/strings.cc @ 187:9613d72b097e

*: multiple performance improvements like marking `static const` when it makes sense... date: change old stupid heap-based method to a structure which should make copying the thing actually make a copy. also many performance-based changes, like removing the std::tie dependency and forward-declaring nlohmann json *: replace every instance of QString::fromUtf8 to Strings::ToQString. the main difference is that our function will always convert exactly what is in the string, while some other times it would only convert up to the nearest NUL byte
author Paper <mrpapersonic@gmail.com>
date Wed, 06 Dec 2023 13:43:54 -0500
parents 275da698697d
children 7cf53145de11
line wrap: on
line source

/**
 * strings.cpp: Useful functions for manipulating strings
 **/
#include "core/strings.h"
#include "core/session.h" // locale

#include <QByteArray>
#include <QDebug>
#include <QString>
#include <QLocale>

#include <algorithm>
#include <cctype>
#include <codecvt>
#include <locale>
#include <string>
#include <vector>
#include <unordered_map>

namespace Strings {

/* ew */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
	if (vector.size() < 1)
		return "-";

	std::string out = "";

	for (unsigned long long i = 0; i < vector.size(); i++) {
		out.append(vector.at(i));
		if (i < vector.size() - 1)
			out.append(delimiter);
	}

	return out;
}

std::vector<std::string> Split(const std::string &text, const std::string& delimiter) {
	std::vector<std::string> tokens;

	std::size_t start = 0, end = 0;
	while ((end = text.find(delimiter, start)) != std::string::npos) {
		tokens.push_back(text.substr(start, end - start));
		start = end + delimiter.length();
	}
	tokens.push_back(text.substr(start));

	return tokens;
}

/* This function is really only used for cleaning up the synopsis of
   horrible HTML debris from AniList :) */
std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
	size_t pos = 0;
	while ((pos = string.find(find, pos)) != std::string::npos) {
		string.replace(pos, find.length(), replace);
		pos += replace.length();
	}
	return string;
}

std::string SanitizeLineEndings(const std::string& string) {
	/* LOL */
	return
		ReplaceAll(
			ReplaceAll(
				ReplaceAll(
					ReplaceAll(
						ReplaceAll(string, "\r\n", "\n"),
					"</p>", "\n"),
				"<br>", "\n"),
			"<br />", "\n"),
		"\n\n\n", "\n\n");
}

/* removes dumb HTML tags because anilist is aids and
   gives us HTML for synopses :/ */
std::string RemoveHtmlTags(std::string string) {
	while (string.find("<") != std::string::npos) {
		auto startpos = string.find("<");
		auto endpos = string.find(">") + 1;

		if (endpos != std::string::npos)
			string.erase(startpos, endpos - startpos);
	}
	return string;
}

/* e.g. "&lt;" for "<" */
std::string ParseHtmlEntities(std::string string) {
	const std::unordered_map<std::string, std::string> map = {
		/* The only one of these I can understand using are the first
		   three. why do the rest of these exist? */
		{"&lt;", "<"},
		{"&rt;", ">"},
		{"&nbsp;", "\xA0"},
		{"&amp;", "&"},
		{"&quot;", "\""},
		{"&apos;", "'"},
		{"&cent;", "¢"},
		{"&pound;", "£"},
		{"&euro;", "€"},
		{"&yen;", "¥"},
		{"&copy;", "©"},
		{"&reg;", "®"},
		{"&rsquo;", "’"} // Haibane Renmei, AniList
	};

	for (const auto& item : map)
		string = ReplaceAll(string, item.first, item.second);
	return string;
}

/* removes stupid HTML stuff */
std::string TextifySynopsis(const std::string& string) {
	return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string)));
}

/* let Qt handle the heavy lifting of locale shit
   I don't want to deal with */
std::string ToUpper(const std::string& string) {
	return ToUtf8String(session.config.locale.GetLocale().toUpper(ToQString(string)));
}

std::string ToLower(const std::string& string) {
	return ToUtf8String(session.config.locale.GetLocale().toLower(ToQString(string)));
}

std::wstring ToWstring(const std::string& string) {
	static std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
	return converter.from_bytes(string);
}

std::wstring ToWstring(const QString& string) {
	std::wstring arr(string.size(), L'\0');
	string.toWCharArray(&arr.front());
	return arr;
}

std::string ToUtf8String(const std::wstring& wstring) {
	static std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
	return converter.to_bytes(wstring);
}

std::string ToUtf8String(const QString& string) {
	const QByteArray ba = string.toUtf8();
	return std::string(ba.constData(), ba.size());
}

std::string ToUtf8String(const QByteArray& ba) {
	return std::string(ba.constData(), ba.size());
}

QString ToQString(const std::string& string) {
	return QString::fromUtf8(string.c_str(), string.length());
}

QString ToQString(const std::wstring& wstring) {
	return QString::fromWCharArray(wstring.c_str(), wstring.length());
}

/* not really an "int"... but who cares? */
int ToInt(const std::string& str, int def) {
	int tmp = 0;
	try {
		tmp = std::stoi(str);
	} catch (std::invalid_argument const& ex) {
		qDebug() << "Failed to parse int from std::string: no number found in " << ToQString(str) << " defaulting to " << def;
		tmp = def;
	}
	return tmp;
}

bool ToBool(const std::string& s, const bool def) {
	if (s.length() < 4)
		return def;
	const std::string l = Strings::ToLower(s);
	if (Strings::BeginningMatchesSubstring(l, "true"))
		return true;
	else if (Strings::BeginningMatchesSubstring(l, "false"))
		return false;
	return def;
}

std::string ToUtf8String(const bool b) {
	return b ? "true" : "false";
}

uint64_t HumanReadableSizeToBytes(const std::string& str) {
	static const std::unordered_map<std::string, uint64_t> bytes_map = {
		{"KB", 1ull << 10},
		{"MB", 1ull << 20},
		{"GB", 1ull << 30},
		{"TB", 1ull << 40},
		{"PB", 1ull << 50} /* surely we won't need more than this */
	};

	for (const auto& suffix : bytes_map) {
		if (str.find(suffix.first) != std::string::npos) {
			try {
				uint64_t size = std::stod(str) * suffix.second;
				return size;
			} catch (std::invalid_argument const& ex) {
				continue;
			}
		}
	}

	return ToInt(str, 0);
}

std::string RemoveLeadingChars(std::string s, const char c) {
	s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1));
	return s;
}

std::string RemoveTrailingChars(std::string s, const char c) {
	s.erase(s.find_last_not_of(c) + 1, std::string::npos);
	return s;
}

bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {
	for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++)
		if (str[i] != sub[i])
			return false;

	return true;
}

} // namespace Strings