view src/core/strings.cc @ 337:a7d4e5107531

dep/animone: REFACTOR ALL THE THINGS 1: animone now has its own syntax divergent from anisthesia, making different platforms actually have their own sections 2: process names in animone are now called `comm' (this will probably break things). this is what its called in bsd/linux so I'm just going to use it everywhere 3: the X11 code now checks for the existence of a UTF-8 window title and passes it if available 4: ANYTHING THATS NOT LINUX IS 100% UNTESTED AND CAN AND WILL BREAK! I still actually need to test the bsd code. to be honest I'm probably going to move all of the bsds into separate files because they're all essentially different operating systems at this point
author Paper <paper@paper.us.eu.org>
date Wed, 19 Jun 2024 12:51:15 -0400
parents c32467cd06bb
children a0aa8c8c4307
line wrap: on
line source

/**
 * strings.cc: Useful functions for manipulating strings
 **/
#include "core/strings.h"
#include "core/session.h" // locale

#include <QByteArray>
#include <QCoreApplication>
#include <QDebug>
#include <QLocale>
#include <QString>
#include <QTextDocument>

#include <algorithm>
#include <cctype>
#include <codecvt>
#include <iomanip>
#include <iostream>
#include <locale>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

#include "utf8proc.h"

namespace Strings {

/* Joins the elements of `vector` into one string, placing `delimiter`
 * between consecutive elements (never before the first or after the last).
 * An empty vector yields an empty string.
 */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
	std::string joined;

	auto it = vector.begin();
	const auto last = vector.end();
	if (it == last)
		return joined;

	/* first element goes in bare; every later one is preceded by the delimiter */
	joined.append(*it);
	for (++it; it != last; ++it) {
		joined.append(delimiter);
		joined.append(*it);
	}

	return joined;
}

/* Splits `text` on every occurrence of `delimiter`.
 *
 * Returns the pieces in order; adjacent delimiters produce empty pieces, and
 * the text after the last delimiter (possibly empty) is always included.
 * An empty `text` yields an empty vector. An empty `delimiter` yields a
 * single token holding the whole text.
 */
std::vector<std::string> Split(const std::string& text, const std::string& delimiter) {
	if (text.empty())
		return {};

	/* An empty delimiter is found at every position without ever advancing
	 * the search, so the loop below would never terminate. */
	if (delimiter.empty())
		return {text};

	std::vector<std::string> tokens;

	std::size_t start = 0, end = 0;
	while ((end = text.find(delimiter, start)) != std::string::npos) {
		tokens.push_back(text.substr(start, end - start));
		start = end + delimiter.length();
	}
	tokens.push_back(text.substr(start));

	return tokens;
}

/* Replaces every occurrence of `find` in `string` with `replace`, in place.
 *
 * The search resumes after the inserted text, so a replacement that itself
 * contains `find` is not re-expanded. An empty `find` leaves the string
 * untouched.
 *
 * This function is really only used for cleaning up the synopsis of
 * HTML debris from AniList :)
 */
void ReplaceAll(std::string& string, std::string_view find, std::string_view replace) {
	/* An empty needle matches at every position, which would make the loop
	 * below insert `replace` forever. */
	if (find.empty())
		return;

	size_t pos = 0;
	while ((pos = string.find(find, pos)) != std::string::npos) {
		string.replace(pos, find.length(), replace);
		pos += replace.length();
	}
}

/* Rewrites the uppercase Roman numerals II-XIII (except X) found in `string`
 * as decimal digits, in place.
 *
 * Matching is plain substring replacement via ReplaceAll, so a numeral is
 * replaced wherever its letters occur, not only as a standalone word.
 */
void ConvertRomanNumerals(std::string& string) {
	/* {decimal, roman}. Ordered longest numeral first: a shorter numeral that
	 * is contained in a longer one ("II" in "III"/"VII"/"XII", "V" in "IV"/"VI")
	 * must not be replaced before the longer one has had its chance, otherwise
	 * e.g. "III" would come out as "2I". */
	static const std::vector<std::pair<std::string_view, std::string_view>> vec = {
		{"13", "XIII"}, {"8", "VIII"},
		{"3", "III"}, {"7", "VII"}, {"12", "XII"},
		{"2", "II"}, {"4", "IV"}, {"6", "VI"}, {"9", "IX"}, {"11", "XI"},
		{"5", "V"}
	};

	for (const auto& [number, numeral] : vec)
		ReplaceAll(string, numeral, number);
}

/* Normalizes `string` (UTF-8) in place using utf8proc_map with the options
 * listed below.
 *
 * this also performs case folding, so our string is lowercase after this
 *
 * If utf8proc reports an error the string is left unchanged.
 */
void NormalizeUnicode(std::string& string) {
	static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>(
		UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE |
		UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK |
		UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS
	);

	/* filled in by utf8proc_map; we own it and must free() it */
	utf8proc_uint8_t* buf = nullptr;

	const utf8proc_ssize_t size = utf8proc_map(
		reinterpret_cast<const utf8proc_uint8_t*>(string.data()),
		string.size(),
		&buf,
		options
	);

	/* A negative return is an error code, not a length, so it must never be
	 * used to build a string. Zero is a valid result (everything was
	 * stripped) and is applied like any other length. */
	if (size >= 0 && buf)
		string.assign(reinterpret_cast<const char*>(buf), static_cast<std::size_t>(size));

	/* free(nullptr) is a no-op, so no separate null check is needed */
	free(buf);
}

/* Normalizes an anime title in place by running, in this order:
 * ConvertRomanNumerals, NormalizeUnicode, then stripping leading and
 * trailing spaces.
 *
 * The numeral conversion has to run before NormalizeUnicode: the numeral
 * table is uppercase and NormalizeUnicode case-folds the string.
 */
void NormalizeAnimeTitle(std::string& string) {
	ConvertRomanNumerals(string);
	NormalizeUnicode(string);
	RemoveLeadingChars(string, ' ');
	RemoveTrailingChars(string, ' ');
}

/* Converts an HTML synopsis to plain text, in place.
 *
 * The string is loaded into a QTextDocument as HTML and then replaced with
 * that document's plain-text form, so all HTML handling is done by Qt.
 */
void TextifySynopsis(std::string& string) {
	/* Just let Qt deal with it. */
	QTextDocument text;
	text.setHtml(Strings::ToQString(string));
	string = Strings::ToUtf8String(text.toPlainText());
}

/* Returns an uppercase copy of `string` (UTF-8 in, UTF-8 out).
 *
 * Locale-aware case conversion is delegated to Qt: the text is converted to
 * a QString and passed to toUpper() on the object returned by
 * session.config.locale.GetLocale().
 */
std::string ToUpper(const std::string& string) {
	return ToUtf8String(session.config.locale.GetLocale().toUpper(ToQString(string)));
}

/* Returns a lowercase copy of `string` (UTF-8 in, UTF-8 out), using
 * toLower() on the object returned by session.config.locale.GetLocale().
 */
std::string ToLower(const std::string& string) {
	return ToUtf8String(session.config.locale.GetLocale().toLower(ToQString(string)));
}

/* Converts a UTF-8 encoded std::string to a std::wstring.
 *
 * If the converter throws std::range_error, a message is written to
 * std::cerr and an empty wide string is returned.
 */
std::wstring ToWstring(const std::string& string) {
	static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L"");

	try {
		return converter.from_bytes(string);
	} catch (std::range_error const&) {
		std::cerr << "Failed to convert UTF-8 to wide string!" << std::endl;
		return std::wstring();
	}
}

std::wstring ToWstring(const QString& string) {
	std::wstring arr(string.size(), L'\0');
	string.toWCharArray(&arr.front());
	return arr;
}

/* Converts a std::wstring to a UTF-8 encoded std::string.
 *
 * Mirrors ToWstring(const std::string&): if the converter throws
 * std::range_error (whether it does for an unconvertible input depends on
 * the standard library), a message is written to std::cerr and an empty
 * string is returned instead of letting the exception escape.
 */
std::string ToUtf8String(const std::wstring& wstring) {
	static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L"");

	try {
		return converter.to_bytes(wstring);
	} catch (const std::range_error&) {
		std::cerr << "Failed to convert wide string to UTF-8!" << std::endl;
		return std::string();
	}
}

/* Converts a QString to a UTF-8 encoded std::string.
 *
 * The string is built from the QByteArray's data pointer and its size
 * rather than as a NUL-terminated C string.
 */
std::string ToUtf8String(const QString& string) {
	const QByteArray ba = string.toUtf8();
	return std::string(ba.constData(), ba.size());
}

/* Copies the bytes of a QByteArray into a std::string. No re-encoding is
 * performed; the array is copied as-is using its data pointer and size.
 */
std::string ToUtf8String(const QByteArray& ba) {
	return std::string(ba.constData(), ba.size());
}

/* Builds a QString from `string`, interpreting its bytes as UTF-8. The
 * length is passed explicitly instead of relying on NUL termination.
 */
QString ToQString(const std::string& string) {
	return QString::fromUtf8(string.c_str(), string.length());
}

/* Builds a QString from a std::wstring via QString::fromWCharArray, passing
 * the length explicitly instead of relying on NUL termination.
 */
QString ToQString(const std::wstring& wstring) {
	return QString::fromWCharArray(wstring.c_str(), wstring.length());
}

/* Spells a boolean out as the text "true" or "false". */
std::string ToUtf8String(const bool b) {
	if (b)
		return "true";

	return "false";
}

/* Parses `str` as a boolean word ("true"/"false", case-insensitive thanks to
 * the ToLower call) and returns it; returns `def` when the text cannot be
 * parsed.
 *
 * The value is extracted into a separate variable because a failed
 * std::boolalpha extraction stores `false` into its target. Extracting
 * straight into `def` would turn unparsable text into `false` regardless of
 * the default the caller asked for.
 */
bool ToBool(const std::string& str, bool def) {
	std::istringstream s(Strings::ToLower(str));

	bool value = def;
	if (s >> std::boolalpha >> value)
		return value;

	return def;
}

/* util funcs */
/* Converts a human-readable size such as "1.5 GiB" or "700 MB" to bytes.
 *
 * The string is searched for one of the known unit suffixes (decimal KB..PB
 * and binary KiB..PiB); when one is found, the leading number is parsed with
 * std::stod and multiplied by the unit. If no suffix is found, or the number
 * cannot be parsed or does not fit in a uint64_t, the result of
 * ToInt(str, 0) is returned instead.
 */
uint64_t HumanReadableSizeToBytes(const std::string& str) {
	static const std::unordered_map<std::string, uint64_t> bytes_map = {
		{"KB", 1000ull},
		{"MB", 1000000ull},
		{"GB", 1000000000ull},
		{"TB", 1000000000000ull},
		{"PB", 1000000000000000ull},
		{"KiB", 1ull << 10},
		{"MiB", 1ull << 20},
		{"GiB", 1ull << 30},
		{"TiB", 1ull << 40},
		{"PiB", 1ull << 50}  /* surely we won't need more than this */
	};

	/* 2^64 as a double: the first value that no longer fits in a uint64_t */
	static constexpr double limit = 18446744073709551616.0;

	for (const auto& [suffix, multiplier] : bytes_map) {
		if (str.find(suffix) == std::string::npos)
			continue;

		try {
			const double size = std::stod(str) * static_cast<double>(multiplier);

			/* Converting a negative, NaN or too-large double to an unsigned
			 * integer is undefined behaviour, so only convert values that
			 * are in range (the comparisons are false for NaN). */
			if (size >= 0.0 && size < limit)
				return static_cast<uint64_t>(size);
		} catch (const std::invalid_argument&) {
			/* no number in front of the suffix; fall through */
		} catch (const std::out_of_range&) {
			/* number too large for a double; fall through */
		}
	}

	return ToInt(str, 0);
}

/* Formats a byte count as a human-readable size.
 *
 * With Qt >= 5.10 this defers to formattedDataSize() on the configured
 * locale. Otherwise a fallback formats the value in the largest binary unit
 * (KiB..PiB) that is not bigger than `bytes`, with `precision` digits after
 * the decimal point; values below 1 KiB are printed as a plain byte count.
 */
std::string BytesToHumanReadableSize(uint64_t bytes, int precision) {
#if QT_VERSION >= QT_VERSION_CHECK(5, 10, 0)
	/* QLocale in Qt >= 5.10.0 has a function for this */
	return Strings::ToUtf8String(session.config.locale.GetLocale().formattedDataSize(bytes, precision));
#else
	struct Unit {
		uint64_t size;
		const char* suffix;
	};

	/* Ordered largest first so the first unit that fits is the one to use.
	 * (An unordered_map has no defined iteration order, so it could pick
	 * KiB for a multi-gigabyte value.) */
	static constexpr Unit units[] = {
		{1ull << 50, "PiB"},
		{1ull << 40, "TiB"},
		{1ull << 30, "GiB"},
		{1ull << 20, "MiB"},
		{1ull << 10, "KiB"}
	};

	for (const Unit& unit : units) {
		if (bytes < unit.size)
			continue;

		/* std::fixed makes `precision` mean digits after the decimal point
		 * rather than significant digits */
		std::stringstream ss;
		ss << std::fixed << std::setprecision(precision)
		   << (static_cast<double>(bytes) / unit.size) << " "
		   << unit.suffix;
		return ss.str();
	}

	/* smaller than 1 KiB: report the exact byte count */
	return std::to_string(bytes) + " bytes";
#endif
}

/* Strips leading occurrences of `c` from `s`, in place.
 *
 * At most size() - 1 characters are removed, so a non-empty string made up
 * entirely of `c` is reduced to a single `c` rather than emptied. An empty
 * string is left as it is.
 */
void RemoveLeadingChars(std::string& s, const char c) {
	const std::size_t first_kept = s.find_first_not_of(c);
	const std::size_t all_but_last = s.size() - 1;

	/* never erase the final character */
	s.erase(0, (first_kept < all_but_last) ? first_kept : all_but_last);
}

/* Strips trailing occurrences of `c` from `s`, in place. A string made up
 * entirely of `c` becomes empty.
 */
void RemoveTrailingChars(std::string& s, const char c) {
	const std::size_t last_kept = s.find_last_not_of(c);

	if (last_kept == std::string::npos) {
		/* nothing but `c` (or already empty) */
		s.clear();
		return;
	}

	s.erase(last_kept + 1);
}

/* Returns true when `str` and `sub` agree on every character up to the
 * length of the shorter of the two.
 *
 * Note that this means a `str` shorter than `sub` still matches as long as
 * it is a prefix of `sub`, and an empty string on either side always matches.
 */
bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {
	const std::size_t overlap = std::min(str.length(), sub.length());
	return str.compare(0, overlap, sub, 0, overlap) == 0;
}

/* Passes `str` through QCoreApplication::tr() and returns the result as a
 * UTF-8 encoded std::string.
 */
std::string Translate(const char* str) {
	return Strings::ToUtf8String(QCoreApplication::tr(str));
}

} // namespace Strings