view src/core/strings.cc @ 327:b5d6c27c308f

anime: refactor Anime::SeriesSeason to Season class. ToLocalString has also been altered to take in both season and year, because lots of locales actually treat formatting seasons differently! Most notable is Russian, which adds a suffix at the end to notate seasons(??)
author Paper <paper@paper.us.eu.org>
date Thu, 13 Jun 2024 01:49:18 -0400
parents c32467cd06bb
children a0aa8c8c4307
line wrap: on
line source

/**
 * strings.cc: Useful functions for manipulating strings
 **/
#include "core/strings.h"
#include "core/session.h" // locale

#include <QByteArray>
#include <QDebug>
#include <QLocale>
#include <QString>
#include <QTextDocument>
#include <QCoreApplication>

#include <algorithm>
#include <cctype>
#include <codecvt>
#include <iostream>
#include <iomanip>
#include <locale>
#include <string>
#include <unordered_map>
#include <vector>

#include "utf8proc.h"

namespace Strings {

/* Joins every element of `vector` into a single string, inserting
 * `delimiter` between consecutive elements (never before the first
 * or after the last). An empty vector produces an empty string.
 */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
	std::string joined;

	auto it = vector.begin();
	if (it == vector.end())
		return joined;

	/* first element goes in bare; every later one is preceded by the delimiter */
	joined += *it;
	for (++it; it != vector.end(); ++it) {
		joined += delimiter;
		joined += *it;
	}

	return joined;
}

/* Splits `text` at every occurrence of `delimiter` and returns the pieces
 * in order. Empty pieces are kept, so "a,,b" split on "," gives
 * {"a", "", "b"} and a trailing delimiter gives a trailing empty piece.
 *
 * An empty `text` yields an empty vector. An empty `delimiter` cannot
 * split anything, so the whole text is returned as a single token.
 */
std::vector<std::string> Split(const std::string& text, const std::string& delimiter) {
	if (text.empty())
		return {};

	/* find("") matches at `start` without ever advancing it, which would
	 * otherwise loop forever while pushing empty tokens */
	if (delimiter.empty())
		return {text};

	std::vector<std::string> tokens;

	std::size_t start = 0, end = 0;
	while ((end = text.find(delimiter, start)) != std::string::npos) {
		tokens.push_back(text.substr(start, end - start));
		start = end + delimiter.length();
	}
	/* whatever follows the last delimiter (possibly nothing) */
	tokens.push_back(text.substr(start));

	return tokens;
}

/* This function is really only used for cleaning up the synopsis of
 * horrible HTML debris from AniList :)
 *
 * Replaces every non-overlapping occurrence of `find` in `string` with
 * `replace`, in place. Text inserted by a replacement is never rescanned.
 * An empty `find` leaves the string untouched.
 */
void ReplaceAll(std::string& string, std::string_view find, std::string_view replace) {
	/* an empty pattern matches at every position, so the loop below
	 * would never terminate */
	if (find.empty())
		return;

	size_t pos = 0;
	while ((pos = string.find(find, pos)) != std::string::npos) {
		string.replace(pos, find.length(), replace);
		/* skip past what we just inserted so it isn't matched again */
		pos += replace.length();
	}
}

/* Rewrites stand-alone Roman numerals in `string` as decimal digits,
 * in place (e.g. "Season III" becomes "Season 3").
 *
 * Only whole words are converted: a word is a maximal run of ASCII
 * alphanumerics and/or non-ASCII bytes, and it is replaced only when it
 * is exactly equal to one of the numerals in the table. This keeps
 * letters inside ordinary words ("Violet", "VIDEO") intact and stops a
 * short numeral from eating part of a longer one ("III" must not turn
 * into "2I"). Matching is case-sensitive.
 *
 * "I" (1) and "X" (10) are absent from the table, so they are never
 * converted.
 */
void ConvertRomanNumerals(std::string& string) {
	static const std::vector<std::pair<std::string_view, std::string_view>> vec = {
		{"2", "II"}, {"3", "III"}, {"4", "IV"}, {"5", "V"}, {"6", "VI"},
		{"7", "VII"}, {"8", "VIII"}, {"9", "IX"}, {"11", "XI"}, {"12", "XII"},
		{"13", "XIII"}
	};

	/* bytes >= 0x80 belong to multi-byte UTF-8 sequences; count them as
	 * word characters so a numeral glued to non-ASCII text is left alone */
	const auto is_word_char = [](const char c) {
		const unsigned char uc = static_cast<unsigned char>(c);
		return uc >= 0x80 || std::isalnum(uc) != 0;
	};

	std::string out;
	out.reserve(string.size());

	std::size_t pos = 0;
	while (pos < string.size()) {
		/* separators are copied through unchanged */
		if (!is_word_char(string[pos])) {
			out.push_back(string[pos]);
			++pos;
			continue;
		}

		/* find the end of the current word */
		std::size_t end = pos;
		while (end < string.size() && is_word_char(string[end]))
			++end;

		const std::string_view word(string.data() + pos, end - pos);
		const auto match = std::find_if(vec.begin(), vec.end(),
			[word](const auto& item) { return item.second == word; });

		out.append(match != vec.end() ? match->first : word);
		pos = end;
	}

	string.swap(out);
}

/* Normalizes `string` (UTF-8) in place by running it through
 * utf8proc_map() with the option set below.
 *
 * this also performs case folding, so our string is lowercase after this
 *
 * If utf8proc reports an error (negative return value), `string` is left
 * unchanged. A successful result is always stored, including an empty one.
 */
void NormalizeUnicode(std::string& string) {
	static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>(
		UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE |
		UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK |
		UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS
	);

	/* filled in by utf8proc_map; released with free() below */
	utf8proc_uint8_t* buf = nullptr;

	const utf8proc_ssize_t size = utf8proc_map(
		reinterpret_cast<const utf8proc_uint8_t*>(string.data()),
		string.size(),
		&buf,
		options
	);

	/* a negative size is an error code, not a length: using it as one
	 * would request an enormous string from a buffer that was never set */
	if (size >= 0 && buf)
		string.assign(reinterpret_cast<const char*>(buf), static_cast<std::size_t>(size));

	/* free(nullptr) is a no-op, so no guard is needed */
	free(buf);
}

/* Brings an anime title into a canonical form, in place. The steps run
 * in this order: Roman numerals are converted, the text is Unicode
 * normalized (which also case-folds it), and finally spaces are removed
 * from both ends.
 */
void NormalizeAnimeTitle(std::string& string) {
	constexpr char kPadding = ' ';

	ConvertRomanNumerals(string);
	NormalizeUnicode(string);

	/* trim the front first, then the back */
	RemoveLeadingChars(string, kPadding);
	RemoveTrailingChars(string, kPadding);
}

void TextifySynopsis(std::string& string) {
	/* Just let Qt deal with it. */
	QTextDocument text;
	text.setHtml(Strings::ToQString(string));
	string = Strings::ToUtf8String(text.toPlainText());
}

/* let Qt handle the heavy lifting of locale shit
 * I don't want to deal with
 */
std::string ToUpper(const std::string& string) {
	return ToUtf8String(session.config.locale.GetLocale().toUpper(ToQString(string)));
}

std::string ToLower(const std::string& string) {
	return ToUtf8String(session.config.locale.GetLocale().toLower(ToQString(string)));
}

/* Converts a UTF-8 encoded std::string to a std::wstring.
 *
 * The converter is constructed with explicit error strings, which means
 * from_bytes() hands back the (empty) wide error string on malformed
 * input instead of throwing std::range_error. A try/catch around it can
 * therefore never fire, so failure is detected by checking for an empty
 * result from a non-empty input.
 *
 * Returns the converted string, or an empty string if `string` is not
 * valid UTF-8 (a diagnostic is written to std::cerr in that case).
 */
std::wstring ToWstring(const std::string& string) {
	static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L"");

	std::wstring wstr = converter.from_bytes(string);

	/* valid non-empty UTF-8 never converts to nothing, so this is the error path */
	if (wstr.empty() && !string.empty())
		std::cerr << "Failed to convert UTF-8 to wide string!" << std::endl;

	return wstr;
}

/* Converts a QString to a std::wstring.
 *
 * The buffer is sized to string.size() code units and then shrunk to the
 * length toWCharArray() reports it actually wrote, so the result carries
 * no trailing NUL padding when fewer wide characters are written than
 * were allocated.
 */
std::wstring ToWstring(const QString& string) {
	std::wstring arr(string.size(), L'\0');
	/* data() is valid even for an empty string, unlike front() */
	arr.resize(string.toWCharArray(arr.data()));
	return arr;
}

/* Encodes a std::wstring as a UTF-8 std::string. The converter is built
 * with empty error strings, so a failed conversion yields an empty
 * result rather than an exception.
 */
std::string ToUtf8String(const std::wstring& wstring) {
	using Utf8Converter = std::wstring_convert<std::codecvt_utf8<wchar_t>>;
	static Utf8Converter utf8_converter("", L"");

	std::string encoded = utf8_converter.to_bytes(wstring);
	return encoded;
}

std::string ToUtf8String(const QString& string) {
	const QByteArray ba = string.toUtf8();
	return std::string(ba.constData(), ba.size());
}

/* Copies the raw bytes of `ba` into a std::string. No transcoding is
 * performed; the bytes are taken as they are.
 */
std::string ToUtf8String(const QByteArray& ba) {
	std::string bytes;
	bytes.assign(ba.constData(), ba.size());
	return bytes;
}

/* Builds a QString from a UTF-8 encoded std::string. The length is
 * passed explicitly rather than relying on NUL termination.
 */
QString ToQString(const std::string& string) {
	const char* const utf8 = string.c_str();
	const auto byte_count = string.length();
	return QString::fromUtf8(utf8, byte_count);
}

/* Builds a QString from a std::wstring, passing the length explicitly
 * rather than relying on NUL termination.
 */
QString ToQString(const std::wstring& wstring) {
	const wchar_t* const chars = wstring.c_str();
	const auto char_count = wstring.length();
	return QString::fromWCharArray(chars, char_count);
}

/* Spells a boolean out as the text "true" or "false". */
std::string ToUtf8String(const bool b) {
	if (b)
		return "true";

	return "false";
}

/* Parses `str` as a boolean, case-insensitively ("true" / "false").
 *
 * Returns the parsed value, or `def` when `str` is empty or cannot be
 * parsed as a boolean.
 *
 * The value is extracted into a separate variable: a failed boolalpha
 * extraction stores `false` into its target, so reading straight into
 * `def` would silently discard the caller's default.
 */
bool ToBool(const std::string& str, bool def) {
	/* the only accepted words are plain ASCII, so an ASCII-only,
	 * locale-independent lowercase is all that is needed here */
	std::string lowered(str);
	for (char& ch : lowered) {
		if (ch >= 'A' && ch <= 'Z')
			ch = static_cast<char>(ch - 'A' + 'a');
	}

	std::istringstream s(lowered);
	bool value = def;
	if (s >> std::boolalpha >> value)
		return value;

	return def;
}

/* util funcs */

/* Converts a human-readable size such as "1.5 GiB" or "700MB" into a
 * byte count.
 *
 * The leading number is parsed with std::stod and multiplied by the
 * factor of whichever known suffix appears in `str` (decimal KB..PB or
 * binary KiB..PiB). Results that are negative or not a number give 0;
 * results too large for uint64_t are clamped to UINT64_MAX.
 *
 * If no suffix is present, or the number cannot be parsed, the string is
 * handed to ToInt(str, 0) instead.
 */
uint64_t HumanReadableSizeToBytes(const std::string& str) {
	static const std::unordered_map<std::string, uint64_t> bytes_map = {
		{"KB", 1000ull},
		{"MB", 1000000ull},
		{"GB", 1000000000ull},
		{"TB", 1000000000000ull},
		{"PB", 1000000000000000ull},
		{"KiB", 1ull << 10},
		{"MiB", 1ull << 20},
		{"GiB", 1ull << 30},
		{"TiB", 1ull << 40},
		{"PiB", 1ull << 50}  /* surely we won't need more than this */
	};

	for (const auto& [suffix, multiplier] : bytes_map) {
		if (str.find(suffix) == std::string::npos)
			continue;

		double value = 0.0;
		try {
			value = std::stod(str);
		} catch (const std::logic_error&) {
			/* std::invalid_argument or std::out_of_range. The parse does
			 * not depend on the suffix, so trying the others is pointless. */
			break;
		}

		value *= static_cast<double>(multiplier);

		/* converting a negative, NaN or too-large double to an unsigned
		 * integer is undefined behaviour, so handle those explicitly */
		if (!(value > 0.0))
			return 0;
		if (value >= static_cast<double>(UINT64_MAX))
			return UINT64_MAX;

		return static_cast<uint64_t>(value);
	}

	return ToInt(str, 0);
}

/* Formats a byte count as a human-readable size string.
 *
 * On Qt >= 5.10 this is delegated to formattedDataSize() on the locale
 * object from session.config.locale.GetLocale(). Older Qt versions use
 * the fallback below, which picks the largest binary unit (KiB..PiB)
 * that `bytes` reaches and prints the value with `precision` digits
 * after the decimal point. Anything below 1 KiB is printed as a plain
 * byte count.
 */
std::string BytesToHumanReadableSize(uint64_t bytes, int precision) {
#if QT_VERSION >= QT_VERSION_CHECK(5, 10, 0)
	/* QLocale in Qt >= 5.10.0 has a function for this */
	return Strings::ToUtf8String(session.config.locale.GetLocale().formattedDataSize(bytes, precision));
#else
	struct Unit {
		uint64_t size;
		const char* suffix;
	};

	/* Must be ordered from largest to smallest so the first unit that
	 * fits is the right one. An unordered container would pick an
	 * arbitrary unit here. */
	static const Unit units[] = {
		{1ull << 50, "PiB"},
		{1ull << 40, "TiB"},
		{1ull << 30, "GiB"},
		{1ull << 20, "MiB"},
		{1ull << 10, "KiB"}
	};

	for (const Unit& unit : units) {
		if (bytes < unit.size)
			continue;

		std::stringstream ss;
		/* std::fixed makes setprecision count decimal places; without it
		 * larger values fall into scientific notation ("1.2e+02 KiB") */
		ss << std::fixed << std::setprecision(precision)
		   << (static_cast<double>(bytes) / unit.size) << " "
		   << unit.suffix;
		return ss.str();
	}

	/* smaller than 1 KiB: show the exact count ("0 bytes" for zero) */
	return std::to_string(bytes) + " bytes";
#endif
}

/* Strips every leading occurrence of `c` from `s`, in place. A string
 * made up solely of `c` (or an empty one) ends up empty.
 */
void RemoveLeadingChars(std::string& s, const char c) {
	/* When nothing but `c` is present, find_first_not_of returns npos and
	 * erase(0, npos) clears the whole string. No clamping is needed;
	 * clamping to size() - 1 used to leave one stray character behind. */
	s.erase(0, s.find_first_not_of(c));
}

/* Strips every trailing occurrence of `c` from `s`, in place. A string
 * made up solely of `c` (or an empty one) ends up empty.
 */
void RemoveTrailingChars(std::string& s, const char c) {
	const std::size_t last_kept = s.find_last_not_of(c);

	/* when nothing is kept, last_kept is npos and npos + 1 wraps to 0,
	 * so the whole string is erased */
	const std::size_t new_length = last_kept + 1;
	s.erase(new_length);
}

/* Compares `str` and `sub` character by character over the length of
 * the shorter of the two. Returns true when they agree on that whole
 * range, which is trivially the case if either string is empty.
 */
bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {
	const std::size_t overlap = std::min(str.length(), sub.length());
	return std::equal(str.begin(), str.begin() + overlap, sub.begin());
}

std::string Translate(const char* str) {
	return Strings::ToUtf8String(QCoreApplication::tr(str));
}

} // namespace Strings