minori: src/core/strings.cc comparison

comparison src/core/strings.cc @ 264:9a04802848c0

*: improve multiple things e.g. making some strings.cc functions modify strings in-place, improving m4_ax_have_qt.m4 code, making anime_db.cc rely on std::optional rather than std::shared_ptr (which was stupid anyway)

author	Paper <paper@paper.us.eu.org>
date	Thu, 11 Apr 2024 10:15:57 -0400
parents	dd211ff68b36
children	f31305b9f60a

comparison

equal deleted inserted replaced

-:96416310ea14
+:9a04802848c0
 #include <locale>
 #include <string>
 #include <unordered_map>
 #include <vector>
+#include "utf8proc.h"
 namespace Strings {
 /* ew */
 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
 	if (vector.size() < 1)
 		{"13", "XIII"}
 	};
 	for (const auto& item : vec)
 		ReplaceAll(string, item.second, item.first);
+}
+/* Normalizes a UTF-8 string in place using utf8proc.
+ *
+ * The option set requests compatibility composition, stripping of ignorable
+ * code points, control characters and combining marks, lumping of look-alike
+ * characters and newline conversion. It also performs case folding, so our
+ * string is lowercase after this.
+ *
+ * If utf8proc reports an error (for example because the input is not valid
+ * UTF-8) the string is left untouched.
+ */
+void NormalizeUnicode(std::string& string) {
+	static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>(
+		UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE |
+		UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK |
+		UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS
+	);
+	/* utf8proc_map() allocates the output buffer for us; we own it and have to free() it */
+	utf8proc_uint8_t* buf = nullptr;
+	const utf8proc_ssize_t size = utf8proc_map(
+		reinterpret_cast<const utf8proc_uint8_t*>(string.data()),
+		static_cast<utf8proc_ssize_t>(string.size()),
+		&buf,
+		options
+	);
+	/* a negative size is a utf8proc error code, not a length, so never build a string from it.
+	 * a size of zero is a valid (empty) result and still has to replace the input. */
+	if (size >= 0 && buf)
+		string.assign(reinterpret_cast<const char*>(buf), static_cast<std::string::size_type>(size));
+	/* free(nullptr) is a no-op, so no null check is needed */
+	free(buf);
+}
+void NormalizeAnimeTitle(std::string& string) { /* rewrites `string` in place; the steps below run in exactly this order */
+	ConvertRomanNumerals(string); /* NOTE(review): presumably has to run before NormalizeUnicode, whose UTF8PROC_CASEFOLD would lowercase numerals such as "XIII" -- confirm */
+	NormalizeUnicode(string); /* Unicode normalization; its option set includes case folding */
+	RemoveLeadingChars(string, ' '); /* strip leading spaces */
+	RemoveTrailingChars(string, ' '); /* strip trailing spaces */
 }
 /* strips HTML tags, because AniList returns
 * synopses as HTML rather than plain text
 */
 	}
 	return ToInt(str, 0);
 }
-std::string RemoveLeadingChars(std::string s, const char c) {
+void RemoveLeadingChars(std::string& s, const char c) { /* Strips leading occurrences of `c` from `s`, in place.
+ * The erase count is capped at s.size() - 1, so a non-empty string made up
+ * entirely of `c` keeps its last character; an empty string is left as-is
+ * (size() - 1 wraps to npos there, and erasing from an empty string is a no-op).
+ * NOTE(review): the cap is presumably deliberate (e.g. "000" -> "0") -- confirm against callers. */
 	s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1));
-	return s;
+}
-}
+void RemoveTrailingChars(std::string& s, const char c) { /* Strips trailing occurrences of `c` from `s`, in place. */
-std::string RemoveTrailingChars(std::string s, const char c) {
 	s.erase(s.find_last_not_of(c) + 1, std::string::npos); /* find_last_not_of() yields npos when `s` is empty or all `c`; npos + 1 wraps to 0, so the whole string is cleared */
-	return s;
 }
 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {
 	for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++)
 		if (str[i] != sub[i])

Mercurial > minori

comparison src/core/strings.cc @ 264:9a04802848c0