changeset 264:9a04802848c0

*: improve multiple things, e.g. making some strings.cc functions modify strings in place, improving the m4_ax_have_qt.m4 code, and making anime.cc/anime.h rely on std::optional rather than std::shared_ptr (which was stupid anyway)
author Paper <paper@paper.us.eu.org>
date Thu, 11 Apr 2024 10:15:57 -0400
parents 96416310ea14
children ff0b2052b234
files Makefile.am configure.ac dep/animone/src/strategist.cc include/core/anime.h include/core/strings.h m4/m4_ax_have_qt.m4 src/core/anime.cc src/core/anime_db.cc src/core/strings.cc src/gui/pages/torrents.cc
diffstat 10 files changed, 96 insertions(+), 58 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile.am	Wed Apr 03 20:46:40 2024 -0400
+++ b/Makefile.am	Thu Apr 11 10:15:57 2024 -0400
@@ -178,6 +178,9 @@
 	dep/semver/semver.hpp	\
 	$(minori_qtheaders)
 
+minori_utf8proc_sources = \
+	dep/utf8proc/utf8proc.c
+
 minori_SOURCES = \
 	src/core/anime_db.cc		\
 	src/core/anime.cc		\
@@ -231,6 +234,7 @@
 	$(minori_qtrc)	\
 	$(minori_locale_qm)	\
 	$(minori_moc_sources)	\
+	$(minori_utf8proc_sources)	\
 	rc/locale/translations.qrc	\
 	rc/final_qrc.cc
 
@@ -241,6 +245,7 @@
 	-I$(top_srcdir)/dep/animone/include \
 	-I$(top_srcdir)/dep/pugixml/src \
 	-I$(top_srcdir)/dep/anitomy \
+	-I$(top_srcdir)/dep/utf8proc \
 	-I$(top_srcdir)/dep
 
 minori_CPPFLAGS = $(QT_CPPFLAGS) $(LIBCURL_CPPFLAGS) $(minori_includes)
--- a/configure.ac	Wed Apr 03 20:46:40 2024 -0400
+++ b/configure.ac	Thu Apr 11 10:15:57 2024 -0400
@@ -9,6 +9,9 @@
 
 AM_INIT_AUTOMAKE([-Wall -Wportability foreign subdir-objects])
 
+dnl need C compiler for utf8proc
+AC_PROG_CC
+
 dnl Do we have a C++17 compiler
 : ${CXXFLAGS=""}
 AC_PROG_CXX
--- a/dep/animone/src/strategist.cc	Wed Apr 03 20:46:40 2024 -0400
+++ b/dep/animone/src/strategist.cc	Thu Apr 11 10:15:57 2024 -0400
@@ -74,8 +74,6 @@
 
 	/* map pids to our results, saves time with open_file_proc */
 	std::unordered_map<pid_t, Result*> pid_map;
-	pid_map.reserve(results.size());
-
 	std::set<pid_t> pids;
 
 	for (Result& result : results) {
--- a/include/core/anime.h	Wed Apr 03 20:46:40 2024 -0400
+++ b/include/core/anime.h	Thu Apr 11 10:15:57 2024 -0400
@@ -5,6 +5,7 @@
 #include <array>
 #include <map>
 #include <vector>
+#include <optional>
 
 namespace Anime {
 
@@ -186,7 +187,7 @@
 
 private:
 	SeriesInformation info_;
-	std::shared_ptr<struct ListInformation> list_info_;
+	std::optional<struct ListInformation> list_info_ = std::nullopt;
 };
 
 } // namespace Anime
--- a/include/core/strings.h	Wed Apr 03 20:46:40 2024 -0400
+++ b/include/core/strings.h	Thu Apr 11 10:15:57 2024 -0400
@@ -25,6 +25,8 @@
 void SanitizeLineEndings(std::string& string);
 void RemoveHtmlTags(std::string& string);
 void ParseHtmlEntities(std::string& string);
+void NormalizeUnicode(std::string& string);
+void NormalizeAnimeTitle(std::string& string);
 
 /* stupid HTML bullshit */
 void TextifySynopsis(std::string& string);
@@ -66,8 +68,8 @@
 
 uint64_t HumanReadableSizeToBytes(const std::string& str);
 
-std::string RemoveLeadingChars(std::string s, const char c);
-std::string RemoveTrailingChars(std::string s, const char c);
+void RemoveLeadingChars(std::string& s,  const char c);
+void RemoveTrailingChars(std::string& s, const char c);
 
 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub);
 
--- a/m4/m4_ax_have_qt.m4	Wed Apr 03 20:46:40 2024 -0400
+++ b/m4/m4_ax_have_qt.m4	Thu Apr 11 10:15:57 2024 -0400
@@ -81,6 +81,7 @@
     am_have_qt_pro="$am_have_qt_dir/test.pro"
     am_have_qt_stash="$am_have_qt_dir/.qmake.stash"
     am_have_qt_makefile="$am_have_qt_dir/Makefile"
+    am_have_qt_makefile_vars="$am_have_qt_dir/Makefile.vars"
     # http://qt-project.org/doc/qt-5/qmake-variable-reference.html#qt
     cat > $am_have_qt_pro << EOF
 win32 {
@@ -90,13 +91,20 @@
 qtHaveModule(core):    QT += core
 qtHaveModule(gui):     QT += gui
 qtHaveModule(widgets): QT += widgets
-percent.target = %
-percent.commands = @echo -n "\$(\$(@))\ "
-QMAKE_EXTRA_TARGETS += percent
+EOF
+    cat > "$am_have_qt_makefile_vars" << EOF
+include $am_have_qt_makefile
+
+# todo: use printf here
+CXXFLAGS:
+	@echo \$(CXXFLAGS) \$(INCPATH)
+
+LIBS:
+	@echo \$(LIBS)
 EOF
     $QMAKE $am_have_qt_pro -o $am_have_qt_makefile
-    QT_CXXFLAGS=`cd $am_have_qt_dir; make -s -f $am_have_qt_makefile CXXFLAGS INCPATH`
-    QT_LIBS=`cd $am_have_qt_dir; make -s -f $am_have_qt_makefile LIBS`
+    QT_CXXFLAGS=`cd $am_have_qt_dir; make -s -f $am_have_qt_makefile_vars CXXFLAGS`
+    QT_LIBS=`cd $am_have_qt_dir; make -s -f $am_have_qt_makefile_vars LIBS`
     rm $am_have_qt_pro $am_have_qt_stash $am_have_qt_makefile
     rmdir $am_have_qt_dir
 
--- a/src/core/anime.cc	Wed Apr 03 20:46:40 2024 -0400
+++ b/src/core/anime.cc	Thu Apr 11 10:15:57 2024 -0400
@@ -15,13 +15,14 @@
 
 /* User list data */
 bool Anime::IsInUserList() const {
-	if (list_info_.get())
+	if (list_info_.has_value())
 		return true;
 	return false;
 }
 
 void Anime::AddToUserList() {
-	list_info_.reset(new ListInformation);
+	ListInformation list = {0};
+	list_info_.emplace(list);
 }
 
 void Anime::RemoveFromUserList() {
@@ -29,22 +30,22 @@
 }
 
 ListStatus Anime::GetUserStatus() const {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	return list_info_->status;
 }
 
 int Anime::GetUserProgress() const {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	return list_info_->progress;
 }
 
 int Anime::GetUserScore() const {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	return list_info_->score;
 }
 
 std::string Anime::GetUserPresentableScore() const {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	const int score = list_info_->score;
 	if (score == 0)
 		return "";
@@ -77,87 +78,87 @@
 }
 
 Date Anime::GetUserDateStarted() const {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	return list_info_->started;
 }
 
 Date Anime::GetUserDateCompleted() const {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	return list_info_->completed;
 }
 
 bool Anime::GetUserIsPrivate() const {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	return list_info_->is_private;
 }
 
 unsigned int Anime::GetUserRewatchedTimes() const {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	return list_info_->rewatched_times;
 }
 
 bool Anime::GetUserIsRewatching() const {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	return list_info_->rewatching;
 }
 
 uint64_t Anime::GetUserTimeUpdated() const {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	return list_info_->updated;
 }
 
 std::string Anime::GetUserNotes() const {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	return list_info_->notes;
 }
 
 void Anime::SetUserStatus(ListStatus status) {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	list_info_->status = status;
 }
 
 void Anime::SetUserScore(int score) {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	list_info_->score = score;
 }
 
 void Anime::SetUserProgress(int progress) {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	list_info_->progress = progress;
 }
 
 void Anime::SetUserDateStarted(Date const& started) {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	list_info_->started = started;
 }
 
 void Anime::SetUserDateCompleted(Date const& completed) {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	list_info_->completed = completed;
 }
 
 void Anime::SetUserIsPrivate(bool is_private) {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	list_info_->is_private = is_private;
 }
 
 void Anime::SetUserRewatchedTimes(int rewatched) {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	list_info_->rewatched_times = rewatched;
 }
 
 void Anime::SetUserIsRewatching(bool rewatching) {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	list_info_->rewatching = rewatching;
 }
 
 void Anime::SetUserTimeUpdated(uint64_t updated) {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	list_info_->updated = updated;
 }
 
 void Anime::SetUserNotes(std::string const& notes) {
-	assert(list_info_.get());
+	assert(list_info_.has_value());
 	list_info_->notes = notes;
 }
 
--- a/src/core/anime_db.cc	Wed Apr 03 20:46:40 2024 -0400
+++ b/src/core/anime_db.cc	Thu Apr 11 10:15:57 2024 -0400
@@ -106,22 +106,6 @@
 	return (amt > 0) ? std::sqrt(squares_sum / amt) : 0;
 }
 
-template<typename T, typename U>
-static T get_lowest_in_map(const std::unordered_map<T, U>& map) {
-	if (map.size() <= 0)
-		return 0;
-
-	T id = 0;
-	U ret = std::numeric_limits<U>::max();
-	for (const auto& t : map) {
-		if (t.second < ret) {
-			ret = t.second;
-			id = t.first;
-		}
-	}
-	return id;
-}
-
 /*
  * TODO: separate this from the anime DB,
  * provide *some* sort of normalization
@@ -130,14 +114,17 @@
 	if (title.empty())
 		return 0;
 
+	std::string title_n(title);
+	Strings::NormalizeAnimeTitle(title_n);
+
 	for (const auto& [id, anime] : items) {
 		std::vector<std::string> synonyms(anime.GetTitleSynonyms());
 		synonyms.push_back(anime.GetUserPreferredTitle());
 
-		for (const auto& synonym : synonyms) {
-			if (synonym == title) {
+		for (auto& synonym : synonyms) {
+			Strings::NormalizeAnimeTitle(synonym);
+			if (synonym == title_n)
 				return id;
-			}
 		}
 	}
 
--- a/src/core/strings.cc	Wed Apr 03 20:46:40 2024 -0400
+++ b/src/core/strings.cc	Thu Apr 11 10:15:57 2024 -0400
@@ -18,6 +18,8 @@
 #include <unordered_map>
 #include <vector>
 
+#include "utf8proc.h"
+
 namespace Strings {
 
 /* ew */
@@ -98,6 +100,38 @@
 		ReplaceAll(string, item.second, item.first);
 }
 
+/* this also performs case folding, so our string is lowercase after this */
+void NormalizeUnicode(std::string& string) {
+	static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>(
+		UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE |
+		UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK |
+		UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS
+	);
+
+	/* ack */
+	utf8proc_uint8_t* buf = nullptr;
+
+	const utf8proc_ssize_t size = utf8proc_map(
+		reinterpret_cast<const utf8proc_uint8_t*>(string.data()),
+		string.size(),
+		&buf,
+		options
+	);
+
+	if (size)
+		string = std::string(reinterpret_cast<const char*>(buf), size);
+
+	if (buf)
+		free(buf);
+}
+
+void NormalizeAnimeTitle(std::string& string) {
+	ConvertRomanNumerals(string);
+	NormalizeUnicode(string);
+	RemoveLeadingChars(string, ' ');
+	RemoveTrailingChars(string, ' ');
+}
+
 /* removes dumb HTML tags because anilist is aids and
  * gives us HTML for synopses :/
  */
@@ -230,14 +264,12 @@
 	return ToInt(str, 0);
 }
 
-std::string RemoveLeadingChars(std::string s, const char c) {
+void RemoveLeadingChars(std::string& s, const char c) {
 	s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1));
-	return s;
 }
 
-std::string RemoveTrailingChars(std::string s, const char c) {
+void RemoveTrailingChars(std::string& s, const char c) {
 	s.erase(s.find_last_not_of(c) + 1, std::string::npos);
-	return s;
 }
 
 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {
--- a/src/gui/pages/torrents.cc	Wed Apr 03 20:46:40 2024 -0400
+++ b/src/gui/pages/torrents.cc	Thu Apr 11 10:15:57 2024 -0400
@@ -151,8 +151,9 @@
 
 			/* todo: patch Anitomy so that it doesn't use wide strings */
 			torrent.SetTitle(Strings::ToUtf8String(elements.get(anitomy::kElementAnimeTitle)));
-			torrent.SetEpisode(
-			    Strings::RemoveLeadingChars(Strings::ToUtf8String(elements.get(anitomy::kElementEpisodeNumber)), '0'));
+			std::string episode = Strings::ToUtf8String(elements.get(anitomy::kElementEpisodeNumber));
+			Strings::RemoveLeadingChars(episode, '0');
+			torrent.SetEpisode(episode);
 			torrent.SetGroup(Strings::ToUtf8String(elements.get(anitomy::kElementReleaseGroup)));
 			torrent.SetResolution(Strings::ToUtf8String(elements.get(anitomy::kElementVideoResolution)));
 		}