Mercurial > minori
diff src/core/strings.cc @ 264:9a04802848c0
*: improve multiple things
e.g. making some strings.cc functions modify strings in-place,
improving m4_ax_have_qt.m4 code, making anime_db.cc rely on
std::optional rather than std::shared_ptr (which was stupid
anyway)
author | Paper <paper@paper.us.eu.org> |
---|---|
date | Thu, 11 Apr 2024 10:15:57 -0400 |
parents | dd211ff68b36 |
children | f31305b9f60a |
line wrap: on
line diff
--- a/src/core/strings.cc	Wed Apr 03 20:46:40 2024 -0400
+++ b/src/core/strings.cc	Thu Apr 11 10:15:57 2024 -0400
@@ -18,6 +18,8 @@
 #include <unordered_map>
 #include <vector>
 
+#include "utf8proc.h"
+
 namespace Strings {
 
 /* ew */
@@ -98,6 +100,38 @@
 		ReplaceAll(string, item.second, item.first);
 }
 
+/* this also performs case folding, so our string is lowercase after this */
+void NormalizeUnicode(std::string& string) {
+	static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>(
+		UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE |
+		UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK |
+		UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS
+	);
+
+	/* ack */
+	utf8proc_uint8_t* buf = nullptr;
+
+	const utf8proc_ssize_t size = utf8proc_map(
+		reinterpret_cast<const utf8proc_uint8_t*>(string.data()),
+		string.size(),
+		&buf,
+		options
+	);
+
+	if (size)
+		string = std::string(reinterpret_cast<const char*>(buf), size);
+
+	if (buf)
+		free(buf);
+}
+
+void NormalizeAnimeTitle(std::string& string) {
+	ConvertRomanNumerals(string);
+	NormalizeUnicode(string);
+	RemoveLeadingChars(string, ' ');
+	RemoveTrailingChars(string, ' ');
+}
+
 /* removes dumb HTML tags because anilist is aids and
  * gives us HTML for synopses :/
  */
@@ -230,14 +264,12 @@
 	return ToInt(str, 0);
 }
 
-std::string RemoveLeadingChars(std::string s, const char c) {
+void RemoveLeadingChars(std::string& s, const char c) {
 	s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1));
-	return s;
 }
 
-std::string RemoveTrailingChars(std::string s, const char c) {
+void RemoveTrailingChars(std::string& s, const char c) {
 	s.erase(s.find_last_not_of(c) + 1, std::string::npos);
-	return s;
 }
 
 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {