Mercurial > minori
comparison src/core/strings.cc @ 264:9a04802848c0
*: improve multiple things
e.g. making some strings.cc functions modify strings in place,
improving the m4_ax_have_qt.m4 code, and making anime_db.cc rely on
std::optional rather than std::shared_ptr (which was stupid
anyway)
author | Paper <paper@paper.us.eu.org> |
---|---|
date | Thu, 11 Apr 2024 10:15:57 -0400 |
parents | dd211ff68b36 |
children | f31305b9f60a |
comparison legend: equal | deleted | inserted | replaced
263:96416310ea14 | 264:9a04802848c0 |
---|---|
16 #include <locale> | 16 #include <locale> |
17 #include <string> | 17 #include <string> |
18 #include <unordered_map> | 18 #include <unordered_map> |
19 #include <vector> | 19 #include <vector> |
20 | 20 |
21 #include "utf8proc.h" | |
22 | |
21 namespace Strings { | 23 namespace Strings { |
22 | 24 |
23 /* ew */ | 25 /* ew */ |
24 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) { | 26 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) { |
25 if (vector.size() < 1) | 27 if (vector.size() < 1) |
94 {"13", "XIII"} | 96 {"13", "XIII"} |
95 }; | 97 }; |
96 | 98 |
97 for (const auto& item : vec) | 99 for (const auto& item : vec) |
98 ReplaceAll(string, item.second, item.first); | 100 ReplaceAll(string, item.second, item.first); |
101 } | |
102 | |
103 /* this also performs case folding, so our string is lowercase after this */ | |
104 void NormalizeUnicode(std::string& string) { | |
105 static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>( | |
106 UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE | | |
107 UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK | | |
108 UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS | |
109 ); | |
110 | |
111 /* ack */ | |
112 utf8proc_uint8_t* buf = nullptr; | |
113 | |
114 const utf8proc_ssize_t size = utf8proc_map( | |
115 reinterpret_cast<const utf8proc_uint8_t*>(string.data()), | |
116 string.size(), | |
117 &buf, | |
118 options | |
119 ); | |
120 | |
121 if (size) | |
122 string = std::string(reinterpret_cast<const char*>(buf), size); | |
123 | |
124 if (buf) | |
125 free(buf); | |
126 } | |
127 | |
128 void NormalizeAnimeTitle(std::string& string) { | |
129 ConvertRomanNumerals(string); | |
130 NormalizeUnicode(string); | |
131 RemoveLeadingChars(string, ' '); | |
132 RemoveTrailingChars(string, ' '); | |
99 } | 133 } |
100 | 134 |
101 /* removes dumb HTML tags because anilist is aids and | 135 /* removes dumb HTML tags because anilist is aids and |
102 * gives us HTML for synopses :/ | 136 * gives us HTML for synopses :/ |
103 */ | 137 */ |
228 } | 262 } |
229 | 263 |
230 return ToInt(str, 0); | 264 return ToInt(str, 0); |
231 } | 265 } |
232 | 266 |
233 std::string RemoveLeadingChars(std::string s, const char c) { | 267 void RemoveLeadingChars(std::string& s, const char c) { |
234 s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1)); | 268 s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1)); |
235 return s; | 269 } |
236 } | 270 |
237 | 271 void RemoveTrailingChars(std::string& s, const char c) { |
238 std::string RemoveTrailingChars(std::string s, const char c) { | |
239 s.erase(s.find_last_not_of(c) + 1, std::string::npos); | 272 s.erase(s.find_last_not_of(c) + 1, std::string::npos); |
240 return s; | |
241 } | 273 } |
242 | 274 |
243 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) { | 275 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) { |
244 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++) | 276 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++) |
245 if (str[i] != sub[i]) | 277 if (str[i] != sub[i]) |