comparison src/core/strings.cc @ 264:9a04802848c0

*: improve multiple things e.g. making some strings.cc functions modify strings in-place, improving m4_ax_have_qt.m4 code, making anime_db.cc rely on std::optional rather than std::shared_ptr (which was stupid anyway)
author Paper <paper@paper.us.eu.org>
date Thu, 11 Apr 2024 10:15:57 -0400
parents dd211ff68b36
children f31305b9f60a
comparison
equal deleted inserted replaced
263:96416310ea14 264:9a04802848c0
16 #include <locale> 16 #include <locale>
17 #include <string> 17 #include <string>
18 #include <unordered_map> 18 #include <unordered_map>
19 #include <vector> 19 #include <vector>
20 20
21 #include "utf8proc.h"
22
21 namespace Strings { 23 namespace Strings {
22 24
23 /* ew */ 25 /* ew */
24 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) { 26 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
25 if (vector.size() < 1) 27 if (vector.size() < 1)
94 {"13", "XIII"} 96 {"13", "XIII"}
95 }; 97 };
96 98
97 for (const auto& item : vec) 99 for (const auto& item : vec)
98 ReplaceAll(string, item.second, item.first); 100 ReplaceAll(string, item.second, item.first);
101 }
102
103 /* this also performs case folding, so our string is lowercase after this */
104 void NormalizeUnicode(std::string& string) {
105 static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>(
106 UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE |
107 UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK |
108 UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS
109 );
110
111 /* ack */
112 utf8proc_uint8_t* buf = nullptr;
113
114 const utf8proc_ssize_t size = utf8proc_map(
115 reinterpret_cast<const utf8proc_uint8_t*>(string.data()),
116 string.size(),
117 &buf,
118 options
119 );
120
121 if (size)
122 string = std::string(reinterpret_cast<const char*>(buf), size);
123
124 if (buf)
125 free(buf);
126 }
127
128 void NormalizeAnimeTitle(std::string& string) {
129 ConvertRomanNumerals(string);
130 NormalizeUnicode(string);
131 RemoveLeadingChars(string, ' ');
132 RemoveTrailingChars(string, ' ');
99 } 133 }
100 134
101 /* removes dumb HTML tags because anilist is aids and 135 /* removes dumb HTML tags because anilist is aids and
102 * gives us HTML for synopses :/ 136 * gives us HTML for synopses :/
103 */ 137 */
228 } 262 }
229 263
230 return ToInt(str, 0); 264 return ToInt(str, 0);
231 } 265 }
232 266
/* Removes every leading occurrence of `c` from `s`, in place.
 *
 * If `s` is empty or consists solely of `c`, the result is an empty string.
 */
void RemoveLeadingChars(std::string& s, const char c) {
	/* find_first_not_of returns npos when nothing but `c` is found;
	 * erase(0, npos) then clears the whole string, which is what we want. */
	s.erase(0, s.find_first_not_of(c));
}

/* Removes every trailing occurrence of `c` from `s`, in place.
 *
 * If `s` is empty or consists solely of `c`, the result is an empty string.
 */
void RemoveTrailingChars(std::string& s, const char c) {
	const std::string::size_type last_kept = s.find_last_not_of(c);

	/* nothing other than `c` in the string: drop everything */
	if (last_kept == std::string::npos) {
		s.clear();
		return;
	}

	/* keep everything up to and including the last non-`c` character */
	s.resize(last_kept + 1);
}
242 274
243 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) { 275 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {
244 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++) 276 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++)
245 if (str[i] != sub[i]) 277 if (str[i] != sub[i])