Mercurial > minori
comparison src/core/strings.cc @ 98:582b2fca1561
strings: parse HTML entities when reading synopsis, make the
toupper and tolower functions more sane
| author | Paper <mrpapersonic@gmail.com> |
|---|---|
| date | Thu, 02 Nov 2023 15:22:02 -0400 |
| parents | 9b2b41f83a5e |
| children | 503bc1547d49 |
comparison
equal
deleted
inserted
replaced
| 97:18979b066284 | 98:582b2fca1561 |
|---|---|
| 2 * strings.cpp: Useful functions for manipulating strings | 2 * strings.cpp: Useful functions for manipulating strings |
| 3 **/ | 3 **/ |
| 4 #include "core/strings.h" | 4 #include "core/strings.h" |
| 5 #include <QByteArray> | 5 #include <QByteArray> |
| 6 #include <QString> | 6 #include <QString> |
| 7 #include <QLocale> | |
| 7 #include <algorithm> | 8 #include <algorithm> |
| 8 #include <cctype> | 9 #include <cctype> |
| 9 #include <codecvt> | 10 #include <codecvt> |
| 10 #include <locale> | 11 #include <locale> |
| 11 #include <string> | 12 #include <string> |
| 23 out.append(delimiter); | 24 out.append(delimiter); |
| 24 } | 25 } |
| 25 return out; | 26 return out; |
| 26 } | 27 } |
| 27 | 28 |
| 28 std::string ReplaceAll(const std::string& string, const std::string& find, const std::string& replace) { | 29 std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) { |
| 29 std::string result; | 30 size_t pos = 0; |
| 30 size_t pos, find_len = find.size(), from = 0; | 31 while ((pos = string.find(find, pos)) != std::string::npos) { |
| 31 while ((pos = string.find(find, from)) != std::string::npos) { | 32 string.replace(pos, find.length(), replace); |
| 32 result.append(string, from, pos - from); | 33 pos += replace.length(); |
| 33 result.append(replace); | |
| 34 from = pos + find_len; | |
| 35 } | 34 } |
| 36 result.append(string, from, std::string::npos); | 35 return string; |
| 37 return result; | |
| 38 } | 36 } |
| 39 | 37 |
| 40 /* this function probably fucks your RAM but whatevs */ | 38 /* :) */ |
| 41 std::string SanitizeLineEndings(const std::string& string) { | 39 std::string SanitizeLineEndings(const std::string& string) { |
| 42 std::string result(string); | 40 return ReplaceAll(ReplaceAll(ReplaceAll(string, "\r\n", "\n"), "<br>", "\n"), "\n\n\n", "\n\n"); |
| 43 result = ReplaceAll(result, "\r\n", "\n"); | |
| 44 result = ReplaceAll(result, "<br>", "\n"); | |
| 45 result = ReplaceAll(result, "\n\n\n", "\n\n"); | |
| 46 return result; | |
| 47 } | 41 } |
| 48 | 42 |
| 43 /* removes dumb HTML tags because anilist is aids and | |
| 44 gives us HTML for synopses :/ */ | |
| 49 std::string RemoveHtmlTags(const std::string& string) { | 45 std::string RemoveHtmlTags(const std::string& string) { |
| 50 std::string html(string); | 46 std::string html(string); |
| 51 while (html.find("<") != std::string::npos) { | 47 while (html.find("<") != std::string::npos) { |
| 52 auto startpos = html.find("<"); | 48 auto startpos = html.find("<"); |
| 53 auto endpos = html.find(">") + 1; | 49 auto endpos = html.find(">") + 1; |
| 57 } | 53 } |
| 58 } | 54 } |
| 59 return html; | 55 return html; |
| 60 } | 56 } |
| 61 | 57 |
| 62 std::string TextifySynopsis(const std::string& string) { | 58 /* e.g. "&lt;" for "<" */ |
| 63 return RemoveHtmlTags(SanitizeLineEndings(string)); | 59 std::string ParseHtmlEntities(const std::string& string) { |
| 60 const std::unordered_map<std::string, std::string> map = { | |
| 61 {"&lt;", "<"}, | |
| 62 {"&rt;", ">"}, | |
| 63 {"&nbsp;", "\xA0"}, | |
| 64 {"&amp;", "&"}, | |
| 65 {"&quot;", "\""}, | |
| 66 {"&apos;", "'"}, | |
| 67 {"&cent;", "¢"}, | |
| 68 {"&pound;", "£"}, | |
| 69 {"&euro;", "€"}, | |
| 70 {"&yen;", "¥"}, | |
| 71 {"&copy;", "©"}, | |
| 72 {"&reg;", "®"}, | |
| 73 {"&rsquo;", "’"} // Haibane Renmei, AniList | |
| 74 }; | |
| 75 | |
| 76 std::string ret = string; | |
| 77 for (const auto& item : map) | |
| 78 ret = ReplaceAll(ret, item.first, item.second); | |
| 79 return ret; | |
| 64 } | 80 } |
| 65 | 81 |
| 66 /* these functions suck for i18n!... | 82 /* */ |
| 67 but we only use them with JSON | 83 std::string TextifySynopsis(const std::string& string) { |
| 68 stuff anyway */ | 84 return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string))); |
| 85 } | |
| 86 | |
| 87 /* let Qt handle the heavy lifting of locale shit | |
| 88 I don't want to deal with */ | |
| 69 std::string ToUpper(const std::string& string) { | 89 std::string ToUpper(const std::string& string) { |
| 70 std::string result(string); | 90 /* todo: this "locale" will have to be moved to session.h */ |
| 71 std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::toupper(c); }); | 91 QLocale locale; |
| 72 return result; | 92 return ToUtf8String(locale.toUpper(ToQString(string))); |
| 73 } | 93 } |
| 74 | 94 |
| 75 std::string ToLower(const std::string& string) { | 95 std::string ToLower(const std::string& string) { |
| 76 std::string result(string); | 96 QLocale locale; |
| 77 std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::tolower(c); }); | 97 return ToUtf8String(locale.toLower(ToQString(string))); |
| 78 return result; | |
| 79 } | 98 } |
| 80 | 99 |
| 81 std::wstring ToWstring(const std::string& string) { | 100 std::wstring ToWstring(const std::string& string) { |
| 82 std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter; | 101 std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter; |
| 83 return converter.from_bytes(string); | 102 return converter.from_bytes(string); |
