Mercurial > minori
comparison src/core/strings.cc @ 98:582b2fca1561
strings: parse HTML entities when reading synopsis, make the
toupper and tolower functions more sane
author | Paper <mrpapersonic@gmail.com> |
---|---|
date | Thu, 02 Nov 2023 15:22:02 -0400 (14 months ago) |
parents | 9b2b41f83a5e |
children | 503bc1547d49 |
comparison
equal
deleted
inserted
replaced
97:18979b066284 | 98:582b2fca1561 |
---|---|
2 * strings.cpp: Useful functions for manipulating strings | 2 * strings.cpp: Useful functions for manipulating strings |
3 **/ | 3 **/ |
4 #include "core/strings.h" | 4 #include "core/strings.h" |
5 #include <QByteArray> | 5 #include <QByteArray> |
6 #include <QString> | 6 #include <QString> |
7 #include <QLocale> | |
7 #include <algorithm> | 8 #include <algorithm> |
8 #include <cctype> | 9 #include <cctype> |
9 #include <codecvt> | 10 #include <codecvt> |
10 #include <locale> | 11 #include <locale> |
11 #include <string> | 12 #include <string> |
23 out.append(delimiter); | 24 out.append(delimiter); |
24 } | 25 } |
25 return out; | 26 return out; |
26 } | 27 } |
27 | 28 |
28 std::string ReplaceAll(const std::string& string, const std::string& find, const std::string& replace) { | 29 std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) { |
29 std::string result; | 30 size_t pos = 0; |
30 size_t pos, find_len = find.size(), from = 0; | 31 while ((pos = string.find(find, pos)) != std::string::npos) { |
31 while ((pos = string.find(find, from)) != std::string::npos) { | 32 string.replace(pos, find.length(), replace); |
32 result.append(string, from, pos - from); | 33 pos += replace.length(); |
33 result.append(replace); | |
34 from = pos + find_len; | |
35 } | 34 } |
36 result.append(string, from, std::string::npos); | 35 return string; |
37 return result; | |
38 } | 36 } |
39 | 37 |
40 /* this function probably fucks your RAM but whatevs */ | 38 /* :) */ |
41 std::string SanitizeLineEndings(const std::string& string) { | 39 std::string SanitizeLineEndings(const std::string& string) { |
42 std::string result(string); | 40 return ReplaceAll(ReplaceAll(ReplaceAll(string, "\r\n", "\n"), "<br>", "\n"), "\n\n\n", "\n\n"); |
43 result = ReplaceAll(result, "\r\n", "\n"); | |
44 result = ReplaceAll(result, "<br>", "\n"); | |
45 result = ReplaceAll(result, "\n\n\n", "\n\n"); | |
46 return result; | |
47 } | 41 } |
48 | 42 |
43 /* removes dumb HTML tags because anilist is aids and | |
44 gives us HTML for synopses :/ */ | |
49 std::string RemoveHtmlTags(const std::string& string) { | 45 std::string RemoveHtmlTags(const std::string& string) { |
50 std::string html(string); | 46 std::string html(string); |
51 while (html.find("<") != std::string::npos) { | 47 while (html.find("<") != std::string::npos) { |
52 auto startpos = html.find("<"); | 48 auto startpos = html.find("<"); |
53 auto endpos = html.find(">") + 1; | 49 auto endpos = html.find(">") + 1; |
57 } | 53 } |
58 } | 54 } |
59 return html; | 55 return html; |
60 } | 56 } |
61 | 57 |
62 std::string TextifySynopsis(const std::string& string) { | 58 /* e.g. "&lt;" for "<" */ |
63 return RemoveHtmlTags(SanitizeLineEndings(string)); | 59 std::string ParseHtmlEntities(const std::string& string) { |
60 const std::unordered_map<std::string, std::string> map = { | |
61 {"&lt;", "<"}, | |
62 {"&rt;", ">"}, | |
63 {"&nbsp;", "\xA0"}, | |
64 {"&amp;", "&"}, | |
65 {"&quot;", "\""}, | |
66 {"&apos;", "'"}, | |
67 {"&cent;", "¢"}, | |
68 {"&pound;", "£"}, | |
69 {"&euro;", "€"}, | |
70 {"&yen;", "¥"}, | |
71 {"&copy;", "©"}, | |
72 {"&reg;", "®"}, | |
73 {"&rsquo;", "’"} // Haibane Renmei, AniList | |
74 }; | |
75 | |
76 std::string ret = string; | |
77 for (const auto& item : map) | |
78 ret = ReplaceAll(ret, item.first, item.second); | |
79 return ret; | |
64 } | 80 } |
65 | 81 |
66 /* these functions suck for i18n!... | 82 /* */ |
67 but we only use them with JSON | 83 std::string TextifySynopsis(const std::string& string) { |
68 stuff anyway */ | 84 return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string))); |
85 } | |
86 | |
87 /* let Qt handle the heavy lifting of locale shit | |
88 I don't want to deal with */ | |
69 std::string ToUpper(const std::string& string) { | 89 std::string ToUpper(const std::string& string) { |
70 std::string result(string); | 90 /* todo: this "locale" will have to be moved to session.h */ |
71 std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::toupper(c); }); | 91 QLocale locale; |
72 return result; | 92 return ToUtf8String(locale.toUpper(ToQString(string))); |
73 } | 93 } |
74 | 94 |
75 std::string ToLower(const std::string& string) { | 95 std::string ToLower(const std::string& string) { |
76 std::string result(string); | 96 QLocale locale; |
77 std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::tolower(c); }); | 97 return ToUtf8String(locale.toLower(ToQString(string))); |
78 return result; | |
79 } | 98 } |
80 | 99 |
81 std::wstring ToWstring(const std::string& string) { | 100 std::wstring ToWstring(const std::string& string) { |
82 std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter; | 101 std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter; |
83 return converter.from_bytes(string); | 102 return converter.from_bytes(string); |