Mercurial > minori
annotate src/core/strings.cc @ 98:582b2fca1561
strings: parse HTML entities when reading synopsis, make the
toupper and tolower functions more sane
| author | Paper <mrpapersonic@gmail.com> |
|---|---|
| date | Thu, 02 Nov 2023 15:22:02 -0400 |
| parents | 9b2b41f83a5e |
| children | 503bc1547d49 |
| rev | line source |
|---|---|
| 9 | 1 /** |
| 2 * strings.cc: Useful functions for manipulating strings | |
| 3 **/ | |
| 4 #include "core/strings.h" | |
| 64 | 5 #include <QByteArray> |
| 6 #include <QString> | |
|
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
7 #include <QLocale> |
| 15 | 8 #include <algorithm> |
| 9 #include <cctype> | |
| 62 | 10 #include <codecvt> |
| 9 | 11 #include <locale> |
| 12 #include <string> | |
| 13 #include <vector> | |
| 14 | |
| 15 namespace Strings { | |
| 16 | |
/* Joins every element of `vector` into one string, placing `delimiter`
 * between neighbouring elements.
 *
 * An empty input does not produce an empty string: it yields the
 * placeholder "-" instead. */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
    if (vector.empty())
        return "-";

    /* seed the result with the first element so that the loop only has
     * to emit "delimiter + element" pairs */
    auto it = vector.begin();
    std::string joined = *it;
    for (++it; it != vector.end(); ++it) {
        joined += delimiter;
        joined += *it;
    }
    return joined;
}
| 28 | |
|
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
/* Returns a copy of `string` in which every non-overlapping occurrence of
 * `find` has been replaced by `replace`.
 *
 * The scan moves left to right and resumes *after* each inserted
 * replacement, so text introduced by `replace` is never re-examined
 * (e.g. replacing "a" with "aa" terminates).
 *
 * An empty `find` pattern matches at every position and would make the
 * loop below run forever; in that case the input is returned untouched. */
std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
    if (find.empty())
        return string;

    std::size_t pos = 0;
    while ((pos = string.find(find, pos)) != std::string::npos) {
        string.replace(pos, find.length(), replace);
        /* skip past what we just inserted */
        pos += replace.length();
    }
    return string;
}
| 37 | |
|
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
38 /* :) */ |
| 9 | 39 std::string SanitizeLineEndings(const std::string& string) { |
|
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
40 return ReplaceAll(ReplaceAll(ReplaceAll(string, "\r\n", "\n"), "<br>", "\n"), "\n\n\n", "\n\n"); |
| 9 | 41 } |
| 42 | |
|
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
43 /* removes dumb HTML tags because anilist is aids and |
|
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
44 gives us HTML for synopses :/ */ |
/* Returns a copy of `string` with every "<...>" span removed.
 *
 * A tag is taken to be a '<' followed, somewhere later, by a '>'.
 * The closing bracket is searched for *after* the opening one, so a stray
 * '>' earlier in the text (e.g. "1 > 0 <b>x</b>") cannot produce a
 * negative length. A '<' that is never closed is not a tag; it and the
 * rest of the text are kept verbatim instead of being cut off. */
std::string RemoveHtmlTags(const std::string& string) {
    std::string out;
    out.reserve(string.size());

    std::size_t pos = 0;
    while (pos < string.size()) {
        const std::size_t open = string.find('<', pos);
        if (open == std::string::npos) {
            /* no more tags: copy the tail and stop */
            out.append(string, pos, std::string::npos);
            break;
        }

        const std::size_t close = string.find('>', open + 1);
        if (close == std::string::npos) {
            /* unterminated '<': treat the remainder as plain text */
            out.append(string, pos, std::string::npos);
            break;
        }

        /* keep the text in front of the tag, then jump past the tag */
        out.append(string, pos, open - pos);
        pos = close + 1;
    }
    return out;
}
| 57 | |
|
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
/* Decodes a small set of named HTML entities, e.g. "&lt;" becomes "<".
 *
 * The input is scanned exactly once from left to right, so the output of
 * one substitution is never decoded a second time: "&amp;lt;" yields the
 * text "&lt;", not "<". Entities that are not in the table, and bare '&'
 * characters, are copied through unchanged.
 *
 * All replacement values are UTF-8 byte sequences (written as escapes so
 * they do not depend on the encoding of this source file). */
std::string ParseHtmlEntities(const std::string& string) {
    struct Entity {
        const char* name; /* entity as it appears in the markup */
        const char* text; /* UTF-8 replacement */
    };
    static const Entity entities[] = {
        {"&lt;",    "<"},
        {"&gt;",    ">"},
        {"&nbsp;",  "\xC2\xA0"},     /* U+00A0 NO-BREAK SPACE */
        {"&amp;",   "&"},
        {"&quot;",  "\""},
        {"&apos;",  "'"},
        {"&cent;",  "\xC2\xA2"},     /* U+00A2 CENT SIGN */
        {"&pound;", "\xC2\xA3"},     /* U+00A3 POUND SIGN */
        {"&euro;",  "\xE2\x82\xAC"}, /* U+20AC EURO SIGN */
        {"&yen;",   "\xC2\xA5"},     /* U+00A5 YEN SIGN */
        {"&copy;",  "\xC2\xA9"},     /* U+00A9 COPYRIGHT SIGN */
        {"&reg;",   "\xC2\xAE"},     /* U+00AE REGISTERED SIGN */
        {"&rsquo;", "\xE2\x80\x99"}  /* U+2019; Haibane Renmei, AniList */
    };

    std::string ret;
    ret.reserve(string.size());

    std::size_t pos = 0;
    while (pos < string.size()) {
        if (string[pos] == '&') {
            bool matched = false;
            for (const Entity& entity : entities) {
                const std::size_t len = std::char_traits<char>::length(entity.name);
                /* compare() clamps the length at the end of the string,
                 * so a truncated entity simply fails to match */
                if (string.compare(pos, len, entity.name) == 0) {
                    ret.append(entity.text);
                    pos += len;
                    matched = true;
                    break;
                }
            }
            if (matched)
                continue;
        }
        ret.push_back(string[pos]);
        ++pos;
    }
    return ret;
}
| 81 | |
|
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
82 /* */ |
|
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
83 std::string TextifySynopsis(const std::string& string) { |
|
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
84 return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string))); |
|
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
85 } |
|
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
86 |
|
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
87 /* let Qt handle the heavy lifting of locale shit |
|
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
88 I don't want to deal with */ |
| 15 | 89 std::string ToUpper(const std::string& string) { |
|
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
90 /* todo: this "locale" will have to be moved to session.h */ |
|
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
91 QLocale locale; |
|
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
92 return ToUtf8String(locale.toUpper(ToQString(string))); |
| 15 | 93 } |
| 94 | |
| 95 std::string ToLower(const std::string& string) { | |
|
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
96 QLocale locale; |
|
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
97 return ToUtf8String(locale.toLower(ToQString(string))); |
| 15 | 98 } |
| 99 | |
/* Converts a UTF-8 encoded std::string into a std::wstring using
 * std::wstring_convert with the std::codecvt_utf8<wchar_t> facet. */
std::wstring ToWstring(const std::string& string) {
    using Utf8Facet = std::codecvt_utf8<wchar_t>;
    std::wstring_convert<Utf8Facet, wchar_t> utf8_to_wide;
    std::wstring wide = utf8_to_wide.from_bytes(string);
    return wide;
}
| 104 | |
| 64 | 105 std::wstring ToWstring(const QString& string) { |
| 106 std::wstring arr(string.size(), L'\0'); | |
| 107 string.toWCharArray(&arr.front()); | |
| 108 return arr; | |
| 109 } | |
| 110 | |
/* Converts a std::wstring into a UTF-8 encoded std::string using
 * std::wstring_convert with the std::codecvt_utf8<wchar_t> facet. */
std::string ToUtf8String(const std::wstring& wstring) {
    using Utf8Facet = std::codecvt_utf8<wchar_t>;
    std::wstring_convert<Utf8Facet, wchar_t> wide_to_utf8;
    std::string utf8 = wide_to_utf8.to_bytes(wstring);
    return utf8;
}
| 115 | |
| 64 | 116 std::string ToUtf8String(const QString& string) { |
| 117 QByteArray ba = string.toUtf8(); | |
| 77 | 118 return std::string(ba.constData(), ba.size()); |
| 119 } | |
| 120 | |
| 121 std::string ToUtf8String(const QByteArray& ba) { | |
| 122 return std::string(ba.constData(), ba.size()); | |
| 64 | 123 } |
| 124 | |
| 125 QString ToQString(const std::string& string) { | |
| 126 return QString::fromUtf8(string.c_str(), string.length()); | |
| 127 } | |
| 128 | |
| 129 QString ToQString(const std::wstring& wstring) { | |
| 130 return QString::fromWCharArray(wstring.c_str(), wstring.length()); | |
| 131 } | |
| 132 | |
| 9 | 133 } // namespace Strings |
