Mercurial > minori
annotate src/core/strings.cc @ 99:503bc1547d49
strings: clarify on some functions and make some of them miniscule
amounts faster
author | Paper <mrpapersonic@gmail.com> |
---|---|
date | Fri, 03 Nov 2023 09:00:46 -0400 |
parents | 582b2fca1561 |
children | f5940a575d83 |
rev | line source |
---|---|
9 | 1 /** |
2 * strings.cpp: Useful functions for manipulating strings | |
3 **/ | |
4 #include "core/strings.h" | |
64 | 5 #include <QByteArray> |
6 #include <QString> | |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
7 #include <QLocale> |
15 | 8 #include <algorithm> |
9 #include <cctype> | |
62 | 10 #include <codecvt> |
9 | 11 #include <locale> |
12 #include <string> | |
13 #include <vector> | |
14 | |
15 namespace Strings { | |
16 | |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
/* Joins every element of `vector` into one string, placing `delimiter`
   between consecutive elements.

   An empty vector yields the placeholder "-" rather than an empty string. */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
	if (vector.empty())
		return "-";

	auto element = vector.begin();
	std::string joined = *element;
	for (++element; element != vector.end(); ++element) {
		/* the delimiter only ever goes *between* two elements */
		joined += delimiter;
		joined += *element;
	}
	return joined;
}
29 | |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
/* Replaces every non-overlapping occurrence of `find` in `string` with
   `replace`, scanning left to right, and returns the result.

   Text inserted by a replacement is never rescanned, so a `replace` that
   itself contains `find` cannot cause runaway growth.

   An empty `find` matches at every position and would never terminate, so
   in that case the input is returned unchanged.

   This is mostly used for cleaning HTML debris out of AniList synopses. */
std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
	if (find.empty())
		return string;

	std::string::size_type pos = 0;
	while ((pos = string.find(find, pos)) != std::string::npos) {
		string.replace(pos, find.length(), replace);
		/* skip past what we just inserted */
		pos += replace.length();
	}
	return string;
}
40 | |
41 std::string SanitizeLineEndings(const std::string& string) { | |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
42 return ReplaceAll(ReplaceAll(ReplaceAll(string, "\r\n", "\n"), "<br>", "\n"), "\n\n\n", "\n\n"); |
9 | 43 } |
44 | |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
/* Strips HTML tags from `string`: every span that starts at a '<' and ends
   at the next '>' *after* it is removed. AniList hands us HTML for
   synopses, hence this.

   A '>' that appears before any '<' is ordinary text and is left alone.
   If a '<' has no closing '>' the rest of the string is returned untouched,
   so stray less-than signs do not swallow the remaining text.

   This is a plain character scan, not an HTML parser. */
std::string RemoveHtmlTags(std::string string) {
	std::string::size_type open = 0;
	while ((open = string.find('<', open)) != std::string::npos) {
		/* only a '>' following this '<' can close the tag */
		const std::string::size_type close = string.find('>', open + 1);
		if (close == std::string::npos)
			break; /* unterminated: no later '<' can be closed either */

		string.erase(open, close - open + 1);
		/* keep `open` where it is; the text that followed the tag now
		   starts at this index */
	}
	return string;
}
57 | |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
/* Decodes a small, fixed set of named HTML entities into UTF-8 text,
   e.g. "&lt;" becomes "<".

   The input is decoded in a single left-to-right pass and decoded output
   is never rescanned, so "&amp;lt;" yields the literal text "&lt;" rather
   than "<". Unknown entities and bare '&' characters are copied through
   unchanged. Numeric character references ("&#39;") are not handled. */
std::string ParseHtmlEntities(std::string string) {
	struct Entity {
		std::string name; /* full entity, including '&' and ';' */
		std::string text; /* UTF-8 replacement */
	};

	/* Replacement bytes are spelled out as UTF-8 escapes so the result does
	   not depend on the encoding this source file is compiled with. */
	static const Entity entities[] = {
		{"&lt;",    "<"},
		{"&gt;",    ">"},
		{"&nbsp;",  "\xC2\xA0"},     /* U+00A0 NO-BREAK SPACE */
		{"&amp;",   "&"},
		{"&quot;",  "\""},
		{"&apos;",  "'"},
		{"&cent;",  "\xC2\xA2"},     /* U+00A2 */
		{"&pound;", "\xC2\xA3"},     /* U+00A3 */
		{"&euro;",  "\xE2\x82\xAC"}, /* U+20AC */
		{"&yen;",   "\xC2\xA5"},     /* U+00A5 */
		{"&copy;",  "\xC2\xA9"},     /* U+00A9 */
		{"&reg;",   "\xC2\xAE"},     /* U+00AE */
		{"&rsquo;", "\xE2\x80\x99"}  /* U+2019; Haibane Renmei, AniList */
	};

	std::string out;
	out.reserve(string.size());

	std::string::size_type pos = 0;
	for (;;) {
		const std::string::size_type amp = string.find('&', pos);
		if (amp == std::string::npos) {
			/* no more candidates: copy the tail and finish */
			out.append(string, pos, std::string::npos);
			break;
		}
		out.append(string, pos, amp - pos);

		const Entity* match = nullptr;
		for (const Entity& entity : entities) {
			if (string.compare(amp, entity.name.size(), entity.name) == 0) {
				match = &entity;
				break;
			}
		}

		if (match) {
			out += match->text;
			pos = amp + match->name.size();
		} else {
			/* not an entity we know: keep the ampersand as-is */
			out += '&';
			pos = amp + 1;
		}
	}
	return out;
}
83 | |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
84 /* removes stupid HTML stuff */ |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
85 std::string TextifySynopsis(const std::string& string) { |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
86 return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string))); |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
87 } |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
88 |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
89 /* let Qt handle the heavy lifting of locale shit |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
90 I don't want to deal with */ |
15 | 91 std::string ToUpper(const std::string& string) { |
99
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
92 /* todo: this "locale" will have to be moved to session.h |
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
93 it also defaults to en-US, which sucks very much for |
503bc1547d49
strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents:
98
diff
changeset
|
94 anyone who doesn't speak american english... */ |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
95 QLocale locale; |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
96 return ToUtf8String(locale.toUpper(ToQString(string))); |
15 | 97 } |
98 | |
99 std::string ToLower(const std::string& string) { | |
98
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
100 QLocale locale; |
582b2fca1561
strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents:
81
diff
changeset
|
101 return ToUtf8String(locale.toLower(ToQString(string))); |
15 | 102 } |
103 | |
/* Converts the UTF-8 encoded `string` to a wide string using
   std::wstring_convert with std::codecvt_utf8<wchar_t>.

   NOTE(review): from_bytes() reports malformed input by throwing
   std::range_error unless an error string was supplied to the converter;
   none is supplied here. */
std::wstring ToWstring(const std::string& string) {
	using Utf8ToWide = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;

	Utf8ToWide utf8_to_wide;
	return utf8_to_wide.from_bytes(string);
}
108 | |
64 | 109 std::wstring ToWstring(const QString& string) { |
110 std::wstring arr(string.size(), L'\0'); | |
111 string.toWCharArray(&arr.front()); | |
112 return arr; | |
113 } | |
114 | |
/* Converts the wide string `wstring` to a UTF-8 encoded std::string using
   std::wstring_convert with std::codecvt_utf8<wchar_t>. */
std::string ToUtf8String(const std::wstring& wstring) {
	using WideToUtf8 = std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t>;

	WideToUtf8 wide_to_utf8;
	return wide_to_utf8.to_bytes(wstring);
}
119 | |
64 | 120 std::string ToUtf8String(const QString& string) { |
121 QByteArray ba = string.toUtf8(); | |
77 | 122 return std::string(ba.constData(), ba.size()); |
123 } | |
124 | |
125 std::string ToUtf8String(const QByteArray& ba) { | |
126 return std::string(ba.constData(), ba.size()); | |
64 | 127 } |
128 | |
129 QString ToQString(const std::string& string) { | |
130 return QString::fromUtf8(string.c_str(), string.length()); | |
131 } | |
132 | |
133 QString ToQString(const std::wstring& wstring) { | |
134 return QString::fromWCharArray(wstring.c_str(), wstring.length()); | |
135 } | |
136 | |
9 | 137 } // namespace Strings |