annotate src/core/strings.cc @ 99:503bc1547d49

strings: clarify on some functions and make some of them miniscule amounts faster
author Paper <mrpapersonic@gmail.com>
date Fri, 03 Nov 2023 09:00:46 -0400
parents 582b2fca1561
children f5940a575d83
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
1 /**
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
2 * strings.cc: Useful functions for manipulating strings
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
3 **/
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
4 #include "core/strings.h"
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
5 #include <QByteArray>
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
6 #include <QString>
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
7 #include <QLocale>
15
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
8 #include <algorithm>
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
9 #include <cctype>
62
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
10 #include <codecvt>
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
11 #include <locale>
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
12 #include <string>
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
13 #include <vector>
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
14
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
15 namespace Strings {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
16
99
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
/* Joins every element of `vector` into a single string, inserting
   `delimiter` between consecutive elements (never before the first or
   after the last).

   An empty vector yields the placeholder "-" rather than an empty string;
   this is long-standing behaviour and is preserved here. */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
    if (vector.empty())
        return "-";

    std::string out;
    bool first = true;
    for (const std::string& item : vector) {
        /* the delimiter goes *between* items, so skip it before the first */
        if (!first)
            out.append(delimiter);
        out.append(item);
        first = false;
    }
    return out;
}
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
29
99
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
/* Returns a copy of `string` in which every non-overlapping occurrence of
   `find` has been replaced with `replace`, scanning left to right.

   Text inserted by a replacement is never rescanned, so a `replace` that
   itself contains `find` is safe. An empty `find` would match at every
   position and never terminate, so in that case the input is returned
   unchanged.

   Mostly used for cleaning HTML debris out of AniList synopses. */
std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
    if (find.empty())
        return string;

    std::string::size_type pos = 0;
    while ((pos = string.find(find, pos)) != std::string::npos) {
        string.replace(pos, find.length(), replace);
        /* resume after the inserted text so it is not matched again */
        pos += replace.length();
    }
    return string;
}
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
40
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
41 std::string SanitizeLineEndings(const std::string& string) {
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
42 return ReplaceAll(ReplaceAll(ReplaceAll(string, "\r\n", "\n"), "<br>", "\n"), "\n\n\n", "\n\n");
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
43 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
44
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
/* Strips HTML tags from `string`: every span that starts at a '<' and ends
   at the next '>' after it is erased. Needed because AniList hands us
   synopses as HTML.

   A '<' with no closing '>' after it is treated as ordinary text and left
   in place (together with everything that follows it), and a '>' that
   appears before any '<' is never taken as the end of a tag. */
std::string RemoveHtmlTags(std::string string) {
    std::string::size_type start = 0;
    while ((start = string.find('<', start)) != std::string::npos) {
        /* look for the closing bracket only *after* the opening one */
        const std::string::size_type end = string.find('>', start);
        if (end == std::string::npos)
            break; /* unterminated tag: nothing more can be removed */

        string.erase(start, end - start + 1);
        /* `start` now points at whatever followed the tag; rescan from there */
    }
    return string;
}
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
57
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
/* Decodes the HTML character entities listed in the table below into plain
   text, e.g. "&lt;" becomes "<".

   The input is decoded in a single left-to-right pass and decoded output is
   never rescanned, so already-escaped text such as "&amp;lt;" correctly
   becomes "&lt;" instead of being decoded twice. An '&' that does not start
   one of the known entities is copied through unchanged. */
std::string ParseHtmlEntities(std::string string) {
    struct Entity {
        const char* name; /* entity as written in HTML, including '&' and ';' */
        const char* text; /* replacement text */
    };
    static const Entity entities[] = {
        {"&lt;", "<"},
        {"&gt;", ">"},          /* was misspelled "&rt;", which is not an HTML entity */
        {"&nbsp;", "\xC2\xA0"}, /* U+00A0 as UTF-8; a lone 0xA0 byte is not valid UTF-8 */
        {"&amp;", "&"},
        {"&quot;", "\""},
        {"&apos;", "'"},
        {"&cent;", "¢"},
        {"&pound;", "£"},
        {"&euro;", "€"},
        {"&yen;", "¥"},
        {"&copy;", "©"},
        {"&reg;", "®"},
        {"&rsquo;", "’"} // Haibane Renmei, AniList
    };

    std::string out;
    out.reserve(string.size());

    std::string::size_type pos = 0;
    while (pos < string.size()) {
        const std::string::size_type amp = string.find('&', pos);
        if (amp == std::string::npos) {
            /* no more candidates: copy the tail and finish */
            out.append(string, pos, std::string::npos);
            break;
        }

        /* copy the plain text that precedes the '&' */
        out.append(string, pos, amp - pos);

        bool matched = false;
        for (const Entity& entity : entities) {
            const std::string::size_type length = std::char_traits<char>::length(entity.name);
            if (string.compare(amp, length, entity.name) == 0) {
                out.append(entity.text);
                pos = amp + length;
                matched = true;
                break;
            }
        }

        if (!matched) {
            /* not an entity we know about; keep the ampersand literally */
            out.push_back('&');
            pos = amp + 1;
        }
    }
    return out;
}
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
83
99
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
84 /* removes stupid HTML stuff */
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
85 std::string TextifySynopsis(const std::string& string) {
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
86 return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string)));
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
87 }
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
88
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
89 /* let Qt handle the heavy lifting of locale shit
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
90 I don't want to deal with */
15
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
91 std::string ToUpper(const std::string& string) {
99
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
92 /* todo: this "locale" will have to be moved to session.h
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
93 it also defaults to en-US, which sucks very much for
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
94 anyone who doesn't speak american english... */
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
95 QLocale locale;
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
96 return ToUtf8String(locale.toUpper(ToQString(string)));
15
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
97 }
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
98
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
99 std::string ToLower(const std::string& string) {
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
100 QLocale locale;
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
101 return ToUtf8String(locale.toLower(ToQString(string)));
15
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
102 }
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
103
62
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
/* Converts the UTF-8 encoded `string` to a wide string using
   std::wstring_convert with a std::codecvt_utf8<wchar_t> facet. */
std::wstring ToWstring(const std::string& string) {
    using Utf8Facet = std::codecvt_utf8<wchar_t>;
    std::wstring_convert<Utf8Facet, wchar_t> utf8ToWide;
    std::wstring wide = utf8ToWide.from_bytes(string);
    return wide;
}
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
108
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
109 std::wstring ToWstring(const QString& string) {
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
110 std::wstring arr(string.size(), L'\0');
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
111 string.toWCharArray(&arr.front());
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
112 return arr;
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
113 }
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
114
62
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
/* Converts the wide string `wstring` to a UTF-8 encoded std::string using
   std::wstring_convert with a std::codecvt_utf8<wchar_t> facet. */
std::string ToUtf8String(const std::wstring& wstring) {
    using Utf8Facet = std::codecvt_utf8<wchar_t>;
    std::wstring_convert<Utf8Facet, wchar_t> wideToUtf8;
    std::string bytes = wideToUtf8.to_bytes(wstring);
    return bytes;
}
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
119
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
120 std::string ToUtf8String(const QString& string) {
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
121 QByteArray ba = string.toUtf8();
77
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
122 return std::string(ba.constData(), ba.size());
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
123 }
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
124
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
125 std::string ToUtf8String(const QByteArray& ba) {
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
126 return std::string(ba.constData(), ba.size());
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
127 }
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
128
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
129 QString ToQString(const std::string& string) {
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
130 return QString::fromUtf8(string.c_str(), string.length());
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
131 }
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
132
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
133 QString ToQString(const std::wstring& wstring) {
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
134 return QString::fromWCharArray(wstring.c_str(), wstring.length());
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
135 }
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
136
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
137 } // namespace Strings