annotate src/core/strings.cc @ 265:ff0b2052b234

*: add missing utf8proc files I'm an idiot LOL
author Paper <paper@paper.us.eu.org>
date Thu, 11 Apr 2024 10:22:05 -0400
parents 9a04802848c0
children f31305b9f60a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
1 /**
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
2 * strings.cpp: Useful functions for manipulating strings
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
3 **/
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
4 #include "core/strings.h"
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
5 #include "core/session.h" // locale
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
6
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
7 #include <QByteArray>
101
c537996cf67b *: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents: 100
diff changeset
8 #include <QDebug>
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
9 #include <QLocale>
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
10 #include <QString>
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
11
15
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
12 #include <algorithm>
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
13 #include <cctype>
62
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
14 #include <codecvt>
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
15 #include <iostream>
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
16 #include <locale>
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
17 #include <string>
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
18 #include <unordered_map>
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
19 #include <vector>
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
20
264
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
21 #include "utf8proc.h"
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
22
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
23 namespace Strings {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
24
99
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
/* Joins every element of `vector` into a single string, placing `delimiter`
 * between consecutive elements. An empty vector produces an empty string.
 */
std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
	std::string joined;

	bool first = true;
	for (const std::string& piece : vector) {
		/* the delimiter goes in front of every element except the first */
		if (!first)
			joined.append(delimiter);

		joined.append(piece);
		first = false;
	}

	return joined;
}
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
40
226
f784b5b1914c settings: add library page
Paper <mrpapersonic@gmail.com>
parents: 221
diff changeset
/* Joins the elements of `set`, in the set's iteration order, into a single
 * string with `delimiter` between consecutive elements. An empty set
 * produces an empty string.
 */
std::string Implode(const std::set<std::string>& set, const std::string& delimiter) {
	std::string joined;

	auto cursor = set.cbegin();
	const auto stop = set.cend();
	if (cursor == stop)
		return joined;

	/* first element carries no delimiter; every later one is prefixed by it */
	joined.append(*cursor);
	for (++cursor; cursor != stop; ++cursor) {
		joined.append(delimiter);
		joined.append(*cursor);
	}

	return joined;
}
f784b5b1914c settings: add library page
Paper <mrpapersonic@gmail.com>
parents: 221
diff changeset
55
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
/* Splits `text` on every occurrence of `delimiter`.
 *
 * - An empty `text` yields an empty vector.
 * - Adjacent, leading or trailing delimiters yield empty tokens.
 * - An empty `delimiter` yields the whole text as a single token.
 */
std::vector<std::string> Split(const std::string& text, const std::string& delimiter) {
	if (text.empty())
		return {};

	/* An empty delimiter is found at every position and never advances the
	 * cursor, so the loop below would spin forever pushing empty tokens. */
	if (delimiter.empty())
		return {text};

	std::vector<std::string> tokens;

	std::size_t start = 0;
	std::size_t end = 0;
	while ((end = text.find(delimiter, start)) != std::string::npos) {
		tokens.push_back(text.substr(start, end - start));
		start = end + delimiter.length();
	}
	/* whatever follows the last delimiter (possibly nothing) is the final token */
	tokens.push_back(text.substr(start));

	return tokens;
}
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
71
99
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
/* Replaces every non-overlapping occurrence of `find` in `string` with
 * `replace`, scanning left to right. Text inserted by a replacement is never
 * rescanned, so a `replace` that contains `find` is safe.
 *
 * An empty `find` leaves the string untouched.
 */
void ReplaceAll(std::string& string, std::string_view find, std::string_view replace) {
	/* An empty pattern matches at every position; replacing it would either
	 * never advance or grow the string forever. */
	if (find.empty())
		return;

	std::size_t pos = 0;
	while ((pos = string.find(find, pos)) != std::string::npos) {
		string.replace(pos, find.length(), replace);
		/* resume after the inserted text */
		pos += replace.length();
	}
}
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
82
260
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
83 void SanitizeLineEndings(std::string& string) {
114
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
84 /* LOL */
260
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
85 ReplaceAll(string, "\r\n", "\n");
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
86 ReplaceAll(string, "</p>", "\n");
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
87 ReplaceAll(string, "<br>", "\n");
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
88 ReplaceAll(string, "<br />", "\n");
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
89 ReplaceAll(string, "\n\n\n", "\n\n");
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
90 }
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
91
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
92 void ConvertRomanNumerals(std::string& string) {
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
93 static const std::vector<std::pair<std::string_view, std::string_view>> vec = {
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
94 {"2", "II"}, {"3", "III"}, {"4", "IV"}, {"5", "V"}, {"6", "VI"},
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
95 {"7", "VII"}, {"8", "VIII"}, {"9", "IX"}, {"11", "XI"}, {"12", "XII"},
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
96 {"13", "XIII"}
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
97 };
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
98
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
99 for (const auto& item : vec)
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
100 ReplaceAll(string, item.second, item.first);
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
101 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
102
264
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
103 /* this also performs case folding, so our string is lowercase after this */
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
104 void NormalizeUnicode(std::string& string) {
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
105 static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>(
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
106 UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE |
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
107 UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK |
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
108 UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
109 );
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
110
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
111 /* ack */
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
112 utf8proc_uint8_t* buf = nullptr;
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
113
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
114 const utf8proc_ssize_t size = utf8proc_map(
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
115 reinterpret_cast<const utf8proc_uint8_t*>(string.data()),
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
116 string.size(),
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
117 &buf,
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
118 options
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
119 );
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
120
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
121 if (size)
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
122 string = std::string(reinterpret_cast<const char*>(buf), size);
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
123
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
124 if (buf)
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
125 free(buf);
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
126 }
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
127
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
128 void NormalizeAnimeTitle(std::string& string) {
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
129 ConvertRomanNumerals(string);
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
130 NormalizeUnicode(string);
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
131 RemoveLeadingChars(string, ' ');
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
132 RemoveTrailingChars(string, ' ');
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
133 }
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
134
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
/* Strips HTML tags from `string`: every "<...>" span, delimiters included,
 * is erased. AniList delivers synopses as HTML, which is why this exists.
 *
 * A '<' with no '>' after it is not a tag; it and the text following it
 * are left in place.
 */
void RemoveHtmlTags(std::string& string) {
	std::size_t open = 0;
	while ((open = string.find('<', open)) != std::string::npos) {
		/* Look for the closing bracket AFTER the opening one. Searching from
		 * the start of the string can find a '>' that precedes the '<',
		 * which makes the erase length wrap around or come out as zero
		 * (an endless loop on input such as "x><y"). */
		const std::size_t close = string.find('>', open + 1);
		if (close == std::string::npos)
			break;

		string.erase(open, close - open + 1);
		/* `open` now indexes the text that followed the tag; rescan from there */
	}
}
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
147
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
/* Decodes a small set of named HTML entities in place, e.g. "&lt;" -> "<".
 *
 * The text is decoded in a single left-to-right pass, so the output of one
 * replacement is never decoded again ("&amp;lt;" becomes "&lt;", not "<").
 * Anything starting with '&' that is not in the table is copied through
 * unchanged. Replacement text is UTF-8.
 */
void ParseHtmlEntities(std::string& string) {
	/* Non-ASCII replacements are spelled as explicit UTF-8 byte escapes so
	 * they do not depend on the encoding of this source file. */
	static const std::vector<std::pair<std::string_view, std::string_view>> entities = {
		{"&lt;",    "<"           },
		{"&gt;",    ">"           },
		{"&nbsp;",  "\xC2\xA0"    }, /* U+00A0 */
		{"&amp;",   "&"           },
		{"&quot;",  "\""          },
		{"&apos;",  "'"           },
		{"&cent;",  "\xC2\xA2"    }, /* U+00A2 */
		{"&pound;", "\xC2\xA3"    }, /* U+00A3 */
		{"&euro;",  "\xE2\x82\xAC"}, /* U+20AC */
		{"&yen;",   "\xC2\xA5"    }, /* U+00A5 */
		{"&copy;",  "\xC2\xA9"    }, /* U+00A9 */
		{"&reg;",   "\xC2\xAE"    }, /* U+00AE */
		{"&rsquo;", "\xE2\x80\x99"}  /* U+2019; Haibane Renmei, AniList */
	};

	std::string decoded;
	decoded.reserve(string.size());

	std::size_t pos = 0;
	while (pos < string.size()) {
		const std::size_t amp = string.find('&', pos);
		if (amp == std::string::npos) {
			/* no more candidates: copy the tail and stop */
			decoded.append(string, pos, std::string::npos);
			break;
		}

		/* literal text in front of the '&' */
		decoded.append(string, pos, amp - pos);

		bool matched = false;
		for (const auto& entity : entities) {
			/* every name ends in ';', so no name is a prefix of another */
			if (string.compare(amp, entity.first.size(), entity.first) == 0) {
				decoded.append(entity.second);
				pos = amp + entity.first.size();
				matched = true;
				break;
			}
		}

		if (!matched) {
			/* bare or unknown '&...': keep it as-is */
			decoded.push_back('&');
			pos = amp + 1;
		}
	}

	string.swap(decoded);
}
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
174
99
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
175 /* removes stupid HTML stuff */
260
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
176 void TextifySynopsis(std::string& string) {
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
177 SanitizeLineEndings(string);
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
178 RemoveHtmlTags(string);
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
179 ParseHtmlEntities(string);
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
180 }
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
181
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
182 /* let Qt handle the heavy lifting of locale shit
221
53211cb1e7f5 library: add initial library stuff
Paper <paper@paper.us.eu.org>
parents: 211
diff changeset
183 * I don't want to deal with
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
184 */
15
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
185 std::string ToUpper(const std::string& string) {
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
186 return ToUtf8String(session.config.locale.GetLocale().toUpper(ToQString(string)));
15
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
187 }
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
188
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
189 std::string ToLower(const std::string& string) {
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
190 return ToUtf8String(session.config.locale.GetLocale().toLower(ToQString(string)));
15
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
191 }
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
192
62
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
/* Converts a UTF-8 encoded std::string to a wide string.
 *
 * The converter is created once and reused across calls. It is constructed
 * with empty error strings; if a std::range_error is thrown anyway, a
 * message is written to stderr and an empty wide string is returned.
 */
std::wstring ToWstring(const std::string& string) {
	static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L"");

	try {
		return converter.from_bytes(string);
	} catch (const std::range_error&) {
		std::cerr << "Failed to convert UTF-8 to wide string!" << std::endl;
		return std::wstring();
	}
}
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
204
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
205 std::wstring ToWstring(const QString& string) {
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
206 std::wstring arr(string.size(), L'\0');
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
207 string.toWCharArray(&arr.front());
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
208 return arr;
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
209 }
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
210
62
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
/* Encodes a wide string as UTF-8. The converter is created once and reused
 * across calls; it is constructed with empty error strings. */
std::string ToUtf8String(const std::wstring& wstring) {
	static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L"");

	std::string utf8 = converter.to_bytes(wstring);
	return utf8;
}
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
215
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
216 std::string ToUtf8String(const QString& string) {
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
217 const QByteArray ba = string.toUtf8();
77
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
218 return std::string(ba.constData(), ba.size());
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
219 }
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
220
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
221 std::string ToUtf8String(const QByteArray& ba) {
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
222 return std::string(ba.constData(), ba.size());
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
223 }
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
224
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
225 QString ToQString(const std::string& string) {
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
226 return QString::fromUtf8(string.c_str(), string.length());
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
227 }
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
228
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
229 QString ToQString(const std::wstring& wstring) {
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
230 return QString::fromWCharArray(wstring.c_str(), wstring.length());
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
231 }
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
232
211
7cf53145de11 strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents: 187
diff changeset
/* Spells a boolean out as the text "true" or "false". */
std::string ToUtf8String(const bool b) {
	if (b)
		return "true";

	return "false";
}
c537996cf67b *: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents: 100
diff changeset
236
211
7cf53145de11 strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents: 187
diff changeset
237 bool ToBool(const std::string& str, bool def) {
7cf53145de11 strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents: 187
diff changeset
238 std::istringstream s(Strings::ToLower(str));
7cf53145de11 strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents: 187
diff changeset
239 s >> std::boolalpha >> def;
116
254b1d2b7096 settings: add torrents page, make rss feed configurable
Paper <mrpapersonic@gmail.com>
parents: 114
diff changeset
240 return def;
254b1d2b7096 settings: add torrents page, make rss feed configurable
Paper <mrpapersonic@gmail.com>
parents: 114
diff changeset
241 }
254b1d2b7096 settings: add torrents page, make rss feed configurable
Paper <mrpapersonic@gmail.com>
parents: 114
diff changeset
242
211
7cf53145de11 strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents: 187
diff changeset
243 /* util funcs */
114
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
244 uint64_t HumanReadableSizeToBytes(const std::string& str) {
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
245 static const std::unordered_map<std::string, uint64_t> bytes_map = {
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
246 {"KB", 1ull << 10},
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
247 {"MB", 1ull << 20},
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
248 {"GB", 1ull << 30},
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
249 {"TB", 1ull << 40},
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
250 {"PB", 1ull << 50} /* surely we won't need more than this */
114
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
251 };
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
252
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
253 for (const auto& suffix : bytes_map) {
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
254 if (str.find(suffix.first) != std::string::npos) {
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
255 try {
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
256 uint64_t size = std::stod(str) * suffix.second;
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
257 return size;
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
258 } catch (std::invalid_argument const& ex) {
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
259 continue;
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
260 }
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
261 }
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
262 }
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
263
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
264 return ToInt(str, 0);
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
265 }
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
266
264
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
/*
 * Strips the run of `c` at the front of `s`, in place.
 *
 * At most size() - 1 characters are removed, so a string consisting
 * solely of `c` keeps its final character.
 */
void RemoveLeadingChars(std::string& s, const char c) {
	const std::string::size_type first_other = s.find_first_not_of(c);
	/* wraps around to npos for an empty string, which erase() accepts */
	const std::string::size_type all_but_last = s.size() - 1;

	const std::string::size_type count = (all_but_last < first_other) ? all_but_last : first_other;
	s.erase(0, count);
}
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
270
264
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
/*
 * Strips the run of `c` at the end of `s`, in place.
 *
 * A string consisting solely of `c` (or an empty one) ends up empty.
 */
void RemoveTrailingChars(std::string& s, const char c) {
	const std::string::size_type last_other = s.find_last_not_of(c);

	if (last_other == std::string::npos) {
		/* nothing but `c` in here */
		s.clear();
		return;
	}

	s.resize(last_other + 1);
}
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
274
102
b315f3759c56 *: big patch
Paper <mrpapersonic@gmail.com>
parents: 101
diff changeset
/*
 * Returns true when `str` starts with `sub`.
 *
 * An empty `sub` matches every string. A `str` shorter than `sub` never
 * matches; the previous character loop stopped at the shorter of the two
 * lengths and so reported a match in that case (e.g. "ab" against "abc").
 */
bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {
	if (str.size() < sub.size())
		return false;

	return str.compare(0, sub.size(), sub) == 0;
}
b315f3759c56 *: big patch
Paper <mrpapersonic@gmail.com>
parents: 101
diff changeset
282
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
283 } // namespace Strings