annotate src/core/strings.cc @ 321:8141f409d52c

services/anilist: fix getting producers should studios and producers be stored separately?
author Paper <paper@paper.us.eu.org>
date Wed, 12 Jun 2024 20:42:44 -0400
parents 1b5c04268d6a
children c32467cd06bb
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
1 /**
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
2 * strings.cpp: Useful functions for manipulating strings
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
3 **/
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
4 #include "core/strings.h"
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
5 #include "core/session.h" // locale
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
6
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
7 #include <QByteArray>
101
c537996cf67b *: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents: 100
diff changeset
8 #include <QDebug>
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
9 #include <QLocale>
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
10 #include <QString>
274
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
11 #include <QTextDocument>
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
12
15
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
13 #include <algorithm>
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
14 #include <cctype>
62
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
15 #include <codecvt>
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
16 #include <iostream>
273
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
17 #include <iomanip>
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
18 #include <locale>
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
19 #include <string>
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
20 #include <unordered_map>
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
21 #include <vector>
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
22
264
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
23 #include "utf8proc.h"
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
24
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
25 namespace Strings {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
26
99
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
27 /* ew */
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
28 std::string Implode(const std::vector<std::string>& vector, const std::string& delimiter) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
29 if (vector.size() < 1)
250
c130f47f6f48 *: many many changes
Paper <paper@paper.us.eu.org>
parents: 231
diff changeset
30 return "";
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
31
221
53211cb1e7f5 library: add initial library stuff
Paper <paper@paper.us.eu.org>
parents: 211
diff changeset
32 std::string out;
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
33
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
34 for (unsigned long long i = 0; i < vector.size(); i++) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
35 out.append(vector.at(i));
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
36 if (i < vector.size() - 1)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
37 out.append(delimiter);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
38 }
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
39
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
40 return out;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
41 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
42
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
43 std::vector<std::string> Split(const std::string& text, const std::string& delimiter) {
250
c130f47f6f48 *: many many changes
Paper <paper@paper.us.eu.org>
parents: 231
diff changeset
44 if (text.length() < 1)
c130f47f6f48 *: many many changes
Paper <paper@paper.us.eu.org>
parents: 231
diff changeset
45 return {};
c130f47f6f48 *: many many changes
Paper <paper@paper.us.eu.org>
parents: 231
diff changeset
46
118
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
47 std::vector<std::string> tokens;
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
48
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
49 std::size_t start = 0, end = 0;
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
50 while ((end = text.find(delimiter, start)) != std::string::npos) {
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
51 tokens.push_back(text.substr(start, end - start));
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
52 start = end + delimiter.length();
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
53 }
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
54 tokens.push_back(text.substr(start));
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
55
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
56 return tokens;
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
57 }
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
58
99
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
59 /* This function is really only used for cleaning up the synopsis of
221
53211cb1e7f5 library: add initial library stuff
Paper <paper@paper.us.eu.org>
parents: 211
diff changeset
60 * horrible HTML debris from AniList :)
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
61 */
260
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
62 void ReplaceAll(std::string& string, std::string_view find, std::string_view replace) {
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
63 size_t pos = 0;
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
64 while ((pos = string.find(find, pos)) != std::string::npos) {
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
65 string.replace(pos, find.length(), replace);
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
66 pos += replace.length();
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
67 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
68 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
69
260
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
70 void SanitizeLineEndings(std::string& string) {
114
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
71 /* LOL */
260
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
72 ReplaceAll(string, "\r\n", "\n");
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
73 ReplaceAll(string, "</p>", "\n");
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
74 ReplaceAll(string, "<br>", "\n");
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
75 ReplaceAll(string, "<br />", "\n");
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
76 ReplaceAll(string, "\n\n\n", "\n\n");
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
77 }
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
78
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
79 void ConvertRomanNumerals(std::string& string) {
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
80 static const std::vector<std::pair<std::string_view, std::string_view>> vec = {
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
81 {"2", "II"}, {"3", "III"}, {"4", "IV"}, {"5", "V"}, {"6", "VI"},
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
82 {"7", "VII"}, {"8", "VIII"}, {"9", "IX"}, {"11", "XI"}, {"12", "XII"},
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
83 {"13", "XIII"}
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
84 };
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
85
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
86 for (const auto& item : vec)
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
87 ReplaceAll(string, item.second, item.first);
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
88 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
89
264
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
90 /* this also performs case folding, so our string is lowercase after this */
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
91 void NormalizeUnicode(std::string& string) {
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
92 static constexpr utf8proc_option_t options = static_cast<utf8proc_option_t>(
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
93 UTF8PROC_COMPAT | UTF8PROC_COMPOSE | UTF8PROC_STABLE |
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
94 UTF8PROC_IGNORE | UTF8PROC_STRIPCC | UTF8PROC_STRIPMARK |
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
95 UTF8PROC_LUMP | UTF8PROC_CASEFOLD | UTF8PROC_NLF2LS
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
96 );
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
97
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
98 /* ack */
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
99 utf8proc_uint8_t* buf = nullptr;
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
100
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
101 const utf8proc_ssize_t size = utf8proc_map(
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
102 reinterpret_cast<const utf8proc_uint8_t*>(string.data()),
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
103 string.size(),
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
104 &buf,
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
105 options
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
106 );
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
107
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
108 if (size)
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
109 string = std::string(reinterpret_cast<const char*>(buf), size);
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
110
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
111 if (buf)
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
112 free(buf);
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
113 }
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
114
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
115 void NormalizeAnimeTitle(std::string& string) {
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
116 ConvertRomanNumerals(string);
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
117 NormalizeUnicode(string);
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
118 RemoveLeadingChars(string, ' ');
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
119 RemoveTrailingChars(string, ' ');
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
120 }
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
121
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
122 /* removes dumb HTML tags because anilist is aids and
250
c130f47f6f48 *: many many changes
Paper <paper@paper.us.eu.org>
parents: 231
diff changeset
123 * gives us HTML for synopses :/
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
124 */
260
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
125 void RemoveHtmlTags(std::string& string) {
99
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
126 while (string.find("<") != std::string::npos) {
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
127 auto startpos = string.find("<");
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
128 auto endpos = string.find(">") + 1;
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
129
99
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
130 if (endpos != std::string::npos)
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
131 string.erase(startpos, endpos - startpos);
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
132 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
133 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
134
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
135 /* e.g. "&lt;" for "<" */
260
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
136 void ParseHtmlEntities(std::string& string) {
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
137 const std::unordered_map<std::string, std::string> map = {
274
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
138 {"&lt;", "<"},
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
139 {"&rt;", ">"},
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
140 {"&nbsp;", "\xA0"},
274
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
141 {"&amp;", "&"},
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
142 {"&quot;", "\""},
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
143 {"&apos;", "'"},
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
144 {"&cent;", "¢"},
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
145 {"&pound;", "£"},
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
146 {"&euro;", "€"},
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
147 {"&yen;", "Â¥"},
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
148 {"&copy;", "©"},
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
149 {"&reg;", "®"},
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
150 {"&times;", "×"},
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
151 {"&rsquo;", "’" } // Haibane Renmei, AniList
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
152 };
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
153
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
154 for (const auto& item : map)
260
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
155 ReplaceAll(string, item.first, item.second);
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
156 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
157
99
503bc1547d49 strings: clarify on some functions and make some of them miniscule
Paper <mrpapersonic@gmail.com>
parents: 98
diff changeset
158 /* removes stupid HTML stuff */
260
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
159 void TextifySynopsis(std::string& string) {
274
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
160 #if 1
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
161 /* Just let Qt deal with it. */
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
162 QTextDocument text;
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
163 text.setHtml(Strings::ToQString(string));
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
164 string = Strings::ToUtf8String(text.toPlainText());
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
165 #else
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
166 /* old implementation here... beware of jankiness */
260
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
167 SanitizeLineEndings(string);
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
168 RemoveHtmlTags(string);
dd211ff68b36 pages/seasons: add initial functionality
Paper <paper@paper.us.eu.org>
parents: 258
diff changeset
169 ParseHtmlEntities(string);
274
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
170 #endif
98
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
171 }
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
172
582b2fca1561 strings: parse HTML entities when reading synopsis, make the
Paper <mrpapersonic@gmail.com>
parents: 81
diff changeset
173 /* let Qt handle the heavy lifting of locale shit
221
53211cb1e7f5 library: add initial library stuff
Paper <paper@paper.us.eu.org>
parents: 211
diff changeset
174 * I don't want to deal with
258
862d0d8619f6 *: HUUUGE changes
Paper <paper@paper.us.eu.org>
parents: 250
diff changeset
175 */
15
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
176 std::string ToUpper(const std::string& string) {
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
177 return ToUtf8String(session.config.locale.GetLocale().toUpper(ToQString(string)));
15
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
178 }
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
179
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
180 std::string ToLower(const std::string& string) {
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
181 return ToUtf8String(session.config.locale.GetLocale().toLower(ToQString(string)));
15
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
182 }
cde8f67a7c7d *: update, megacommit :)
Paper <mrpapersonic@gmail.com>
parents: 9
diff changeset
183
62
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
184 std::wstring ToWstring(const std::string& string) {
230
2f5a9247e501 torrents: implement download button
Paper <paper@paper.us.eu.org>
parents: 221
diff changeset
185 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L"");
2f5a9247e501 torrents: implement download button
Paper <paper@paper.us.eu.org>
parents: 221
diff changeset
186
2f5a9247e501 torrents: implement download button
Paper <paper@paper.us.eu.org>
parents: 221
diff changeset
187 std::wstring wstr;
2f5a9247e501 torrents: implement download button
Paper <paper@paper.us.eu.org>
parents: 221
diff changeset
188 try {
2f5a9247e501 torrents: implement download button
Paper <paper@paper.us.eu.org>
parents: 221
diff changeset
189 wstr = converter.from_bytes(string);
2f5a9247e501 torrents: implement download button
Paper <paper@paper.us.eu.org>
parents: 221
diff changeset
190 } catch (std::range_error const& ex) {
2f5a9247e501 torrents: implement download button
Paper <paper@paper.us.eu.org>
parents: 221
diff changeset
191 std::cerr << "Failed to convert UTF-8 to wide string!" << std::endl;
2f5a9247e501 torrents: implement download button
Paper <paper@paper.us.eu.org>
parents: 221
diff changeset
192 }
2f5a9247e501 torrents: implement download button
Paper <paper@paper.us.eu.org>
parents: 221
diff changeset
193 return wstr;
62
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
194 }
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
195
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
196 std::wstring ToWstring(const QString& string) {
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
197 std::wstring arr(string.size(), L'\0');
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
198 string.toWCharArray(&arr.front());
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
199 return arr;
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
200 }
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
201
62
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
202 std::string ToUtf8String(const std::wstring& wstring) {
230
2f5a9247e501 torrents: implement download button
Paper <paper@paper.us.eu.org>
parents: 221
diff changeset
203 static std::wstring_convert<std::codecvt_utf8<wchar_t>> converter("", L"");
62
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
204 return converter.to_bytes(wstring);
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
205 }
4c6dd5999b39 *: update
Paper <mrpapersonic@gmail.com>
parents: 15
diff changeset
206
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
207 std::string ToUtf8String(const QString& string) {
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
208 const QByteArray ba = string.toUtf8();
77
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
209 return std::string(ba.constData(), ba.size());
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
210 }
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
211
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
212 std::string ToUtf8String(const QByteArray& ba) {
6f7385bd334c *: update
Paper <mrpapersonic@gmail.com>
parents: 76
diff changeset
213 return std::string(ba.constData(), ba.size());
64
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
214 }
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
215
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
216 QString ToQString(const std::string& string) {
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
217 return QString::fromUtf8(string.c_str(), string.length());
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
218 }
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
219
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
220 QString ToQString(const std::wstring& wstring) {
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
221 return QString::fromWCharArray(wstring.c_str(), wstring.length());
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
222 }
fe719c109dbc *: update
Paper <mrpapersonic@gmail.com>
parents: 62
diff changeset
223
211
7cf53145de11 strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents: 187
diff changeset
224 std::string ToUtf8String(const bool b) {
7cf53145de11 strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents: 187
diff changeset
225 return b ? "true" : "false"; // lol
101
c537996cf67b *: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents: 100
diff changeset
226 }
c537996cf67b *: multitude of config changes
Paper <mrpapersonic@gmail.com>
parents: 100
diff changeset
227
211
7cf53145de11 strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents: 187
diff changeset
228 bool ToBool(const std::string& str, bool def) {
7cf53145de11 strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents: 187
diff changeset
229 std::istringstream s(Strings::ToLower(str));
7cf53145de11 strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents: 187
diff changeset
230 s >> std::boolalpha >> def;
116
254b1d2b7096 settings: add torrents page, make rss feed configurable
Paper <mrpapersonic@gmail.com>
parents: 114
diff changeset
231 return def;
254b1d2b7096 settings: add torrents page, make rss feed configurable
Paper <mrpapersonic@gmail.com>
parents: 114
diff changeset
232 }
254b1d2b7096 settings: add torrents page, make rss feed configurable
Paper <mrpapersonic@gmail.com>
parents: 114
diff changeset
233
211
7cf53145de11 strings: use templates for ToInt, std::to_string -> Strings::ToUtf8String
Paper <mrpapersonic@gmail.com>
parents: 187
diff changeset
234 /* util funcs */
114
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
235 uint64_t HumanReadableSizeToBytes(const std::string& str) {
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
236 static const std::unordered_map<std::string, uint64_t> bytes_map = {
273
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
237 {"KB", 1000ull},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
238 {"MB", 1000000ull},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
239 {"GB", 1000000000ull},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
240 {"TB", 1000000000000ull},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
241 {"PB", 1000000000000000ull},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
242 {"KiB", 1ull << 10},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
243 {"MiB", 1ull << 20},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
244 {"GiB", 1ull << 30},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
245 {"TiB", 1ull << 40},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
246 {"PiB", 1ull << 50} /* surely we won't need more than this */
114
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
247 };
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
248
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
249 for (const auto& suffix : bytes_map) {
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
250 if (str.find(suffix.first) != std::string::npos) {
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
251 try {
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
252 uint64_t size = std::stod(str) * suffix.second;
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
253 return size;
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
254 } catch (std::invalid_argument const& ex) {
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
255 continue;
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
256 }
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
257 }
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
258 }
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
259
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
260 return ToInt(str, 0);
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
261 }
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
262
273
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
263 std::string BytesToHumanReadableSize(uint64_t bytes, int precision) {
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
264 #if QT_VERSION >= QT_VERSION_CHECK(5, 10, 0)
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
265 /* QLocale in Qt >= 5.10.0 has a function for this */
274
f6a756c19bfb anime_list.cc: use mutexes so we don't sex the stack
Paper <paper@paper.us.eu.org>
parents: 273
diff changeset
266 return Strings::ToUtf8String(session.config.locale.GetLocale().formattedDataSize(bytes, precision));
273
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
267 #else
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
268 static const std::unordered_map<uint64_t, std::string> map = {
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
269 {1ull << 10, "KiB"},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
270 {1ull << 20, "MiB"},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
271 {1ull << 30, "GiB"},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
272 {1ull << 40, "TiB"},
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
273 {1ull << 50, "PiB"}
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
274 };
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
275
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
276 for (const auto& suffix : map) {
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
277 if (bytes / suffix.first < 1)
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
278 continue;
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
279
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
280 std::stringstream ss;
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
281 ss << std::setprecision(precision)
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
282 << (static_cast<double>(bytes) / suffix.first) << " "
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
283 << suffix.second;
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
284 return ss.str();
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
285 }
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
286
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
287 /* better luck next time */
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
288 return "0 bytes";
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
289 #endif
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
290 }
f31305b9f60a *: various code safety changes
Paper <paper@paper.us.eu.org>
parents: 264
diff changeset
291
264
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
292 void RemoveLeadingChars(std::string& s, const char c) {
118
39521c47c7a3 *: another huge megacommit, SORRY
Paper <mrpapersonic@gmail.com>
parents: 116
diff changeset
293 s.erase(0, std::min(s.find_first_not_of(c), s.size() - 1));
114
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
294 }
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
295
264
9a04802848c0 *: improve multiple things
Paper <paper@paper.us.eu.org>
parents: 260
diff changeset
296 void RemoveTrailingChars(std::string& s, const char c) {
114
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
297 s.erase(s.find_last_not_of(c) + 1, std::string::npos);
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
298 }
ab191e28e69d *: add initial torrent stuff
Paper <mrpapersonic@gmail.com>
parents: 102
diff changeset
299
102
b315f3759c56 *: big patch
Paper <mrpapersonic@gmail.com>
parents: 101
diff changeset
300 bool BeginningMatchesSubstring(const std::string& str, const std::string& sub) {
b315f3759c56 *: big patch
Paper <mrpapersonic@gmail.com>
parents: 101
diff changeset
301 for (unsigned long long i = 0; i < str.length() && i < sub.length(); i++)
b315f3759c56 *: big patch
Paper <mrpapersonic@gmail.com>
parents: 101
diff changeset
302 if (str[i] != sub[i])
b315f3759c56 *: big patch
Paper <mrpapersonic@gmail.com>
parents: 101
diff changeset
303 return false;
187
9613d72b097e *: multiple performance improvements
Paper <mrpapersonic@gmail.com>
parents: 120
diff changeset
304
102
b315f3759c56 *: big patch
Paper <mrpapersonic@gmail.com>
parents: 101
diff changeset
305 return true;
b315f3759c56 *: big patch
Paper <mrpapersonic@gmail.com>
parents: 101
diff changeset
306 }
b315f3759c56 *: big patch
Paper <mrpapersonic@gmail.com>
parents: 101
diff changeset
307
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
308 } // namespace Strings