comparison src/core/strings.cc @ 98:582b2fca1561

strings: parse HTML entities when reading synopsis, make the toupper and tolower functions more sane
author Paper <mrpapersonic@gmail.com>
date Thu, 02 Nov 2023 15:22:02 -0400 (14 months ago)
parents 9b2b41f83a5e
children 503bc1547d49
comparison
equal deleted inserted replaced
97:18979b066284 98:582b2fca1561
2 * strings.cpp: Useful functions for manipulating strings 2 * strings.cpp: Useful functions for manipulating strings
3 **/ 3 **/
4 #include "core/strings.h" 4 #include "core/strings.h"
5 #include <QByteArray> 5 #include <QByteArray>
6 #include <QString> 6 #include <QString>
7 #include <QLocale>
7 #include <algorithm> 8 #include <algorithm>
8 #include <cctype> 9 #include <cctype>
9 #include <codecvt> 10 #include <codecvt>
10 #include <locale> 11 #include <locale>
11 #include <string> 12 #include <string>
23 out.append(delimiter); 24 out.append(delimiter);
24 } 25 }
25 return out; 26 return out;
26 } 27 }
27 28
/* Replaces every non-overlapping occurrence of `find` in `string` with
 * `replace`, scanning left to right, and returns the result.
 *
 * `string` is taken by value so the replacement can be done in place on the
 * caller's copy. Text inserted by a replacement is never rescanned, so
 * ReplaceAll("aaa", "a", "aa") yields "aaaaaa" rather than looping.
 *
 * An empty `find` matches at every position and would never terminate, so
 * in that case the input is returned unchanged. */
std::string ReplaceAll(std::string string, const std::string& find, const std::string& replace) {
	if (find.empty())
		return string;

	size_t pos = 0;
	while ((pos = string.find(find, pos)) != std::string::npos) {
		string.replace(pos, find.length(), replace);
		/* skip past what we just inserted so it is not matched again */
		pos += replace.length();
	}
	return string;
}
39 37
/* Normalizes line breaks in `string` and returns the result:
 *   - "\r\n" becomes "\n"
 *   - "<br>" becomes "\n"
 *   - any run of three or more newlines collapses to exactly two
 *     (i.e. at most one blank line between paragraphs)
 *
 * Done in a single pass so that long runs such as "<br>\n<br>\n" are
 * collapsed completely; replacing "\n\n\n" with "\n\n" once would leave
 * runs of four or more newlines longer than two. */
std::string SanitizeLineEndings(const std::string& string) {
	std::string result;
	result.reserve(string.size());

	size_t newline_run = 0; /* number of consecutive '\n' at the end of result */
	size_t pos = 0;
	while (pos < string.size()) {
		/* how many input characters form a line break at pos (0 if none) */
		size_t consumed = 0;
		if (string.compare(pos, 2, "\r\n") == 0)
			consumed = 2;
		else if (string.compare(pos, 4, "<br>") == 0)
			consumed = 4;
		else if (string[pos] == '\n')
			consumed = 1;

		if (consumed == 0) {
			result.push_back(string[pos]);
			newline_run = 0;
			++pos;
			continue;
		}

		/* drop the line break entirely once two have been emitted in a row */
		if (newline_run < 2) {
			result.push_back('\n');
			++newline_run;
		}
		pos += consumed;
	}
	return result;
}
48 42
43 /* removes dumb HTML tags because anilist is aids and
44 gives us HTML for synopses :/ */
49 std::string RemoveHtmlTags(const std::string& string) { 45 std::string RemoveHtmlTags(const std::string& string) {
50 std::string html(string); 46 std::string html(string);
51 while (html.find("<") != std::string::npos) { 47 while (html.find("<") != std::string::npos) {
52 auto startpos = html.find("<"); 48 auto startpos = html.find("<");
53 auto endpos = html.find(">") + 1; 49 auto endpos = html.find(">") + 1;
57 } 53 }
58 } 54 }
59 return html; 55 return html;
60 } 56 }
61 57
/* Decodes a small set of named HTML entities (e.g. "&lt;" for "<") in
 * `string` and returns the result as UTF-8.
 *
 * The input is scanned once, left to right, and decoded text is never
 * rescanned, so "&amp;lt;" correctly becomes the literal text "&lt;"
 * instead of being decoded twice into "<". Anything that starts with '&'
 * but is not in the table below is copied through unchanged. */
std::string ParseHtmlEntities(const std::string& string) {
	struct Entity {
		std::string name;
		std::string text; /* UTF-8 encoded replacement */
	};
	static const Entity entities[] = {
		{"&lt;",    "<"},
		{"&gt;",    ">"},
		{"&nbsp;",  "\xC2\xA0"},     /* U+00A0 no-break space */
		{"&amp;",   "&"},
		{"&quot;",  "\""},
		{"&apos;",  "'"},
		{"&cent;",  "\xC2\xA2"},     /* U+00A2 cent sign */
		{"&pound;", "\xC2\xA3"},     /* U+00A3 pound sign */
		{"&euro;",  "\xE2\x82\xAC"}, /* U+20AC euro sign */
		{"&yen;",   "\xC2\xA5"},     /* U+00A5 yen sign */
		{"&copy;",  "\xC2\xA9"},     /* U+00A9 copyright sign */
		{"&reg;",   "\xC2\xAE"},     /* U+00AE registered sign */
		{"&rsquo;", "\xE2\x80\x99"}  /* U+2019; Haibane Renmei, AniList */
	};

	std::string ret;
	ret.reserve(string.size());

	size_t pos = 0;
	while (pos < string.size()) {
		const size_t amp = string.find('&', pos);
		if (amp == std::string::npos) {
			/* no more candidates; copy the tail and stop */
			ret.append(string, pos, std::string::npos);
			break;
		}
		ret.append(string, pos, amp - pos);

		bool decoded = false;
		for (const Entity& entity : entities) {
			if (string.compare(amp, entity.name.size(), entity.name) == 0) {
				ret.append(entity.text);
				pos = amp + entity.name.size();
				decoded = true;
				break;
			}
		}
		if (!decoded) {
			/* bare or unknown '&': keep it as-is */
			ret.push_back('&');
			pos = amp + 1;
		}
	}
	return ret;
}
65 81
66 /* these functions suck for i18n!... 82 /* */
67 but we only use them with JSON 83 std::string TextifySynopsis(const std::string& string) {
68 stuff anyway */ 84 return ParseHtmlEntities(RemoveHtmlTags(SanitizeLineEndings(string)));
85 }
86
87 /* let Qt handle the heavy lifting of locale shit
88 I don't want to deal with */
69 std::string ToUpper(const std::string& string) { 89 std::string ToUpper(const std::string& string) {
70 std::string result(string); 90 /* todo: this "locale" will have to be moved to session.h */
71 std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::toupper(c); }); 91 QLocale locale;
72 return result; 92 return ToUtf8String(locale.toUpper(ToQString(string)));
73 } 93 }
74 94
75 std::string ToLower(const std::string& string) { 95 std::string ToLower(const std::string& string) {
76 std::string result(string); 96 QLocale locale;
77 std::transform(result.begin(), result.end(), result.begin(), [](unsigned char c) { return std::tolower(c); }); 97 return ToUtf8String(locale.toLower(ToQString(string)));
78 return result;
79 } 98 }
80 99
81 std::wstring ToWstring(const std::string& string) { 100 std::wstring ToWstring(const std::string& string) {
82 std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter; 101 std::wstring_convert<std::codecvt_utf8<wchar_t>, wchar_t> converter;
83 return converter.from_bytes(string); 102 return converter.from_bytes(string);