annotate dep/anitomy/anitomy/anitomy.cpp @ 391:c3f717b7321b

filesystem: only iterate over the list once when erasing deleted files :)
author Paper <paper@tflc.us>
date Fri, 07 Nov 2025 07:09:31 -0500
parents a0aa8c8c4307
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
1 /*
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
2 ** Copyright (c) 2014-2017, Eren Okka
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
3 **
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
4 ** This Source Code Form is subject to the terms of the Mozilla Public
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
5 ** License, v. 2.0. If a copy of the MPL was not distributed with this
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
6 ** file, You can obtain one at https://mozilla.org/MPL/2.0/.
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
7 */
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
8
347
a0aa8c8c4307 dep/anitomy: port to use UCS-4 rather than wide strings
Paper <paper@paper.us.eu.org>
parents: 9
diff changeset
9 #include <codecvt>
a0aa8c8c4307 dep/anitomy: port to use UCS-4 rather than wide strings
Paper <paper@paper.us.eu.org>
parents: 9
diff changeset
10 #include <locale>
a0aa8c8c4307 dep/anitomy: port to use UCS-4 rather than wide strings
Paper <paper@paper.us.eu.org>
parents: 9
diff changeset
11
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
12 #include "anitomy.h"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
13 #include "keyword.h"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
14 #include "parser.h"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
15 #include "string.h"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
16 #include "tokenizer.h"
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
17
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
18 namespace anitomy {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
19
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
20 bool Anitomy::Parse(string_t filename) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
21 elements_.clear();
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
22 tokens_.clear();
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
23
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
24 if (options_.parse_file_extension) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
25 string_t extension;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
26 if (RemoveExtensionFromFilename(filename, extension))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
27 elements_.insert(kElementFileExtension, extension);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
28 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
29
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
30 if (!options_.ignored_strings.empty())
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
31 RemoveIgnoredStrings(filename);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
32
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
33 if (filename.empty())
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
34 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
35 elements_.insert(kElementFileName, filename);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
36
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
37 Tokenizer tokenizer(filename, elements_, options_, tokens_);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
38 if (!tokenizer.Tokenize())
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
39 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
40
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
41 Parser parser(elements_, options_, tokens_);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
42 if (!parser.Parse())
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
43 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
44
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
45 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
46 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
47
347
a0aa8c8c4307 dep/anitomy: port to use UCS-4 rather than wide strings
Paper <paper@paper.us.eu.org>
parents: 9
diff changeset
48 /* assumes UTF-8 */
a0aa8c8c4307 dep/anitomy: port to use UCS-4 rather than wide strings
Paper <paper@paper.us.eu.org>
parents: 9
diff changeset
49 bool Anitomy::Parse(const std::string& filename) {
a0aa8c8c4307 dep/anitomy: port to use UCS-4 rather than wide strings
Paper <paper@paper.us.eu.org>
parents: 9
diff changeset
50 static std::wstring_convert<std::codecvt_utf8_utf16<char_t>, char_t> ucs4conv;
a0aa8c8c4307 dep/anitomy: port to use UCS-4 rather than wide strings
Paper <paper@paper.us.eu.org>
parents: 9
diff changeset
51 return Parse(ucs4conv.from_bytes(filename));
a0aa8c8c4307 dep/anitomy: port to use UCS-4 rather than wide strings
Paper <paper@paper.us.eu.org>
parents: 9
diff changeset
52 }
a0aa8c8c4307 dep/anitomy: port to use UCS-4 rather than wide strings
Paper <paper@paper.us.eu.org>
parents: 9
diff changeset
53
9
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
54 ////////////////////////////////////////////////////////////////////////////////
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
55
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
56 bool Anitomy::RemoveExtensionFromFilename(string_t& filename, string_t& extension) const {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
57 const size_t position = filename.find_last_of(L'.');
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
58
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
59 if (position == string_t::npos)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
60 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
61
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
62 extension = filename.substr(position + 1);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
63
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
64 const size_t max_length = 4;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
65 if (extension.length() > max_length)
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
66 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
67
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
68 if (!IsAlphanumericString(extension))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
69 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
70
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
71 auto keyword = keyword_manager.Normalize(extension);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
72 if (!keyword_manager.Find(kElementFileExtension, keyword))
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
73 return false;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
74
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
75 filename.resize(position);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
76
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
77 return true;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
78 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
79
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
80 void Anitomy::RemoveIgnoredStrings(string_t& filename) const {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
81 for (const auto& str : options_.ignored_strings) {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
82 EraseString(filename, str);
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
83 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
84 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
85
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
86 ////////////////////////////////////////////////////////////////////////////////
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
87
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
88 Elements& Anitomy::elements() {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
89 return elements_;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
90 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
91
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
92 Options& Anitomy::options() {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
93 return options_;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
94 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
95
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
96 const token_container_t& Anitomy::tokens() const {
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
97 return tokens_;
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
98 }
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
99
5c0397762b53 INCOMPLETE: megacommit :)
Paper <mrpapersonic@gmail.com>
parents:
diff changeset
100 } // namespace anitomy