Mercurial > minori
comparison dep/toml11/toml/region.hpp @ 318:3b355fa948c7
config: use TOML instead of INI
unfortunately, INI is not enough, and causes some paths including
semicolons to break with our current storage of the library folders.
so, I decided to switch to TOML which does support real arrays...
author | Paper <paper@paper.us.eu.org> |
---|---|
date | Wed, 12 Jun 2024 05:25:41 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
317:b1f4d1867ab1 | 318:3b355fa948c7 |
---|---|
1 // Copyright Toru Niina 2017. | |
2 // Distributed under the MIT License. | |
3 #ifndef TOML11_REGION_HPP | |
4 #define TOML11_REGION_HPP | |
5 #include <memory> | |
6 #include <vector> | |
7 #include <algorithm> | |
8 #include <initializer_list> | |
9 #include <iterator> | |
10 #include <iomanip> | |
11 #include <cassert> | |
12 #include "color.hpp" | |
13 | |
14 namespace toml | |
15 { | |
16 namespace detail | |
17 { | |
18 | |
// Builds a std::string from the iterator range [first, last).
// An empty range is answered directly with an empty string, so that
// std::string(iter, iter) is never invoked.
template<typename Iterator>
std::string make_string(Iterator first, Iterator last)
{
    return (first == last) ? std::string() : std::string(first, last);
}
// Builds a std::string made of `len` copies of `c`.
// A zero length yields an empty string without calling std::string(0, c).
inline std::string make_string(std::size_t len, char c)
{
    std::string filled;
    if(len != 0)
    {
        filled.assign(len, c);
    }
    return filled;
}
31 | |
// region_base is the common base class of `location` and `region`, both
// defined below. It is used to generate better error messages.
//
// Every query has a placeholder implementation, so a plain region_base
// describes an unknown position.
struct region_base
{
    region_base() = default;
    virtual ~region_base() = default;
    region_base(const region_base&) = default;
    region_base(region_base&& ) = default;
    region_base& operator=(const region_base&) = default;
    region_base& operator=(region_base&& ) = default;

    // the base implementation always reports "not ok".
    virtual bool is_ok() const noexcept {return false;}
    // the base implementation has no character to show and returns NUL.
    virtual char front() const noexcept {return '\0';}

    // textual descriptions; the base returns fixed placeholder strings.
    virtual std::string str()      const {return "unknown region";}
    virtual std::string name()     const {return "unknown file";}
    virtual std::string line()     const {return "unknown line";}
    virtual std::string line_num() const {return "?";}

    // length of the region
    virtual std::size_t size() const noexcept {return 0u;}
    // number of characters in the line before the region
    virtual std::size_t before() const noexcept {return 0u;}
    // number of characters in the line after the region
    virtual std::size_t after() const noexcept {return 0u;}

    // comments attached to the region. the base has none.
    // ```toml
    // # comment_before
    // key = "value" # comment_inline
    // ```
    virtual std::vector<std::string> comments() const
    {
        return std::vector<std::string>{};
    }
};
64 | |
65 // location represents a position in a container, which contains a file content. | |
66 // it can be considered as a region that contains only one character. | |
67 // | |
68 // it contains pointer to the file content and iterator that points the current | |
69 // location. | |
70 struct location final : public region_base | |
71 { | |
72 using const_iterator = typename std::vector<char>::const_iterator; | |
73 using difference_type = typename std::iterator_traits<const_iterator>::difference_type; | |
74 using source_ptr = std::shared_ptr<const std::vector<char>>; | |
75 | |
76 location(std::string source_name, std::vector<char> cont) | |
77 : source_(std::make_shared<std::vector<char>>(std::move(cont))), | |
78 line_number_(1), source_name_(std::move(source_name)), iter_(source_->cbegin()) | |
79 {} | |
80 location(std::string source_name, const std::string& cont) | |
81 : source_(std::make_shared<std::vector<char>>(cont.begin(), cont.end())), | |
82 line_number_(1), source_name_(std::move(source_name)), iter_(source_->cbegin()) | |
83 {} | |
84 | |
85 location(const location&) = default; | |
86 location(location&&) = default; | |
87 location& operator=(const location&) = default; | |
88 location& operator=(location&&) = default; | |
89 ~location() = default; | |
90 | |
91 bool is_ok() const noexcept override {return static_cast<bool>(source_);} | |
92 char front() const noexcept override {return *iter_;} | |
93 | |
94 // this const prohibits codes like `++(loc.iter())`. | |
95 std::add_const<const_iterator>::type iter() const noexcept {return iter_;} | |
96 | |
97 const_iterator begin() const noexcept {return source_->cbegin();} | |
98 const_iterator end() const noexcept {return source_->cend();} | |
99 | |
100 // XXX `location::line_num()` used to be implemented using `std::count` to | |
101 // count a number of '\n'. But with a long toml file (typically, 10k lines), | |
102 // it becomes intolerably slow because each time it generates error messages, | |
103 // it counts '\n' from thousands of characters. To workaround it, I decided | |
104 // to introduce `location::line_number_` member variable and synchronize it | |
105 // to the location changes the point to look. So an overload of `iter()` | |
106 // which returns mutable reference is removed and `advance()`, `retrace()` | |
107 // and `reset()` is added. | |
108 void advance(difference_type n = 1) noexcept | |
109 { | |
110 this->line_number_ += static_cast<std::size_t>( | |
111 std::count(this->iter_, std::next(this->iter_, n), '\n')); | |
112 this->iter_ += n; | |
113 return; | |
114 } | |
115 void retrace(difference_type n = 1) noexcept | |
116 { | |
117 this->line_number_ -= static_cast<std::size_t>( | |
118 std::count(std::prev(this->iter_, n), this->iter_, '\n')); | |
119 this->iter_ -= n; | |
120 return; | |
121 } | |
122 void reset(const_iterator rollback) noexcept | |
123 { | |
124 // since c++11, std::distance works in both ways for random-access | |
125 // iterators and returns a negative value if `first > last`. | |
126 if(0 <= std::distance(rollback, this->iter_)) // rollback < iter | |
127 { | |
128 this->line_number_ -= static_cast<std::size_t>( | |
129 std::count(rollback, this->iter_, '\n')); | |
130 } | |
131 else // iter < rollback [[unlikely]] | |
132 { | |
133 this->line_number_ += static_cast<std::size_t>( | |
134 std::count(this->iter_, rollback, '\n')); | |
135 } | |
136 this->iter_ = rollback; | |
137 return; | |
138 } | |
139 | |
140 std::string str() const override {return make_string(1, *this->iter());} | |
141 std::string name() const override {return source_name_;} | |
142 | |
143 std::string line_num() const override | |
144 { | |
145 return std::to_string(this->line_number_); | |
146 } | |
147 | |
148 std::string line() const override | |
149 { | |
150 return make_string(this->line_begin(), this->line_end()); | |
151 } | |
152 | |
153 const_iterator line_begin() const noexcept | |
154 { | |
155 using reverse_iterator = std::reverse_iterator<const_iterator>; | |
156 return std::find(reverse_iterator(this->iter()), | |
157 reverse_iterator(this->begin()), '\n').base(); | |
158 } | |
159 const_iterator line_end() const noexcept | |
160 { | |
161 return std::find(this->iter(), this->end(), '\n'); | |
162 } | |
163 | |
164 // location is always points a character. so the size is 1. | |
165 std::size_t size() const noexcept override | |
166 { | |
167 return 1u; | |
168 } | |
169 std::size_t before() const noexcept override | |
170 { | |
171 const auto sz = std::distance(this->line_begin(), this->iter()); | |
172 assert(sz >= 0); | |
173 return static_cast<std::size_t>(sz); | |
174 } | |
175 std::size_t after() const noexcept override | |
176 { | |
177 const auto sz = std::distance(this->iter(), this->line_end()); | |
178 assert(sz >= 0); | |
179 return static_cast<std::size_t>(sz); | |
180 } | |
181 | |
182 source_ptr const& source() const& noexcept {return source_;} | |
183 source_ptr&& source() && noexcept {return std::move(source_);} | |
184 | |
185 private: | |
186 | |
187 source_ptr source_; | |
188 std::size_t line_number_; | |
189 std::string source_name_; | |
190 const_iterator iter_; | |
191 }; | |
192 | |
193 // region represents a range in a container, which contains a file content. | |
194 // | |
195 // it contains pointer to the file content and iterator that points the first | |
196 // and last location. | |
197 struct region final : public region_base | |
198 { | |
199 using const_iterator = typename std::vector<char>::const_iterator; | |
200 using source_ptr = std::shared_ptr<const std::vector<char>>; | |
201 | |
202 // delete default constructor. source_ never be null. | |
203 region() = delete; | |
204 | |
205 explicit region(const location& loc) | |
206 : source_(loc.source()), source_name_(loc.name()), | |
207 first_(loc.iter()), last_(loc.iter()) | |
208 {} | |
209 explicit region(location&& loc) | |
210 : source_(loc.source()), source_name_(loc.name()), | |
211 first_(loc.iter()), last_(loc.iter()) | |
212 {} | |
213 | |
214 region(const location& loc, const_iterator f, const_iterator l) | |
215 : source_(loc.source()), source_name_(loc.name()), first_(f), last_(l) | |
216 {} | |
217 region(location&& loc, const_iterator f, const_iterator l) | |
218 : source_(loc.source()), source_name_(loc.name()), first_(f), last_(l) | |
219 {} | |
220 | |
221 region(const region&) = default; | |
222 region(region&&) = default; | |
223 region& operator=(const region&) = default; | |
224 region& operator=(region&&) = default; | |
225 ~region() = default; | |
226 | |
227 region& operator+=(const region& other) | |
228 { | |
229 // different regions cannot be concatenated | |
230 assert(this->source_ == other.source_ && this->last_ == other.first_); | |
231 | |
232 this->last_ = other.last_; | |
233 return *this; | |
234 } | |
235 | |
236 bool is_ok() const noexcept override {return static_cast<bool>(source_);} | |
237 char front() const noexcept override {return *first_;} | |
238 | |
239 std::string str() const override {return make_string(first_, last_);} | |
240 std::string line() const override | |
241 { | |
242 if(this->contain_newline()) | |
243 { | |
244 return make_string(this->line_begin(), | |
245 std::find(this->line_begin(), this->last(), '\n')); | |
246 } | |
247 return make_string(this->line_begin(), this->line_end()); | |
248 } | |
249 std::string line_num() const override | |
250 { | |
251 return std::to_string(1 + std::count(this->begin(), this->first(), '\n')); | |
252 } | |
253 | |
254 std::size_t size() const noexcept override | |
255 { | |
256 const auto sz = std::distance(first_, last_); | |
257 assert(sz >= 0); | |
258 return static_cast<std::size_t>(sz); | |
259 } | |
260 std::size_t before() const noexcept override | |
261 { | |
262 const auto sz = std::distance(this->line_begin(), this->first()); | |
263 assert(sz >= 0); | |
264 return static_cast<std::size_t>(sz); | |
265 } | |
266 std::size_t after() const noexcept override | |
267 { | |
268 const auto sz = std::distance(this->last(), this->line_end()); | |
269 assert(sz >= 0); | |
270 return static_cast<std::size_t>(sz); | |
271 } | |
272 | |
273 bool contain_newline() const noexcept | |
274 { | |
275 return std::find(this->first(), this->last(), '\n') != this->last(); | |
276 } | |
277 | |
278 const_iterator line_begin() const noexcept | |
279 { | |
280 using reverse_iterator = std::reverse_iterator<const_iterator>; | |
281 return std::find(reverse_iterator(this->first()), | |
282 reverse_iterator(this->begin()), '\n').base(); | |
283 } | |
284 const_iterator line_end() const noexcept | |
285 { | |
286 return std::find(this->last(), this->end(), '\n'); | |
287 } | |
288 | |
289 const_iterator begin() const noexcept {return source_->cbegin();} | |
290 const_iterator end() const noexcept {return source_->cend();} | |
291 const_iterator first() const noexcept {return first_;} | |
292 const_iterator last() const noexcept {return last_;} | |
293 | |
294 source_ptr const& source() const& noexcept {return source_;} | |
295 source_ptr&& source() && noexcept {return std::move(source_);} | |
296 | |
297 std::string name() const override {return source_name_;} | |
298 | |
299 std::vector<std::string> comments() const override | |
300 { | |
301 // assuming the current region (`*this`) points a value. | |
302 // ```toml | |
303 // a = "value" | |
304 // ^^^^^^^- this region | |
305 // ``` | |
306 using rev_iter = std::reverse_iterator<const_iterator>; | |
307 | |
308 std::vector<std::string> com{}; | |
309 { | |
310 // find comments just before the current region. | |
311 // ```toml | |
312 // # this should be collected. | |
313 // # this also. | |
314 // a = value # not this. | |
315 // ``` | |
316 | |
317 // # this is a comment for `a`, not array elements. | |
318 // a = [1, 2, 3, 4, 5] | |
319 if(this->first() == std::find_if(this->line_begin(), this->first(), | |
320 [](const char c) noexcept -> bool {return c == '[' || c == '{';})) | |
321 { | |
322 auto iter = this->line_begin(); // points the first character | |
323 while(iter != this->begin()) | |
324 { | |
325 iter = std::prev(iter); | |
326 | |
327 // range [line_start, iter) represents the previous line | |
328 const auto line_start = std::find( | |
329 rev_iter(iter), rev_iter(this->begin()), '\n').base(); | |
330 const auto comment_found = std::find(line_start, iter, '#'); | |
331 if(comment_found == iter) | |
332 { | |
333 break; // comment not found. | |
334 } | |
335 | |
336 // exclude the following case. | |
337 // > a = "foo" # comment // <-- this is not a comment for b but a. | |
338 // > b = "current value" | |
339 if(std::all_of(line_start, comment_found, | |
340 [](const char c) noexcept -> bool { | |
341 return c == ' ' || c == '\t'; | |
342 })) | |
343 { | |
344 // unwrap the first '#' by std::next. | |
345 auto s = make_string(std::next(comment_found), iter); | |
346 if(!s.empty() && s.back() == '\r') {s.pop_back();} | |
347 com.push_back(std::move(s)); | |
348 } | |
349 else | |
350 { | |
351 break; | |
352 } | |
353 iter = line_start; | |
354 } | |
355 } | |
356 } | |
357 | |
358 if(com.size() > 1) | |
359 { | |
360 std::reverse(com.begin(), com.end()); | |
361 } | |
362 | |
363 { | |
364 // find comments just after the current region. | |
365 // ```toml | |
366 // # not this. | |
367 // a = value # this one. | |
368 // a = [ # not this (technically difficult) | |
369 // | |
370 // ] # and this. | |
371 // ``` | |
372 // The reason why it's difficult is that it requires parsing in the | |
373 // following case. | |
374 // ```toml | |
375 // a = [ 10 # this comment is for `10`. not for `a` but `a[0]`. | |
376 // # ... | |
377 // ] # this is apparently a comment for a. | |
378 // | |
379 // b = [ | |
380 // 3.14 ] # there is no way to add a comment to `3.14` currently. | |
381 // | |
382 // c = [ | |
383 // 3.14 # do this if you need a comment here. | |
384 // ] | |
385 // ``` | |
386 const auto comment_found = | |
387 std::find(this->last(), this->line_end(), '#'); | |
388 if(comment_found != this->line_end()) // '#' found | |
389 { | |
390 // table = {key = "value"} # what is this for? | |
391 // the above comment is not for "value", but {key="value"}. | |
392 if(comment_found == std::find_if(this->last(), comment_found, | |
393 [](const char c) noexcept -> bool { | |
394 return !(c == ' ' || c == '\t' || c == ','); | |
395 })) | |
396 { | |
397 // unwrap the first '#' by std::next. | |
398 auto s = make_string(std::next(comment_found), this->line_end()); | |
399 if(!s.empty() && s.back() == '\r') {s.pop_back();} | |
400 com.push_back(std::move(s)); | |
401 } | |
402 } | |
403 } | |
404 return com; | |
405 } | |
406 | |
407 private: | |
408 | |
409 source_ptr source_; | |
410 std::string source_name_; | |
411 const_iterator first_, last_; | |
412 }; | |
413 | |
414 } // detail | |
415 } // toml | |
416 #endif// TOML11_REGION_HPP |