318
|
1 // Copyright Toru Niina 2017.
|
|
2 // Distributed under the MIT License.
|
|
3 #ifndef TOML11_REGION_HPP
|
|
4 #define TOML11_REGION_HPP
|
|
5 #include <memory>
|
|
6 #include <vector>
|
|
7 #include <algorithm>
|
|
8 #include <initializer_list>
|
|
9 #include <iterator>
|
|
10 #include <iomanip>
|
|
11 #include <cassert>
|
|
12 #include "color.hpp"
|
|
13
|
|
14 namespace toml
|
|
15 {
|
|
16 namespace detail
|
|
17 {
|
|
18
|
|
// Helper that builds a std::string from an iterator range while avoiding a
// call to `std::string(iter, iter)` with an empty range.
//
// first, last: the half-open range [first, last) of characters to copy.
// Returns an empty string when the range is empty, otherwise a string that
// contains a copy of every character in the range.
template<typename Iterator>
std::string make_string(Iterator first, Iterator last)
{
    if(first == last) {return "";}
    return std::string(first, last);
}
|
|
// Helper that builds a std::string made of `len` copies of `c` while avoiding
// a call to `std::string(0, 'c')`.
//
// Returns an empty string when `len` is zero.
inline std::string make_string(std::size_t len, char c)
{
    if(len == 0) {return "";}
    return std::string(len, c);
}
|
|
31
|
|
// region_base is a base class of location and region that are defined below.
// It will be used to generate better error messages.
//
// Every virtual member has a placeholder implementation, so a default
// constructed region_base describes an "unknown" region.
struct region_base
{
    region_base() = default;
    virtual ~region_base() = default;
    region_base(const region_base&) = default;
    region_base(region_base&&     ) = default;
    region_base& operator=(const region_base&) = default;
    region_base& operator=(region_base&&     ) = default;

    // whether the object refers to a source. The base class never does.
    virtual bool is_ok()           const noexcept {return false;}
    // first character of the region. The base class returns '\0'.
    virtual char front()           const noexcept {return '\0';}

    // textual content of the region
    virtual std::string str()      const {return std::string("unknown region");}
    // name of the source (e.g. a file name)
    virtual std::string name()     const {return std::string("unknown file");}
    // content of the line related to the region
    virtual std::string line()     const {return std::string("unknown line");}
    // line number, formatted as a string
    virtual std::string line_num() const {return std::string("?");}

    // length of the region
    virtual std::size_t size()     const noexcept {return 0;}
    // number of characters in the line before the region
    virtual std::size_t before()   const noexcept {return 0;}
    // number of characters in the line after the region
    virtual std::size_t after()    const noexcept {return 0;}

    // comments related to the region. The base class has none.
    // ```toml
    // # comment_before
    // key = "value" # comment_inline
    // ```
    virtual std::vector<std::string> comments() const {return {};}
};
|
|
64
|
|
65 // location represents a position in a container, which contains a file content.
|
|
66 // it can be considered as a region that contains only one character.
|
|
67 //
|
|
68 // it contains pointer to the file content and iterator that points the current
|
|
69 // location.
|
|
70 struct location final : public region_base
|
|
71 {
|
|
72 using const_iterator = typename std::vector<char>::const_iterator;
|
|
73 using difference_type = typename std::iterator_traits<const_iterator>::difference_type;
|
|
74 using source_ptr = std::shared_ptr<const std::vector<char>>;
|
|
75
|
|
76 location(std::string source_name, std::vector<char> cont)
|
|
77 : source_(std::make_shared<std::vector<char>>(std::move(cont))),
|
|
78 line_number_(1), source_name_(std::move(source_name)), iter_(source_->cbegin())
|
|
79 {}
|
|
80 location(std::string source_name, const std::string& cont)
|
|
81 : source_(std::make_shared<std::vector<char>>(cont.begin(), cont.end())),
|
|
82 line_number_(1), source_name_(std::move(source_name)), iter_(source_->cbegin())
|
|
83 {}
|
|
84
|
|
85 location(const location&) = default;
|
|
86 location(location&&) = default;
|
|
87 location& operator=(const location&) = default;
|
|
88 location& operator=(location&&) = default;
|
|
89 ~location() = default;
|
|
90
|
|
91 bool is_ok() const noexcept override {return static_cast<bool>(source_);}
|
|
92 char front() const noexcept override {return *iter_;}
|
|
93
|
|
94 // this const prohibits codes like `++(loc.iter())`.
|
|
95 std::add_const<const_iterator>::type iter() const noexcept {return iter_;}
|
|
96
|
|
97 const_iterator begin() const noexcept {return source_->cbegin();}
|
|
98 const_iterator end() const noexcept {return source_->cend();}
|
|
99
|
|
100 // XXX `location::line_num()` used to be implemented using `std::count` to
|
|
101 // count a number of '\n'. But with a long toml file (typically, 10k lines),
|
|
102 // it becomes intolerably slow because each time it generates error messages,
|
|
103 // it counts '\n' from thousands of characters. To workaround it, I decided
|
|
104 // to introduce `location::line_number_` member variable and synchronize it
|
|
105 // to the location changes the point to look. So an overload of `iter()`
|
|
106 // which returns mutable reference is removed and `advance()`, `retrace()`
|
|
107 // and `reset()` is added.
|
|
108 void advance(difference_type n = 1) noexcept
|
|
109 {
|
|
110 this->line_number_ += static_cast<std::size_t>(
|
|
111 std::count(this->iter_, std::next(this->iter_, n), '\n'));
|
|
112 this->iter_ += n;
|
|
113 return;
|
|
114 }
|
|
115 void retrace(difference_type n = 1) noexcept
|
|
116 {
|
|
117 this->line_number_ -= static_cast<std::size_t>(
|
|
118 std::count(std::prev(this->iter_, n), this->iter_, '\n'));
|
|
119 this->iter_ -= n;
|
|
120 return;
|
|
121 }
|
|
122 void reset(const_iterator rollback) noexcept
|
|
123 {
|
|
124 // since c++11, std::distance works in both ways for random-access
|
|
125 // iterators and returns a negative value if `first > last`.
|
|
126 if(0 <= std::distance(rollback, this->iter_)) // rollback < iter
|
|
127 {
|
|
128 this->line_number_ -= static_cast<std::size_t>(
|
|
129 std::count(rollback, this->iter_, '\n'));
|
|
130 }
|
|
131 else // iter < rollback [[unlikely]]
|
|
132 {
|
|
133 this->line_number_ += static_cast<std::size_t>(
|
|
134 std::count(this->iter_, rollback, '\n'));
|
|
135 }
|
|
136 this->iter_ = rollback;
|
|
137 return;
|
|
138 }
|
|
139
|
|
140 std::string str() const override {return make_string(1, *this->iter());}
|
|
141 std::string name() const override {return source_name_;}
|
|
142
|
|
143 std::string line_num() const override
|
|
144 {
|
|
145 return std::to_string(this->line_number_);
|
|
146 }
|
|
147
|
|
148 std::string line() const override
|
|
149 {
|
|
150 return make_string(this->line_begin(), this->line_end());
|
|
151 }
|
|
152
|
|
153 const_iterator line_begin() const noexcept
|
|
154 {
|
|
155 using reverse_iterator = std::reverse_iterator<const_iterator>;
|
|
156 return std::find(reverse_iterator(this->iter()),
|
|
157 reverse_iterator(this->begin()), '\n').base();
|
|
158 }
|
|
159 const_iterator line_end() const noexcept
|
|
160 {
|
|
161 return std::find(this->iter(), this->end(), '\n');
|
|
162 }
|
|
163
|
|
164 // location is always points a character. so the size is 1.
|
|
165 std::size_t size() const noexcept override
|
|
166 {
|
|
167 return 1u;
|
|
168 }
|
|
169 std::size_t before() const noexcept override
|
|
170 {
|
|
171 const auto sz = std::distance(this->line_begin(), this->iter());
|
|
172 assert(sz >= 0);
|
|
173 return static_cast<std::size_t>(sz);
|
|
174 }
|
|
175 std::size_t after() const noexcept override
|
|
176 {
|
|
177 const auto sz = std::distance(this->iter(), this->line_end());
|
|
178 assert(sz >= 0);
|
|
179 return static_cast<std::size_t>(sz);
|
|
180 }
|
|
181
|
|
182 source_ptr const& source() const& noexcept {return source_;}
|
|
183 source_ptr&& source() && noexcept {return std::move(source_);}
|
|
184
|
|
185 private:
|
|
186
|
|
187 source_ptr source_;
|
|
188 std::size_t line_number_;
|
|
189 std::string source_name_;
|
|
190 const_iterator iter_;
|
|
191 };
|
|
192
|
|
193 // region represents a range in a container, which contains a file content.
|
|
194 //
|
|
195 // it contains pointer to the file content and iterator that points the first
|
|
196 // and last location.
|
|
197 struct region final : public region_base
|
|
198 {
|
|
199 using const_iterator = typename std::vector<char>::const_iterator;
|
|
200 using source_ptr = std::shared_ptr<const std::vector<char>>;
|
|
201
|
|
202 // delete default constructor. source_ never be null.
|
|
203 region() = delete;
|
|
204
|
|
205 explicit region(const location& loc)
|
|
206 : source_(loc.source()), source_name_(loc.name()),
|
|
207 first_(loc.iter()), last_(loc.iter())
|
|
208 {}
|
|
209 explicit region(location&& loc)
|
|
210 : source_(loc.source()), source_name_(loc.name()),
|
|
211 first_(loc.iter()), last_(loc.iter())
|
|
212 {}
|
|
213
|
|
214 region(const location& loc, const_iterator f, const_iterator l)
|
|
215 : source_(loc.source()), source_name_(loc.name()), first_(f), last_(l)
|
|
216 {}
|
|
217 region(location&& loc, const_iterator f, const_iterator l)
|
|
218 : source_(loc.source()), source_name_(loc.name()), first_(f), last_(l)
|
|
219 {}
|
|
220
|
|
221 region(const region&) = default;
|
|
222 region(region&&) = default;
|
|
223 region& operator=(const region&) = default;
|
|
224 region& operator=(region&&) = default;
|
|
225 ~region() = default;
|
|
226
|
|
227 region& operator+=(const region& other)
|
|
228 {
|
|
229 // different regions cannot be concatenated
|
|
230 assert(this->source_ == other.source_ && this->last_ == other.first_);
|
|
231
|
|
232 this->last_ = other.last_;
|
|
233 return *this;
|
|
234 }
|
|
235
|
|
236 bool is_ok() const noexcept override {return static_cast<bool>(source_);}
|
|
237 char front() const noexcept override {return *first_;}
|
|
238
|
|
239 std::string str() const override {return make_string(first_, last_);}
|
|
240 std::string line() const override
|
|
241 {
|
|
242 if(this->contain_newline())
|
|
243 {
|
|
244 return make_string(this->line_begin(),
|
|
245 std::find(this->line_begin(), this->last(), '\n'));
|
|
246 }
|
|
247 return make_string(this->line_begin(), this->line_end());
|
|
248 }
|
|
249 std::string line_num() const override
|
|
250 {
|
|
251 return std::to_string(1 + std::count(this->begin(), this->first(), '\n'));
|
|
252 }
|
|
253
|
|
254 std::size_t size() const noexcept override
|
|
255 {
|
|
256 const auto sz = std::distance(first_, last_);
|
|
257 assert(sz >= 0);
|
|
258 return static_cast<std::size_t>(sz);
|
|
259 }
|
|
260 std::size_t before() const noexcept override
|
|
261 {
|
|
262 const auto sz = std::distance(this->line_begin(), this->first());
|
|
263 assert(sz >= 0);
|
|
264 return static_cast<std::size_t>(sz);
|
|
265 }
|
|
266 std::size_t after() const noexcept override
|
|
267 {
|
|
268 const auto sz = std::distance(this->last(), this->line_end());
|
|
269 assert(sz >= 0);
|
|
270 return static_cast<std::size_t>(sz);
|
|
271 }
|
|
272
|
|
273 bool contain_newline() const noexcept
|
|
274 {
|
|
275 return std::find(this->first(), this->last(), '\n') != this->last();
|
|
276 }
|
|
277
|
|
278 const_iterator line_begin() const noexcept
|
|
279 {
|
|
280 using reverse_iterator = std::reverse_iterator<const_iterator>;
|
|
281 return std::find(reverse_iterator(this->first()),
|
|
282 reverse_iterator(this->begin()), '\n').base();
|
|
283 }
|
|
284 const_iterator line_end() const noexcept
|
|
285 {
|
|
286 return std::find(this->last(), this->end(), '\n');
|
|
287 }
|
|
288
|
|
289 const_iterator begin() const noexcept {return source_->cbegin();}
|
|
290 const_iterator end() const noexcept {return source_->cend();}
|
|
291 const_iterator first() const noexcept {return first_;}
|
|
292 const_iterator last() const noexcept {return last_;}
|
|
293
|
|
294 source_ptr const& source() const& noexcept {return source_;}
|
|
295 source_ptr&& source() && noexcept {return std::move(source_);}
|
|
296
|
|
297 std::string name() const override {return source_name_;}
|
|
298
|
|
299 std::vector<std::string> comments() const override
|
|
300 {
|
|
301 // assuming the current region (`*this`) points a value.
|
|
302 // ```toml
|
|
303 // a = "value"
|
|
304 // ^^^^^^^- this region
|
|
305 // ```
|
|
306 using rev_iter = std::reverse_iterator<const_iterator>;
|
|
307
|
|
308 std::vector<std::string> com{};
|
|
309 {
|
|
310 // find comments just before the current region.
|
|
311 // ```toml
|
|
312 // # this should be collected.
|
|
313 // # this also.
|
|
314 // a = value # not this.
|
|
315 // ```
|
|
316
|
|
317 // # this is a comment for `a`, not array elements.
|
|
318 // a = [1, 2, 3, 4, 5]
|
|
319 if(this->first() == std::find_if(this->line_begin(), this->first(),
|
|
320 [](const char c) noexcept -> bool {return c == '[' || c == '{';}))
|
|
321 {
|
|
322 auto iter = this->line_begin(); // points the first character
|
|
323 while(iter != this->begin())
|
|
324 {
|
|
325 iter = std::prev(iter);
|
|
326
|
|
327 // range [line_start, iter) represents the previous line
|
|
328 const auto line_start = std::find(
|
|
329 rev_iter(iter), rev_iter(this->begin()), '\n').base();
|
|
330 const auto comment_found = std::find(line_start, iter, '#');
|
|
331 if(comment_found == iter)
|
|
332 {
|
|
333 break; // comment not found.
|
|
334 }
|
|
335
|
|
336 // exclude the following case.
|
|
337 // > a = "foo" # comment // <-- this is not a comment for b but a.
|
|
338 // > b = "current value"
|
|
339 if(std::all_of(line_start, comment_found,
|
|
340 [](const char c) noexcept -> bool {
|
|
341 return c == ' ' || c == '\t';
|
|
342 }))
|
|
343 {
|
|
344 // unwrap the first '#' by std::next.
|
|
345 auto s = make_string(std::next(comment_found), iter);
|
|
346 if(!s.empty() && s.back() == '\r') {s.pop_back();}
|
|
347 com.push_back(std::move(s));
|
|
348 }
|
|
349 else
|
|
350 {
|
|
351 break;
|
|
352 }
|
|
353 iter = line_start;
|
|
354 }
|
|
355 }
|
|
356 }
|
|
357
|
|
358 if(com.size() > 1)
|
|
359 {
|
|
360 std::reverse(com.begin(), com.end());
|
|
361 }
|
|
362
|
|
363 {
|
|
364 // find comments just after the current region.
|
|
365 // ```toml
|
|
366 // # not this.
|
|
367 // a = value # this one.
|
|
368 // a = [ # not this (technically difficult)
|
|
369 //
|
|
370 // ] # and this.
|
|
371 // ```
|
|
372 // The reason why it's difficult is that it requires parsing in the
|
|
373 // following case.
|
|
374 // ```toml
|
|
375 // a = [ 10 # this comment is for `10`. not for `a` but `a[0]`.
|
|
376 // # ...
|
|
377 // ] # this is apparently a comment for a.
|
|
378 //
|
|
379 // b = [
|
|
380 // 3.14 ] # there is no way to add a comment to `3.14` currently.
|
|
381 //
|
|
382 // c = [
|
|
383 // 3.14 # do this if you need a comment here.
|
|
384 // ]
|
|
385 // ```
|
|
386 const auto comment_found =
|
|
387 std::find(this->last(), this->line_end(), '#');
|
|
388 if(comment_found != this->line_end()) // '#' found
|
|
389 {
|
|
390 // table = {key = "value"} # what is this for?
|
|
391 // the above comment is not for "value", but {key="value"}.
|
|
392 if(comment_found == std::find_if(this->last(), comment_found,
|
|
393 [](const char c) noexcept -> bool {
|
|
394 return !(c == ' ' || c == '\t' || c == ',');
|
|
395 }))
|
|
396 {
|
|
397 // unwrap the first '#' by std::next.
|
|
398 auto s = make_string(std::next(comment_found), this->line_end());
|
|
399 if(!s.empty() && s.back() == '\r') {s.pop_back();}
|
|
400 com.push_back(std::move(s));
|
|
401 }
|
|
402 }
|
|
403 }
|
|
404 return com;
|
|
405 }
|
|
406
|
|
407 private:
|
|
408
|
|
409 source_ptr source_;
|
|
410 std::string source_name_;
|
|
411 const_iterator first_, last_;
|
|
412 };
|
|
413
|
|
414 } // detail
|
|
415 } // toml
|
|
#endif // TOML11_REGION_HPP
|