318
+ − 1 // Copyright Toru Niina 2017.
+ − 2 // Distributed under the MIT License.
+ − 3 #ifndef TOML11_REGION_HPP
+ − 4 #define TOML11_REGION_HPP
+ − 5 #include <memory>
+ − 6 #include <vector>
+ − 7 #include <algorithm>
+ − 8 #include <initializer_list>
+ − 9 #include <iterator>
+ − 10 #include <iomanip>
+ − 11 #include <cassert>
+ − 12 #include "color.hpp"
+ − 13
+ − 14 namespace toml
+ − 15 {
+ − 16 namespace detail
+ − 17 {
+ − 18
+ − 19 // helper function to avoid std::string(0, 'c') or std::string(iter, iter)
// Builds a std::string from the characters in [first, last).
// An empty range yields an empty string without invoking the
// iterator-pair constructor of std::string.
template<typename Iterator>
std::string make_string(Iterator first, Iterator last)
{
    std::string result;
    if(first != last)
    {
        result.assign(first, last);
    }
    return result;
}
// Builds a std::string made of `len` copies of `c`.
// A zero length yields an empty string without invoking the
// (count, char) constructor of std::string.
inline std::string make_string(std::size_t len, char c)
{
    return (len == 0) ? std::string() : std::string(len, c);
}
+ − 31
+ − 32 // region_base is a base class of location and region that are defined below.
+ − 33 // it will be used to generate better error messages.
struct region_base
{
    // every special member is defaulted; the destructor is virtual because
    // this type is used as a polymorphic base.
    region_base() = default;
    virtual ~region_base() = default;
    region_base(const region_base&) = default;
    region_base(region_base&& ) = default;
    region_base& operator=(const region_base&) = default;
    region_base& operator=(region_base&& ) = default;

    // the fallback implementations below describe an "unknown" region.
    virtual bool is_ok() const noexcept
    {
        return false;
    }
    virtual char front() const noexcept
    {
        return '\0';
    }

    // textual descriptions; each one falls back to a placeholder.
    virtual std::string str() const
    {
        return "unknown region";
    }
    virtual std::string name() const
    {
        return "unknown file";
    }
    virtual std::string line() const
    {
        return "unknown line";
    }
    virtual std::string line_num() const
    {
        return "?";
    }

    // length of the region
    virtual std::size_t size() const noexcept
    {
        return 0;
    }
    // number of characters in the line before the region
    virtual std::size_t before() const noexcept
    {
        return 0;
    }
    // number of characters in the line after the region
    virtual std::size_t after() const noexcept
    {
        return 0;
    }

    // comments related to the region. the fallback has none.
    // ```toml
    // # comment_before
    // key = "value" # comment_inline
    // ```
    virtual std::vector<std::string> comments() const
    {
        return std::vector<std::string>{};
    }
};
+ − 64
+ − 65 // location represents a position in a container, which contains a file content.
+ − 66 // it can be considered as a region that contains only one character.
+ − 67 //
+ − 68 // it contains pointer to the file content and iterator that points the current
+ − 69 // location.
+ − 70 struct location final : public region_base
+ − 71 {
+ − 72 using const_iterator = typename std::vector<char>::const_iterator;
+ − 73 using difference_type = typename std::iterator_traits<const_iterator>::difference_type;
+ − 74 using source_ptr = std::shared_ptr<const std::vector<char>>;
+ − 75
+ − 76 location(std::string source_name, std::vector<char> cont)
+ − 77 : source_(std::make_shared<std::vector<char>>(std::move(cont))),
+ − 78 line_number_(1), source_name_(std::move(source_name)), iter_(source_->cbegin())
+ − 79 {}
+ − 80 location(std::string source_name, const std::string& cont)
+ − 81 : source_(std::make_shared<std::vector<char>>(cont.begin(), cont.end())),
+ − 82 line_number_(1), source_name_(std::move(source_name)), iter_(source_->cbegin())
+ − 83 {}
+ − 84
+ − 85 location(const location&) = default;
+ − 86 location(location&&) = default;
+ − 87 location& operator=(const location&) = default;
+ − 88 location& operator=(location&&) = default;
+ − 89 ~location() = default;
+ − 90
+ − 91 bool is_ok() const noexcept override {return static_cast<bool>(source_);}
+ − 92 char front() const noexcept override {return *iter_;}
+ − 93
+ − 94 // this const prohibits codes like `++(loc.iter())`.
+ − 95 std::add_const<const_iterator>::type iter() const noexcept {return iter_;}
+ − 96
+ − 97 const_iterator begin() const noexcept {return source_->cbegin();}
+ − 98 const_iterator end() const noexcept {return source_->cend();}
+ − 99
+ − 100 // XXX `location::line_num()` used to be implemented using `std::count` to
+ − 101 // count a number of '\n'. But with a long toml file (typically, 10k lines),
+ − 102 // it becomes intolerably slow because each time it generates error messages,
+ − 103 // it counts '\n' from thousands of characters. To workaround it, I decided
+ − 104 // to introduce `location::line_number_` member variable and synchronize it
+ − 105 // to the location changes the point to look. So an overload of `iter()`
+ − 106 // which returns mutable reference is removed and `advance()`, `retrace()`
+ − 107 // and `reset()` is added.
+ − 108 void advance(difference_type n = 1) noexcept
+ − 109 {
+ − 110 this->line_number_ += static_cast<std::size_t>(
+ − 111 std::count(this->iter_, std::next(this->iter_, n), '\n'));
+ − 112 this->iter_ += n;
+ − 113 return;
+ − 114 }
+ − 115 void retrace(difference_type n = 1) noexcept
+ − 116 {
+ − 117 this->line_number_ -= static_cast<std::size_t>(
+ − 118 std::count(std::prev(this->iter_, n), this->iter_, '\n'));
+ − 119 this->iter_ -= n;
+ − 120 return;
+ − 121 }
+ − 122 void reset(const_iterator rollback) noexcept
+ − 123 {
+ − 124 // since c++11, std::distance works in both ways for random-access
+ − 125 // iterators and returns a negative value if `first > last`.
+ − 126 if(0 <= std::distance(rollback, this->iter_)) // rollback < iter
+ − 127 {
+ − 128 this->line_number_ -= static_cast<std::size_t>(
+ − 129 std::count(rollback, this->iter_, '\n'));
+ − 130 }
+ − 131 else // iter < rollback [[unlikely]]
+ − 132 {
+ − 133 this->line_number_ += static_cast<std::size_t>(
+ − 134 std::count(this->iter_, rollback, '\n'));
+ − 135 }
+ − 136 this->iter_ = rollback;
+ − 137 return;
+ − 138 }
+ − 139
+ − 140 std::string str() const override {return make_string(1, *this->iter());}
+ − 141 std::string name() const override {return source_name_;}
+ − 142
+ − 143 std::string line_num() const override
+ − 144 {
+ − 145 return std::to_string(this->line_number_);
+ − 146 }
+ − 147
+ − 148 std::string line() const override
+ − 149 {
+ − 150 return make_string(this->line_begin(), this->line_end());
+ − 151 }
+ − 152
+ − 153 const_iterator line_begin() const noexcept
+ − 154 {
+ − 155 using reverse_iterator = std::reverse_iterator<const_iterator>;
+ − 156 return std::find(reverse_iterator(this->iter()),
+ − 157 reverse_iterator(this->begin()), '\n').base();
+ − 158 }
+ − 159 const_iterator line_end() const noexcept
+ − 160 {
+ − 161 return std::find(this->iter(), this->end(), '\n');
+ − 162 }
+ − 163
+ − 164 // location is always points a character. so the size is 1.
+ − 165 std::size_t size() const noexcept override
+ − 166 {
+ − 167 return 1u;
+ − 168 }
+ − 169 std::size_t before() const noexcept override
+ − 170 {
+ − 171 const auto sz = std::distance(this->line_begin(), this->iter());
+ − 172 assert(sz >= 0);
+ − 173 return static_cast<std::size_t>(sz);
+ − 174 }
+ − 175 std::size_t after() const noexcept override
+ − 176 {
+ − 177 const auto sz = std::distance(this->iter(), this->line_end());
+ − 178 assert(sz >= 0);
+ − 179 return static_cast<std::size_t>(sz);
+ − 180 }
+ − 181
+ − 182 source_ptr const& source() const& noexcept {return source_;}
+ − 183 source_ptr&& source() && noexcept {return std::move(source_);}
+ − 184
+ − 185 private:
+ − 186
+ − 187 source_ptr source_;
+ − 188 std::size_t line_number_;
+ − 189 std::string source_name_;
+ − 190 const_iterator iter_;
+ − 191 };
+ − 192
+ − 193 // region represents a range in a container, which contains a file content.
+ − 194 //
+ − 195 // it contains pointer to the file content and iterator that points the first
+ − 196 // and last location.
+ − 197 struct region final : public region_base
+ − 198 {
+ − 199 using const_iterator = typename std::vector<char>::const_iterator;
+ − 200 using source_ptr = std::shared_ptr<const std::vector<char>>;
+ − 201
+ − 202 // delete default constructor. source_ never be null.
+ − 203 region() = delete;
+ − 204
+ − 205 explicit region(const location& loc)
+ − 206 : source_(loc.source()), source_name_(loc.name()),
+ − 207 first_(loc.iter()), last_(loc.iter())
+ − 208 {}
+ − 209 explicit region(location&& loc)
+ − 210 : source_(loc.source()), source_name_(loc.name()),
+ − 211 first_(loc.iter()), last_(loc.iter())
+ − 212 {}
+ − 213
+ − 214 region(const location& loc, const_iterator f, const_iterator l)
+ − 215 : source_(loc.source()), source_name_(loc.name()), first_(f), last_(l)
+ − 216 {}
+ − 217 region(location&& loc, const_iterator f, const_iterator l)
+ − 218 : source_(loc.source()), source_name_(loc.name()), first_(f), last_(l)
+ − 219 {}
+ − 220
+ − 221 region(const region&) = default;
+ − 222 region(region&&) = default;
+ − 223 region& operator=(const region&) = default;
+ − 224 region& operator=(region&&) = default;
+ − 225 ~region() = default;
+ − 226
+ − 227 region& operator+=(const region& other)
+ − 228 {
+ − 229 // different regions cannot be concatenated
+ − 230 assert(this->source_ == other.source_ && this->last_ == other.first_);
+ − 231
+ − 232 this->last_ = other.last_;
+ − 233 return *this;
+ − 234 }
+ − 235
+ − 236 bool is_ok() const noexcept override {return static_cast<bool>(source_);}
+ − 237 char front() const noexcept override {return *first_;}
+ − 238
+ − 239 std::string str() const override {return make_string(first_, last_);}
+ − 240 std::string line() const override
+ − 241 {
+ − 242 if(this->contain_newline())
+ − 243 {
+ − 244 return make_string(this->line_begin(),
+ − 245 std::find(this->line_begin(), this->last(), '\n'));
+ − 246 }
+ − 247 return make_string(this->line_begin(), this->line_end());
+ − 248 }
+ − 249 std::string line_num() const override
+ − 250 {
+ − 251 return std::to_string(1 + std::count(this->begin(), this->first(), '\n'));
+ − 252 }
+ − 253
+ − 254 std::size_t size() const noexcept override
+ − 255 {
+ − 256 const auto sz = std::distance(first_, last_);
+ − 257 assert(sz >= 0);
+ − 258 return static_cast<std::size_t>(sz);
+ − 259 }
+ − 260 std::size_t before() const noexcept override
+ − 261 {
+ − 262 const auto sz = std::distance(this->line_begin(), this->first());
+ − 263 assert(sz >= 0);
+ − 264 return static_cast<std::size_t>(sz);
+ − 265 }
+ − 266 std::size_t after() const noexcept override
+ − 267 {
+ − 268 const auto sz = std::distance(this->last(), this->line_end());
+ − 269 assert(sz >= 0);
+ − 270 return static_cast<std::size_t>(sz);
+ − 271 }
+ − 272
+ − 273 bool contain_newline() const noexcept
+ − 274 {
+ − 275 return std::find(this->first(), this->last(), '\n') != this->last();
+ − 276 }
+ − 277
+ − 278 const_iterator line_begin() const noexcept
+ − 279 {
+ − 280 using reverse_iterator = std::reverse_iterator<const_iterator>;
+ − 281 return std::find(reverse_iterator(this->first()),
+ − 282 reverse_iterator(this->begin()), '\n').base();
+ − 283 }
+ − 284 const_iterator line_end() const noexcept
+ − 285 {
+ − 286 return std::find(this->last(), this->end(), '\n');
+ − 287 }
+ − 288
+ − 289 const_iterator begin() const noexcept {return source_->cbegin();}
+ − 290 const_iterator end() const noexcept {return source_->cend();}
+ − 291 const_iterator first() const noexcept {return first_;}
+ − 292 const_iterator last() const noexcept {return last_;}
+ − 293
+ − 294 source_ptr const& source() const& noexcept {return source_;}
+ − 295 source_ptr&& source() && noexcept {return std::move(source_);}
+ − 296
+ − 297 std::string name() const override {return source_name_;}
+ − 298
+ − 299 std::vector<std::string> comments() const override
+ − 300 {
+ − 301 // assuming the current region (`*this`) points a value.
+ − 302 // ```toml
+ − 303 // a = "value"
+ − 304 // ^^^^^^^- this region
+ − 305 // ```
+ − 306 using rev_iter = std::reverse_iterator<const_iterator>;
+ − 307
+ − 308 std::vector<std::string> com{};
+ − 309 {
+ − 310 // find comments just before the current region.
+ − 311 // ```toml
+ − 312 // # this should be collected.
+ − 313 // # this also.
+ − 314 // a = value # not this.
+ − 315 // ```
+ − 316
+ − 317 // # this is a comment for `a`, not array elements.
+ − 318 // a = [1, 2, 3, 4, 5]
+ − 319 if(this->first() == std::find_if(this->line_begin(), this->first(),
+ − 320 [](const char c) noexcept -> bool {return c == '[' || c == '{';}))
+ − 321 {
+ − 322 auto iter = this->line_begin(); // points the first character
+ − 323 while(iter != this->begin())
+ − 324 {
+ − 325 iter = std::prev(iter);
+ − 326
+ − 327 // range [line_start, iter) represents the previous line
+ − 328 const auto line_start = std::find(
+ − 329 rev_iter(iter), rev_iter(this->begin()), '\n').base();
+ − 330 const auto comment_found = std::find(line_start, iter, '#');
+ − 331 if(comment_found == iter)
+ − 332 {
+ − 333 break; // comment not found.
+ − 334 }
+ − 335
+ − 336 // exclude the following case.
+ − 337 // > a = "foo" # comment // <-- this is not a comment for b but a.
+ − 338 // > b = "current value"
+ − 339 if(std::all_of(line_start, comment_found,
+ − 340 [](const char c) noexcept -> bool {
+ − 341 return c == ' ' || c == '\t';
+ − 342 }))
+ − 343 {
+ − 344 // unwrap the first '#' by std::next.
+ − 345 auto s = make_string(std::next(comment_found), iter);
+ − 346 if(!s.empty() && s.back() == '\r') {s.pop_back();}
+ − 347 com.push_back(std::move(s));
+ − 348 }
+ − 349 else
+ − 350 {
+ − 351 break;
+ − 352 }
+ − 353 iter = line_start;
+ − 354 }
+ − 355 }
+ − 356 }
+ − 357
+ − 358 if(com.size() > 1)
+ − 359 {
+ − 360 std::reverse(com.begin(), com.end());
+ − 361 }
+ − 362
+ − 363 {
+ − 364 // find comments just after the current region.
+ − 365 // ```toml
+ − 366 // # not this.
+ − 367 // a = value # this one.
+ − 368 // a = [ # not this (technically difficult)
+ − 369 //
+ − 370 // ] # and this.
+ − 371 // ```
+ − 372 // The reason why it's difficult is that it requires parsing in the
+ − 373 // following case.
+ − 374 // ```toml
+ − 375 // a = [ 10 # this comment is for `10`. not for `a` but `a[0]`.
+ − 376 // # ...
+ − 377 // ] # this is apparently a comment for a.
+ − 378 //
+ − 379 // b = [
+ − 380 // 3.14 ] # there is no way to add a comment to `3.14` currently.
+ − 381 //
+ − 382 // c = [
+ − 383 // 3.14 # do this if you need a comment here.
+ − 384 // ]
+ − 385 // ```
+ − 386 const auto comment_found =
+ − 387 std::find(this->last(), this->line_end(), '#');
+ − 388 if(comment_found != this->line_end()) // '#' found
+ − 389 {
+ − 390 // table = {key = "value"} # what is this for?
+ − 391 // the above comment is not for "value", but {key="value"}.
+ − 392 if(comment_found == std::find_if(this->last(), comment_found,
+ − 393 [](const char c) noexcept -> bool {
+ − 394 return !(c == ' ' || c == '\t' || c == ',');
+ − 395 }))
+ − 396 {
+ − 397 // unwrap the first '#' by std::next.
+ − 398 auto s = make_string(std::next(comment_found), this->line_end());
+ − 399 if(!s.empty() && s.back() == '\r') {s.pop_back();}
+ − 400 com.push_back(std::move(s));
+ − 401 }
+ − 402 }
+ − 403 }
+ − 404 return com;
+ − 405 }
+ − 406
+ − 407 private:
+ − 408
+ − 409 source_ptr source_;
+ − 410 std::string source_name_;
+ − 411 const_iterator first_, last_;
+ − 412 };
+ − 413
+ − 414 } // detail
+ − 415 } // toml
#endif // TOML11_REGION_HPP