318
+ − 1 // Copyright Toru Niina 2017.
+ − 2 // Distributed under the MIT License.
+ − 3 #ifndef TOML11_PARSER_HPP
+ − 4 #define TOML11_PARSER_HPP
+ − 5 #include <cstring>
+ − 6 #include <fstream>
+ − 7 #include <sstream>
+ − 8
+ − 9 #include "combinator.hpp"
+ − 10 #include "lexer.hpp"
+ − 11 #include "macros.hpp"
+ − 12 #include "region.hpp"
+ − 13 #include "result.hpp"
+ − 14 #include "types.hpp"
+ − 15 #include "value.hpp"
+ − 16
+ − 17 #ifndef TOML11_DISABLE_STD_FILESYSTEM
+ − 18 #ifdef __cpp_lib_filesystem
+ − 19 #if __has_include(<filesystem>)
+ − 20 #define TOML11_HAS_STD_FILESYSTEM
+ − 21 #include <filesystem>
#endif // __has_include(<filesystem>)
+ − 23 #endif // __cpp_lib_filesystem
+ − 24 #endif // TOML11_DISABLE_STD_FILESYSTEM
+ − 25
// The previous commit worked with 500+ levels of recursion, so this limit may
// be too small. In most cases, however, such deeply nested arrays or
// inline tables should not be needed.
+ − 29 #define TOML11_VALUE_RECURSION_LIMIT 64
+ − 30
+ − 31 namespace toml
+ − 32 {
+ − 33 namespace detail
+ − 34 {
+ − 35
+ − 36 inline result<std::pair<boolean, region>, std::string>
+ − 37 parse_boolean(location& loc)
+ − 38 {
+ − 39 const auto first = loc.iter();
+ − 40 if(const auto token = lex_boolean::invoke(loc))
+ − 41 {
+ − 42 const auto reg = token.unwrap();
+ − 43 if (reg.str() == "true") {return ok(std::make_pair(true, reg));}
+ − 44 else if(reg.str() == "false") {return ok(std::make_pair(false, reg));}
+ − 45 else // internal error.
+ − 46 {
+ − 47 throw internal_error(format_underline(
+ − 48 "toml::parse_boolean: internal error",
+ − 49 {{source_location(reg), "invalid token"}}),
+ − 50 source_location(reg));
+ − 51 }
+ − 52 }
+ − 53 loc.reset(first); //rollback
+ − 54 return err(format_underline("toml::parse_boolean: ",
+ − 55 {{source_location(loc), "the next token is not a boolean"}}));
+ − 56 }
+ − 57
+ − 58 inline result<std::pair<integer, region>, std::string>
+ − 59 parse_binary_integer(location& loc)
+ − 60 {
+ − 61 const auto first = loc.iter();
+ − 62 if(const auto token = lex_bin_int::invoke(loc))
+ − 63 {
+ − 64 auto str = token.unwrap().str();
+ − 65 assert(str.size() > 2); // minimum -> 0b1
+ − 66 assert(str.at(0) == '0' && str.at(1) == 'b');
+ − 67
+ − 68 // skip all the zeros and `_` locating at the MSB
+ − 69 str.erase(str.begin(), std::find_if(
+ − 70 str.begin() + 2, // to skip prefix `0b`
+ − 71 str.end(),
+ − 72 [](const char c) { return c == '1'; })
+ − 73 );
+ − 74 assert(str.empty() || str.front() == '1');
+ − 75
+ − 76 // since toml11 uses int64_t, 64bit (unsigned) input cannot be read.
+ − 77 const auto max_length = 63 + std::count(str.begin(), str.end(), '_');
+ − 78 if(static_cast<std::string::size_type>(max_length) < str.size())
+ − 79 {
+ − 80 loc.reset(first);
+ − 81 return err(format_underline("toml::parse_binary_integer: "
+ − 82 "only signed 64bit integer is available",
+ − 83 {{source_location(loc), "too large input (> int64_t)"}}));
+ − 84 }
+ − 85
+ − 86 integer retval(0), base(1);
+ − 87 for(auto i(str.rbegin()), e(str.rend()); i!=e; ++i)
+ − 88 {
+ − 89 assert(base != 0); // means overflow, checked in the above code
+ − 90 if(*i == '1')
+ − 91 {
+ − 92 retval += base;
+ − 93 if( (std::numeric_limits<integer>::max)() / 2 < base )
+ − 94 {
+ − 95 base = 0;
+ − 96 }
+ − 97 base *= 2;
+ − 98 }
+ − 99 else if(*i == '0')
+ − 100 {
+ − 101 if( (std::numeric_limits<integer>::max)() / 2 < base )
+ − 102 {
+ − 103 base = 0;
+ − 104 }
+ − 105 base *= 2;
+ − 106 }
+ − 107 else if(*i == '_')
+ − 108 {
+ − 109 // do nothing.
+ − 110 }
+ − 111 else // should be detected by lex_bin_int. [[unlikely]]
+ − 112 {
+ − 113 throw internal_error(format_underline(
+ − 114 "toml::parse_binary_integer: internal error",
+ − 115 {{source_location(token.unwrap()), "invalid token"}}),
+ − 116 source_location(loc));
+ − 117 }
+ − 118 }
+ − 119 return ok(std::make_pair(retval, token.unwrap()));
+ − 120 }
+ − 121 loc.reset(first);
+ − 122 return err(format_underline("toml::parse_binary_integer:",
+ − 123 {{source_location(loc), "the next token is not an integer"}}));
+ − 124 }
+ − 125
+ − 126 inline result<std::pair<integer, region>, std::string>
+ − 127 parse_octal_integer(location& loc)
+ − 128 {
+ − 129 const auto first = loc.iter();
+ − 130 if(const auto token = lex_oct_int::invoke(loc))
+ − 131 {
+ − 132 auto str = token.unwrap().str();
+ − 133 str.erase(std::remove(str.begin(), str.end(), '_'), str.end());
+ − 134 str.erase(str.begin()); str.erase(str.begin()); // remove `0o` prefix
+ − 135
+ − 136 std::istringstream iss(str);
+ − 137 integer retval(0);
+ − 138 iss >> std::oct >> retval;
+ − 139 if(iss.fail())
+ − 140 {
+ − 141 // `istream` sets `failbit` if internally-called `std::num_get::get`
+ − 142 // fails.
+ − 143 // `std::num_get::get` calls `std::strtoll` if the argument type is
+ − 144 // signed.
+ − 145 // `std::strtoll` fails if
+ − 146 // - the value is out_of_range or
+ − 147 // - no conversion is possible.
+ − 148 // since we already checked that the string is valid octal integer,
+ − 149 // so the error reason is out_of_range.
+ − 150 loc.reset(first);
+ − 151 return err(format_underline("toml::parse_octal_integer:",
+ − 152 {{source_location(loc), "out of range"}}));
+ − 153 }
+ − 154 return ok(std::make_pair(retval, token.unwrap()));
+ − 155 }
+ − 156 loc.reset(first);
+ − 157 return err(format_underline("toml::parse_octal_integer:",
+ − 158 {{source_location(loc), "the next token is not an integer"}}));
+ − 159 }
+ − 160
+ − 161 inline result<std::pair<integer, region>, std::string>
+ − 162 parse_hexadecimal_integer(location& loc)
+ − 163 {
+ − 164 const auto first = loc.iter();
+ − 165 if(const auto token = lex_hex_int::invoke(loc))
+ − 166 {
+ − 167 auto str = token.unwrap().str();
+ − 168 str.erase(std::remove(str.begin(), str.end(), '_'), str.end());
+ − 169 str.erase(str.begin()); str.erase(str.begin()); // remove `0x` prefix
+ − 170
+ − 171 std::istringstream iss(str);
+ − 172 integer retval(0);
+ − 173 iss >> std::hex >> retval;
+ − 174 if(iss.fail())
+ − 175 {
+ − 176 // see parse_octal_integer for detail of this error message.
+ − 177 loc.reset(first);
+ − 178 return err(format_underline("toml::parse_hexadecimal_integer:",
+ − 179 {{source_location(loc), "out of range"}}));
+ − 180 }
+ − 181 return ok(std::make_pair(retval, token.unwrap()));
+ − 182 }
+ − 183 loc.reset(first);
+ − 184 return err(format_underline("toml::parse_hexadecimal_integer",
+ − 185 {{source_location(loc), "the next token is not an integer"}}));
+ − 186 }
+ − 187
+ − 188 inline result<std::pair<integer, region>, std::string>
+ − 189 parse_integer(location& loc)
+ − 190 {
+ − 191 const auto first = loc.iter();
+ − 192 if(first != loc.end() && *first == '0')
+ − 193 {
+ − 194 const auto second = std::next(first);
+ − 195 if(second == loc.end()) // the token is just zero.
+ − 196 {
+ − 197 loc.advance();
+ − 198 return ok(std::make_pair(0, region(loc, first, second)));
+ − 199 }
+ − 200
+ − 201 if(*second == 'b') {return parse_binary_integer (loc);} // 0b1100
+ − 202 if(*second == 'o') {return parse_octal_integer (loc);} // 0o775
+ − 203 if(*second == 'x') {return parse_hexadecimal_integer(loc);} // 0xC0FFEE
+ − 204
+ − 205 if(std::isdigit(*second))
+ − 206 {
+ − 207 return err(format_underline("toml::parse_integer: "
+ − 208 "leading zero in an Integer is not allowed.",
+ − 209 {{source_location(loc), "leading zero"}}));
+ − 210 }
+ − 211 else if(std::isalpha(*second))
+ − 212 {
+ − 213 return err(format_underline("toml::parse_integer: "
+ − 214 "unknown integer prefix appeared.",
+ − 215 {{source_location(loc), "none of 0x, 0o, 0b"}}));
+ − 216 }
+ − 217 }
+ − 218
+ − 219 if(const auto token = lex_dec_int::invoke(loc))
+ − 220 {
+ − 221 auto str = token.unwrap().str();
+ − 222 str.erase(std::remove(str.begin(), str.end(), '_'), str.end());
+ − 223
+ − 224 std::istringstream iss(str);
+ − 225 integer retval(0);
+ − 226 iss >> retval;
+ − 227 if(iss.fail())
+ − 228 {
+ − 229 // see parse_octal_integer for detail of this error message.
+ − 230 loc.reset(first);
+ − 231 return err(format_underline("toml::parse_integer:",
+ − 232 {{source_location(loc), "out of range"}}));
+ − 233 }
+ − 234 return ok(std::make_pair(retval, token.unwrap()));
+ − 235 }
+ − 236 loc.reset(first);
+ − 237 return err(format_underline("toml::parse_integer: ",
+ − 238 {{source_location(loc), "the next token is not an integer"}}));
+ − 239 }
+ − 240
+ − 241 inline result<std::pair<floating, region>, std::string>
+ − 242 parse_floating(location& loc)
+ − 243 {
+ − 244 const auto first = loc.iter();
+ − 245 if(const auto token = lex_float::invoke(loc))
+ − 246 {
+ − 247 auto str = token.unwrap().str();
+ − 248 if(str == "inf" || str == "+inf")
+ − 249 {
+ − 250 if(std::numeric_limits<floating>::has_infinity)
+ − 251 {
+ − 252 return ok(std::make_pair(
+ − 253 std::numeric_limits<floating>::infinity(), token.unwrap()));
+ − 254 }
+ − 255 else
+ − 256 {
+ − 257 throw std::domain_error("toml::parse_floating: inf value found"
+ − 258 " but the current environment does not support inf. Please"
+ − 259 " make sure that the floating-point implementation conforms"
+ − 260 " IEEE 754/ISO 60559 international standard.");
+ − 261 }
+ − 262 }
+ − 263 else if(str == "-inf")
+ − 264 {
+ − 265 if(std::numeric_limits<floating>::has_infinity)
+ − 266 {
+ − 267 return ok(std::make_pair(
+ − 268 -std::numeric_limits<floating>::infinity(), token.unwrap()));
+ − 269 }
+ − 270 else
+ − 271 {
+ − 272 throw std::domain_error("toml::parse_floating: inf value found"
+ − 273 " but the current environment does not support inf. Please"
+ − 274 " make sure that the floating-point implementation conforms"
+ − 275 " IEEE 754/ISO 60559 international standard.");
+ − 276 }
+ − 277 }
+ − 278 else if(str == "nan" || str == "+nan")
+ − 279 {
+ − 280 if(std::numeric_limits<floating>::has_quiet_NaN)
+ − 281 {
+ − 282 return ok(std::make_pair(
+ − 283 std::numeric_limits<floating>::quiet_NaN(), token.unwrap()));
+ − 284 }
+ − 285 else if(std::numeric_limits<floating>::has_signaling_NaN)
+ − 286 {
+ − 287 return ok(std::make_pair(
+ − 288 std::numeric_limits<floating>::signaling_NaN(), token.unwrap()));
+ − 289 }
+ − 290 else
+ − 291 {
+ − 292 throw std::domain_error("toml::parse_floating: NaN value found"
+ − 293 " but the current environment does not support NaN. Please"
+ − 294 " make sure that the floating-point implementation conforms"
+ − 295 " IEEE 754/ISO 60559 international standard.");
+ − 296 }
+ − 297 }
+ − 298 else if(str == "-nan")
+ − 299 {
+ − 300 if(std::numeric_limits<floating>::has_quiet_NaN)
+ − 301 {
+ − 302 return ok(std::make_pair(
+ − 303 -std::numeric_limits<floating>::quiet_NaN(), token.unwrap()));
+ − 304 }
+ − 305 else if(std::numeric_limits<floating>::has_signaling_NaN)
+ − 306 {
+ − 307 return ok(std::make_pair(
+ − 308 -std::numeric_limits<floating>::signaling_NaN(), token.unwrap()));
+ − 309 }
+ − 310 else
+ − 311 {
+ − 312 throw std::domain_error("toml::parse_floating: NaN value found"
+ − 313 " but the current environment does not support NaN. Please"
+ − 314 " make sure that the floating-point implementation conforms"
+ − 315 " IEEE 754/ISO 60559 international standard.");
+ − 316 }
+ − 317 }
+ − 318 str.erase(std::remove(str.begin(), str.end(), '_'), str.end());
+ − 319 std::istringstream iss(str);
+ − 320 floating v(0.0);
+ − 321 iss >> v;
+ − 322 if(iss.fail())
+ − 323 {
+ − 324 // see parse_octal_integer for detail of this error message.
+ − 325 loc.reset(first);
+ − 326 return err(format_underline("toml::parse_floating:",
+ − 327 {{source_location(loc), "out of range"}}));
+ − 328 }
+ − 329 return ok(std::make_pair(v, token.unwrap()));
+ − 330 }
+ − 331 loc.reset(first);
+ − 332 return err(format_underline("toml::parse_floating: ",
+ − 333 {{source_location(loc), "the next token is not a float"}}));
+ − 334 }
+ − 335
+ − 336 inline std::string read_utf8_codepoint(const region& reg, const location& loc)
+ − 337 {
+ − 338 const auto str = reg.str().substr(1);
+ − 339 std::uint_least32_t codepoint;
+ − 340 std::istringstream iss(str);
+ − 341 iss >> std::hex >> codepoint;
+ − 342
+ − 343 const auto to_char = [](const std::uint_least32_t i) noexcept -> char {
+ − 344 const auto uc = static_cast<unsigned char>(i);
+ − 345 return *reinterpret_cast<const char*>(std::addressof(uc));
+ − 346 };
+ − 347
+ − 348 std::string character;
+ − 349 if(codepoint < 0x80) // U+0000 ... U+0079 ; just an ASCII.
+ − 350 {
+ − 351 character += static_cast<char>(codepoint);
+ − 352 }
+ − 353 else if(codepoint < 0x800) //U+0080 ... U+07FF
+ − 354 {
+ − 355 // 110yyyyx 10xxxxxx; 0x3f == 0b0011'1111
+ − 356 character += to_char(0xC0| codepoint >> 6);
+ − 357 character += to_char(0x80|(codepoint & 0x3F));
+ − 358 }
+ − 359 else if(codepoint < 0x10000) // U+0800...U+FFFF
+ − 360 {
+ − 361 if(0xD800 <= codepoint && codepoint <= 0xDFFF)
+ − 362 {
+ − 363 throw syntax_error(format_underline(
+ − 364 "toml::read_utf8_codepoint: codepoints in the range "
+ − 365 "[0xD800, 0xDFFF] are not valid UTF-8.", {{
+ − 366 source_location(loc), "not a valid UTF-8 codepoint"
+ − 367 }}), source_location(loc));
+ − 368 }
+ − 369 assert(codepoint < 0xD800 || 0xDFFF < codepoint);
+ − 370 // 1110yyyy 10yxxxxx 10xxxxxx
+ − 371 character += to_char(0xE0| codepoint >> 12);
+ − 372 character += to_char(0x80|(codepoint >> 6 & 0x3F));
+ − 373 character += to_char(0x80|(codepoint & 0x3F));
+ − 374 }
+ − 375 else if(codepoint < 0x110000) // U+010000 ... U+10FFFF
+ − 376 {
+ − 377 // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
+ − 378 character += to_char(0xF0| codepoint >> 18);
+ − 379 character += to_char(0x80|(codepoint >> 12 & 0x3F));
+ − 380 character += to_char(0x80|(codepoint >> 6 & 0x3F));
+ − 381 character += to_char(0x80|(codepoint & 0x3F));
+ − 382 }
+ − 383 else // out of UTF-8 region
+ − 384 {
+ − 385 throw syntax_error(format_underline("toml::read_utf8_codepoint:"
+ − 386 " input codepoint is too large.",
+ − 387 {{source_location(loc), "should be in [0x00..0x10FFFF]"}}),
+ − 388 source_location(loc));
+ − 389 }
+ − 390 return character;
+ − 391 }
+ − 392
+ − 393 inline result<std::string, std::string> parse_escape_sequence(location& loc)
+ − 394 {
+ − 395 const auto first = loc.iter();
+ − 396 if(first == loc.end() || *first != '\\')
+ − 397 {
+ − 398 return err(format_underline("toml::parse_escape_sequence: ", {{
+ − 399 source_location(loc), "the next token is not a backslash \"\\\""}}));
+ − 400 }
+ − 401 loc.advance();
+ − 402 switch(*loc.iter())
+ − 403 {
+ − 404 case '\\':{loc.advance(); return ok(std::string("\\"));}
+ − 405 case '"' :{loc.advance(); return ok(std::string("\""));}
+ − 406 case 'b' :{loc.advance(); return ok(std::string("\b"));}
+ − 407 case 't' :{loc.advance(); return ok(std::string("\t"));}
+ − 408 case 'n' :{loc.advance(); return ok(std::string("\n"));}
+ − 409 case 'f' :{loc.advance(); return ok(std::string("\f"));}
+ − 410 case 'r' :{loc.advance(); return ok(std::string("\r"));}
+ − 411 #ifdef TOML11_USE_UNRELEASED_TOML_FEATURES
+ − 412 case 'e' :{loc.advance(); return ok(std::string("\x1b"));} // ESC
+ − 413 #endif
+ − 414 case 'u' :
+ − 415 {
+ − 416 if(const auto token = lex_escape_unicode_short::invoke(loc))
+ − 417 {
+ − 418 return ok(read_utf8_codepoint(token.unwrap(), loc));
+ − 419 }
+ − 420 else
+ − 421 {
+ − 422 return err(format_underline("parse_escape_sequence: "
+ − 423 "invalid token found in UTF-8 codepoint uXXXX.",
+ − 424 {{source_location(loc), "here"}}));
+ − 425 }
+ − 426 }
+ − 427 case 'U':
+ − 428 {
+ − 429 if(const auto token = lex_escape_unicode_long::invoke(loc))
+ − 430 {
+ − 431 return ok(read_utf8_codepoint(token.unwrap(), loc));
+ − 432 }
+ − 433 else
+ − 434 {
+ − 435 return err(format_underline("parse_escape_sequence: "
+ − 436 "invalid token found in UTF-8 codepoint Uxxxxxxxx",
+ − 437 {{source_location(loc), "here"}}));
+ − 438 }
+ − 439 }
+ − 440 }
+ − 441
+ − 442 const auto msg = format_underline("parse_escape_sequence: "
+ − 443 "unknown escape sequence appeared.", {{source_location(loc),
+ − 444 "escape sequence is one of \\, \", b, t, n, f, r, uxxxx, Uxxxxxxxx"}},
+ − 445 /* Hints = */{"if you want to write backslash as just one backslash, "
+ − 446 "use literal string like: regex = '<\\i\\c*\\s*>'"});
+ − 447 loc.reset(first);
+ − 448 return err(msg);
+ − 449 }
+ − 450
+ − 451 inline std::ptrdiff_t check_utf8_validity(const std::string& reg)
+ − 452 {
+ − 453 location loc("tmp", reg);
+ − 454 const auto u8 = repeat<lex_utf8_code, unlimited>::invoke(loc);
+ − 455 if(!u8 || loc.iter() != loc.end())
+ − 456 {
+ − 457 const auto error_location = std::distance(loc.begin(), loc.iter());
+ − 458 assert(0 <= error_location);
+ − 459 return error_location;
+ − 460 }
+ − 461 return -1;
+ − 462 }
+ − 463
+ − 464 inline result<std::pair<toml::string, region>, std::string>
+ − 465 parse_ml_basic_string(location& loc)
+ − 466 {
+ − 467 const auto first = loc.iter();
+ − 468 if(const auto token = lex_ml_basic_string::invoke(loc))
+ − 469 {
+ − 470 auto inner_loc = loc;
+ − 471 inner_loc.reset(first);
+ − 472
+ − 473 std::string retval;
+ − 474 retval.reserve(token.unwrap().size());
+ − 475
+ − 476 auto delim = lex_ml_basic_string_open::invoke(inner_loc);
+ − 477 if(!delim)
+ − 478 {
+ − 479 throw internal_error(format_underline(
+ − 480 "parse_ml_basic_string: invalid token",
+ − 481 {{source_location(inner_loc), "should be \"\"\""}}),
+ − 482 source_location(inner_loc));
+ − 483 }
+ − 484 // immediate newline is ignored (if exists)
+ − 485 /* discard return value */ lex_newline::invoke(inner_loc);
+ − 486
+ − 487 delim = none();
+ − 488 while(!delim)
+ − 489 {
+ − 490 using lex_unescaped_seq = repeat<
+ − 491 either<lex_ml_basic_unescaped, lex_newline>, unlimited>;
+ − 492 if(auto unescaped = lex_unescaped_seq::invoke(inner_loc))
+ − 493 {
+ − 494 retval += unescaped.unwrap().str();
+ − 495 }
+ − 496 if(auto escaped = parse_escape_sequence(inner_loc))
+ − 497 {
+ − 498 retval += escaped.unwrap();
+ − 499 }
+ − 500 if(auto esc_nl = lex_ml_basic_escaped_newline::invoke(inner_loc))
+ − 501 {
+ − 502 // ignore newline after escape until next non-ws char
+ − 503 }
+ − 504 if(inner_loc.iter() == inner_loc.end())
+ − 505 {
+ − 506 throw internal_error(format_underline(
+ − 507 "parse_ml_basic_string: unexpected end of region",
+ − 508 {{source_location(inner_loc), "not sufficient token"}}),
+ − 509 source_location(inner_loc));
+ − 510 }
+ − 511 delim = lex_ml_basic_string_close::invoke(inner_loc);
+ − 512 }
+ − 513 // `lex_ml_basic_string_close` allows 3 to 5 `"`s to allow 1 or 2 `"`s
+ − 514 // at just before the delimiter. Here, we need to attach `"`s at the
+ − 515 // end of the string body, if it exists.
+ − 516 // For detail, see the definition of `lex_ml_basic_string_close`.
+ − 517 assert(std::all_of(delim.unwrap().first(), delim.unwrap().last(),
+ − 518 [](const char c) noexcept {return c == '\"';}));
+ − 519 switch(delim.unwrap().size())
+ − 520 {
+ − 521 case 3: {break;}
+ − 522 case 4: {retval += "\""; break;}
+ − 523 case 5: {retval += "\"\""; break;}
+ − 524 default:
+ − 525 {
+ − 526 throw internal_error(format_underline(
+ − 527 "parse_ml_basic_string: closing delimiter has invalid length",
+ − 528 {{source_location(inner_loc), "end of this"}}),
+ − 529 source_location(inner_loc));
+ − 530 }
+ − 531 }
+ − 532
+ − 533 const auto err_loc = check_utf8_validity(token.unwrap().str());
+ − 534 if(err_loc == -1)
+ − 535 {
+ − 536 return ok(std::make_pair(toml::string(retval), token.unwrap()));
+ − 537 }
+ − 538 else
+ − 539 {
+ − 540 inner_loc.reset(first);
+ − 541 inner_loc.advance(err_loc);
+ − 542 throw syntax_error(format_underline(
+ − 543 "parse_ml_basic_string: invalid utf8 sequence found",
+ − 544 {{source_location(inner_loc), "here"}}),
+ − 545 source_location(inner_loc));
+ − 546 }
+ − 547 }
+ − 548 else
+ − 549 {
+ − 550 loc.reset(first);
+ − 551 return err(format_underline("toml::parse_ml_basic_string: "
+ − 552 "the next token is not a valid multiline string",
+ − 553 {{source_location(loc), "here"}}));
+ − 554 }
+ − 555 }
+ − 556
+ − 557 inline result<std::pair<toml::string, region>, std::string>
+ − 558 parse_basic_string(location& loc)
+ − 559 {
+ − 560 const auto first = loc.iter();
+ − 561 if(const auto token = lex_basic_string::invoke(loc))
+ − 562 {
+ − 563 auto inner_loc = loc;
+ − 564 inner_loc.reset(first);
+ − 565
+ − 566 auto quot = lex_quotation_mark::invoke(inner_loc);
+ − 567 if(!quot)
+ − 568 {
+ − 569 throw internal_error(format_underline("parse_basic_string: "
+ − 570 "invalid token", {{source_location(inner_loc), "should be \""}}),
+ − 571 source_location(inner_loc));
+ − 572 }
+ − 573
+ − 574 std::string retval;
+ − 575 retval.reserve(token.unwrap().size());
+ − 576
+ − 577 quot = none();
+ − 578 while(!quot)
+ − 579 {
+ − 580 using lex_unescaped_seq = repeat<lex_basic_unescaped, unlimited>;
+ − 581 if(auto unescaped = lex_unescaped_seq::invoke(inner_loc))
+ − 582 {
+ − 583 retval += unescaped.unwrap().str();
+ − 584 }
+ − 585 if(auto escaped = parse_escape_sequence(inner_loc))
+ − 586 {
+ − 587 retval += escaped.unwrap();
+ − 588 }
+ − 589 if(inner_loc.iter() == inner_loc.end())
+ − 590 {
+ − 591 throw internal_error(format_underline(
+ − 592 "parse_basic_string: unexpected end of region",
+ − 593 {{source_location(inner_loc), "not sufficient token"}}),
+ − 594 source_location(inner_loc));
+ − 595 }
+ − 596 quot = lex_quotation_mark::invoke(inner_loc);
+ − 597 }
+ − 598
+ − 599 const auto err_loc = check_utf8_validity(token.unwrap().str());
+ − 600 if(err_loc == -1)
+ − 601 {
+ − 602 return ok(std::make_pair(toml::string(retval), token.unwrap()));
+ − 603 }
+ − 604 else
+ − 605 {
+ − 606 inner_loc.reset(first);
+ − 607 inner_loc.advance(err_loc);
+ − 608 throw syntax_error(format_underline(
+ − 609 "parse_basic_string: invalid utf8 sequence found",
+ − 610 {{source_location(inner_loc), "here"}}),
+ − 611 source_location(inner_loc));
+ − 612 }
+ − 613 }
+ − 614 else
+ − 615 {
+ − 616 loc.reset(first); // rollback
+ − 617 return err(format_underline("toml::parse_basic_string: "
+ − 618 "the next token is not a valid string",
+ − 619 {{source_location(loc), "here"}}));
+ − 620 }
+ − 621 }
+ − 622
+ − 623 inline result<std::pair<toml::string, region>, std::string>
+ − 624 parse_ml_literal_string(location& loc)
+ − 625 {
+ − 626 const auto first = loc.iter();
+ − 627 if(const auto token = lex_ml_literal_string::invoke(loc))
+ − 628 {
+ − 629 auto inner_loc = loc;
+ − 630 inner_loc.reset(first);
+ − 631
+ − 632 const auto open = lex_ml_literal_string_open::invoke(inner_loc);
+ − 633 if(!open)
+ − 634 {
+ − 635 throw internal_error(format_underline(
+ − 636 "parse_ml_literal_string: invalid token",
+ − 637 {{source_location(inner_loc), "should be '''"}}),
+ − 638 source_location(inner_loc));
+ − 639 }
+ − 640 // immediate newline is ignored (if exists)
+ − 641 /* discard return value */ lex_newline::invoke(inner_loc);
+ − 642
+ − 643 const auto body = lex_ml_literal_body::invoke(inner_loc);
+ − 644
+ − 645 const auto close = lex_ml_literal_string_close::invoke(inner_loc);
+ − 646 if(!close)
+ − 647 {
+ − 648 throw internal_error(format_underline(
+ − 649 "parse_ml_literal_string: invalid token",
+ − 650 {{source_location(inner_loc), "should be '''"}}),
+ − 651 source_location(inner_loc));
+ − 652 }
+ − 653 // `lex_ml_literal_string_close` allows 3 to 5 `'`s to allow 1 or 2 `'`s
+ − 654 // at just before the delimiter. Here, we need to attach `'`s at the
+ − 655 // end of the string body, if it exists.
+ − 656 // For detail, see the definition of `lex_ml_basic_string_close`.
+ − 657
+ − 658 std::string retval = body.unwrap().str();
+ − 659 assert(std::all_of(close.unwrap().first(), close.unwrap().last(),
+ − 660 [](const char c) noexcept {return c == '\'';}));
+ − 661 switch(close.unwrap().size())
+ − 662 {
+ − 663 case 3: {break;}
+ − 664 case 4: {retval += "'"; break;}
+ − 665 case 5: {retval += "''"; break;}
+ − 666 default:
+ − 667 {
+ − 668 throw internal_error(format_underline(
+ − 669 "parse_ml_literal_string: closing delimiter has invalid length",
+ − 670 {{source_location(inner_loc), "end of this"}}),
+ − 671 source_location(inner_loc));
+ − 672 }
+ − 673 }
+ − 674
+ − 675 const auto err_loc = check_utf8_validity(token.unwrap().str());
+ − 676 if(err_loc == -1)
+ − 677 {
+ − 678 return ok(std::make_pair(toml::string(retval, toml::string_t::literal),
+ − 679 token.unwrap()));
+ − 680 }
+ − 681 else
+ − 682 {
+ − 683 inner_loc.reset(first);
+ − 684 inner_loc.advance(err_loc);
+ − 685 throw syntax_error(format_underline(
+ − 686 "parse_ml_literal_string: invalid utf8 sequence found",
+ − 687 {{source_location(inner_loc), "here"}}),
+ − 688 source_location(inner_loc));
+ − 689 }
+ − 690 }
+ − 691 else
+ − 692 {
+ − 693 loc.reset(first); // rollback
+ − 694 return err(format_underline("toml::parse_ml_literal_string: "
+ − 695 "the next token is not a valid multiline literal string",
+ − 696 {{source_location(loc), "here"}}));
+ − 697 }
+ − 698 }
+ − 699
+ − 700 inline result<std::pair<toml::string, region>, std::string>
+ − 701 parse_literal_string(location& loc)
+ − 702 {
+ − 703 const auto first = loc.iter();
+ − 704 if(const auto token = lex_literal_string::invoke(loc))
+ − 705 {
+ − 706 auto inner_loc = loc;
+ − 707 inner_loc.reset(first);
+ − 708
+ − 709 const auto open = lex_apostrophe::invoke(inner_loc);
+ − 710 if(!open)
+ − 711 {
+ − 712 throw internal_error(format_underline(
+ − 713 "parse_literal_string: invalid token",
+ − 714 {{source_location(inner_loc), "should be '"}}),
+ − 715 source_location(inner_loc));
+ − 716 }
+ − 717
+ − 718 const auto body = repeat<lex_literal_char, unlimited>::invoke(inner_loc);
+ − 719
+ − 720 const auto close = lex_apostrophe::invoke(inner_loc);
+ − 721 if(!close)
+ − 722 {
+ − 723 throw internal_error(format_underline(
+ − 724 "parse_literal_string: invalid token",
+ − 725 {{source_location(inner_loc), "should be '"}}),
+ − 726 source_location(inner_loc));
+ − 727 }
+ − 728
+ − 729 const auto err_loc = check_utf8_validity(token.unwrap().str());
+ − 730 if(err_loc == -1)
+ − 731 {
+ − 732 return ok(std::make_pair(
+ − 733 toml::string(body.unwrap().str(), toml::string_t::literal),
+ − 734 token.unwrap()));
+ − 735 }
+ − 736 else
+ − 737 {
+ − 738 inner_loc.reset(first);
+ − 739 inner_loc.advance(err_loc);
+ − 740 throw syntax_error(format_underline(
+ − 741 "parse_literal_string: invalid utf8 sequence found",
+ − 742 {{source_location(inner_loc), "here"}}),
+ − 743 source_location(inner_loc));
+ − 744 }
+ − 745 }
+ − 746 else
+ − 747 {
+ − 748 loc.reset(first); // rollback
+ − 749 return err(format_underline("toml::parse_literal_string: "
+ − 750 "the next token is not a valid literal string",
+ − 751 {{source_location(loc), "here"}}));
+ − 752 }
+ − 753 }
+ − 754
+ − 755 inline result<std::pair<toml::string, region>, std::string>
+ − 756 parse_string(location& loc)
+ − 757 {
+ − 758 if(loc.iter() != loc.end() && *(loc.iter()) == '"')
+ − 759 {
+ − 760 if(loc.iter() + 1 != loc.end() && *(loc.iter() + 1) == '"' &&
+ − 761 loc.iter() + 2 != loc.end() && *(loc.iter() + 2) == '"')
+ − 762 {
+ − 763 return parse_ml_basic_string(loc);
+ − 764 }
+ − 765 else
+ − 766 {
+ − 767 return parse_basic_string(loc);
+ − 768 }
+ − 769 }
+ − 770 else if(loc.iter() != loc.end() && *(loc.iter()) == '\'')
+ − 771 {
+ − 772 if(loc.iter() + 1 != loc.end() && *(loc.iter() + 1) == '\'' &&
+ − 773 loc.iter() + 2 != loc.end() && *(loc.iter() + 2) == '\'')
+ − 774 {
+ − 775 return parse_ml_literal_string(loc);
+ − 776 }
+ − 777 else
+ − 778 {
+ − 779 return parse_literal_string(loc);
+ − 780 }
+ − 781 }
+ − 782 return err(format_underline("toml::parse_string: ",
+ − 783 {{source_location(loc), "the next token is not a string"}}));
+ − 784 }
+ − 785
+ − 786 inline result<std::pair<local_date, region>, std::string>
+ − 787 parse_local_date(location& loc)
+ − 788 {
+ − 789 const auto first = loc.iter();
+ − 790 if(const auto token = lex_local_date::invoke(loc))
+ − 791 {
+ − 792 location inner_loc(loc.name(), token.unwrap().str());
+ − 793
+ − 794 const auto y = lex_date_fullyear::invoke(inner_loc);
+ − 795 if(!y || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-')
+ − 796 {
+ − 797 throw internal_error(format_underline(
+ − 798 "toml::parse_local_date: invalid year format",
+ − 799 {{source_location(inner_loc), "should be `-`"}}),
+ − 800 source_location(inner_loc));
+ − 801 }
+ − 802 inner_loc.advance();
+ − 803 const auto m = lex_date_month::invoke(inner_loc);
+ − 804 if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-')
+ − 805 {
+ − 806 throw internal_error(format_underline(
+ − 807 "toml::parse_local_date: invalid month format",
+ − 808 {{source_location(inner_loc), "should be `-`"}}),
+ − 809 source_location(inner_loc));
+ − 810 }
+ − 811 inner_loc.advance();
+ − 812 const auto d = lex_date_mday::invoke(inner_loc);
+ − 813 if(!d)
+ − 814 {
+ − 815 throw internal_error(format_underline(
+ − 816 "toml::parse_local_date: invalid day format",
+ − 817 {{source_location(inner_loc), "here"}}),
+ − 818 source_location(inner_loc));
+ − 819 }
+ − 820
+ − 821 const auto year = static_cast<std::int16_t>(from_string<int>(y.unwrap().str(), 0));
+ − 822 const auto month = static_cast<std::int8_t >(from_string<int>(m.unwrap().str(), 0));
+ − 823 const auto day = static_cast<std::int8_t >(from_string<int>(d.unwrap().str(), 0));
+ − 824
+ − 825 // We briefly check whether the input date is valid or not. But here, we
+ − 826 // only check if the RFC3339 compliance.
+ − 827 // Actually there are several special date that does not exist,
+ − 828 // because of historical reasons, such as 1582/10/5-1582/10/14 (only in
+ − 829 // several countries). But here, we do not care about such a complicated
+ − 830 // rule. It makes the code complicated and there is only low probability
+ − 831 // that such a specific date is needed in practice. If someone need to
+ − 832 // validate date accurately, that means that the one need a specialized
+ − 833 // library for their purpose in a different layer.
+ − 834 {
+ − 835 const bool is_leap = (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0));
+ − 836 const auto max_day = (month == 2) ? (is_leap ? 29 : 28) :
+ − 837 ((month == 4 || month == 6 || month == 9 || month == 11) ? 30 : 31);
+ − 838
+ − 839 if((month < 1 || 12 < month) || (day < 1 || max_day < day))
+ − 840 {
+ − 841 throw syntax_error(format_underline("toml::parse_date: "
+ − 842 "invalid date: it does not conform RFC3339.", {{
+ − 843 source_location(loc), "month should be 01-12, day should be"
+ − 844 " 01-28,29,30,31, depending on month/year."
+ − 845 }}), source_location(inner_loc));
+ − 846 }
+ − 847 }
+ − 848 return ok(std::make_pair(local_date(year, static_cast<month_t>(month - 1), day),
+ − 849 token.unwrap()));
+ − 850 }
+ − 851 else
+ − 852 {
+ − 853 loc.reset(first);
+ − 854 return err(format_underline("toml::parse_local_date: ",
+ − 855 {{source_location(loc), "the next token is not a local_date"}}));
+ − 856 }
+ − 857 }
+ − 858
+ − 859 inline result<std::pair<local_time, region>, std::string>
+ − 860 parse_local_time(location& loc)
+ − 861 {
+ − 862 const auto first = loc.iter();
+ − 863 if(const auto token = lex_local_time::invoke(loc))
+ − 864 {
+ − 865 location inner_loc(loc.name(), token.unwrap().str());
+ − 866
+ − 867 const auto h = lex_time_hour::invoke(inner_loc);
+ − 868 if(!h || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':')
+ − 869 {
+ − 870 throw internal_error(format_underline(
+ − 871 "toml::parse_local_time: invalid year format",
+ − 872 {{source_location(inner_loc), "should be `:`"}}),
+ − 873 source_location(inner_loc));
+ − 874 }
+ − 875 inner_loc.advance();
+ − 876 const auto m = lex_time_minute::invoke(inner_loc);
+ − 877 if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':')
+ − 878 {
+ − 879 throw internal_error(format_underline(
+ − 880 "toml::parse_local_time: invalid month format",
+ − 881 {{source_location(inner_loc), "should be `:`"}}),
+ − 882 source_location(inner_loc));
+ − 883 }
+ − 884 inner_loc.advance();
+ − 885 const auto s = lex_time_second::invoke(inner_loc);
+ − 886 if(!s)
+ − 887 {
+ − 888 throw internal_error(format_underline(
+ − 889 "toml::parse_local_time: invalid second format",
+ − 890 {{source_location(inner_loc), "here"}}),
+ − 891 source_location(inner_loc));
+ − 892 }
+ − 893
+ − 894 const int hour = from_string<int>(h.unwrap().str(), 0);
+ − 895 const int minute = from_string<int>(m.unwrap().str(), 0);
+ − 896 const int second = from_string<int>(s.unwrap().str(), 0);
+ − 897
+ − 898 if((hour < 0 || 23 < hour) || (minute < 0 || 59 < minute) ||
+ − 899 (second < 0 || 60 < second)) // it may be leap second
+ − 900 {
+ − 901 throw syntax_error(format_underline("toml::parse_local_time: "
+ − 902 "invalid time: it does not conform RFC3339.", {{
+ − 903 source_location(loc), "hour should be 00-23, minute should be"
+ − 904 " 00-59, second should be 00-60 (depending on the leap"
+ − 905 " second rules.)"}}), source_location(inner_loc));
+ − 906 }
+ − 907
+ − 908 local_time time(hour, minute, second, 0, 0);
+ − 909
+ − 910 const auto before_secfrac = inner_loc.iter();
+ − 911 if(const auto secfrac = lex_time_secfrac::invoke(inner_loc))
+ − 912 {
+ − 913 auto sf = secfrac.unwrap().str();
+ − 914 sf.erase(sf.begin()); // sf.front() == '.'
+ − 915 switch(sf.size() % 3)
+ − 916 {
+ − 917 case 2: sf += '0'; break;
+ − 918 case 1: sf += "00"; break;
+ − 919 case 0: break;
+ − 920 default: break;
+ − 921 }
+ − 922 if(sf.size() >= 9)
+ − 923 {
+ − 924 time.millisecond = from_string<std::uint16_t>(sf.substr(0, 3), 0u);
+ − 925 time.microsecond = from_string<std::uint16_t>(sf.substr(3, 3), 0u);
+ − 926 time.nanosecond = from_string<std::uint16_t>(sf.substr(6, 3), 0u);
+ − 927 }
+ − 928 else if(sf.size() >= 6)
+ − 929 {
+ − 930 time.millisecond = from_string<std::uint16_t>(sf.substr(0, 3), 0u);
+ − 931 time.microsecond = from_string<std::uint16_t>(sf.substr(3, 3), 0u);
+ − 932 }
+ − 933 else if(sf.size() >= 3)
+ − 934 {
+ − 935 time.millisecond = from_string<std::uint16_t>(sf, 0u);
+ − 936 time.microsecond = 0u;
+ − 937 }
+ − 938 }
+ − 939 else
+ − 940 {
+ − 941 if(before_secfrac != inner_loc.iter())
+ − 942 {
+ − 943 throw internal_error(format_underline(
+ − 944 "toml::parse_local_time: invalid subsecond format",
+ − 945 {{source_location(inner_loc), "here"}}),
+ − 946 source_location(inner_loc));
+ − 947 }
+ − 948 }
+ − 949 return ok(std::make_pair(time, token.unwrap()));
+ − 950 }
+ − 951 else
+ − 952 {
+ − 953 loc.reset(first);
+ − 954 return err(format_underline("toml::parse_local_time: ",
+ − 955 {{source_location(loc), "the next token is not a local_time"}}));
+ − 956 }
+ − 957 }
+ − 958
+ − 959 inline result<std::pair<local_datetime, region>, std::string>
+ − 960 parse_local_datetime(location& loc)
+ − 961 {
+ − 962 const auto first = loc.iter();
+ − 963 if(const auto token = lex_local_date_time::invoke(loc))
+ − 964 {
+ − 965 location inner_loc(loc.name(), token.unwrap().str());
+ − 966 const auto date = parse_local_date(inner_loc);
+ − 967 if(!date || inner_loc.iter() == inner_loc.end())
+ − 968 {
+ − 969 throw internal_error(format_underline(
+ − 970 "toml::parse_local_datetime: invalid datetime format",
+ − 971 {{source_location(inner_loc), "date, not datetime"}}),
+ − 972 source_location(inner_loc));
+ − 973 }
+ − 974 const char delim = *(inner_loc.iter());
+ − 975 if(delim != 'T' && delim != 't' && delim != ' ')
+ − 976 {
+ − 977 throw internal_error(format_underline(
+ − 978 "toml::parse_local_datetime: invalid datetime format",
+ − 979 {{source_location(inner_loc), "should be `T` or ` ` (space)"}}),
+ − 980 source_location(inner_loc));
+ − 981 }
+ − 982 inner_loc.advance();
+ − 983 const auto time = parse_local_time(inner_loc);
+ − 984 if(!time)
+ − 985 {
+ − 986 throw internal_error(format_underline(
+ − 987 "toml::parse_local_datetime: invalid datetime format",
+ − 988 {{source_location(inner_loc), "invalid time format"}}),
+ − 989 source_location(inner_loc));
+ − 990 }
+ − 991 return ok(std::make_pair(
+ − 992 local_datetime(date.unwrap().first, time.unwrap().first),
+ − 993 token.unwrap()));
+ − 994 }
+ − 995 else
+ − 996 {
+ − 997 loc.reset(first);
+ − 998 return err(format_underline("toml::parse_local_datetime: ",
+ − 999 {{source_location(loc), "the next token is not a local_datetime"}}));
+ − 1000 }
+ − 1001 }
+ − 1002
+ − 1003 inline result<std::pair<offset_datetime, region>, std::string>
+ − 1004 parse_offset_datetime(location& loc)
+ − 1005 {
+ − 1006 const auto first = loc.iter();
+ − 1007 if(const auto token = lex_offset_date_time::invoke(loc))
+ − 1008 {
+ − 1009 location inner_loc(loc.name(), token.unwrap().str());
+ − 1010 const auto datetime = parse_local_datetime(inner_loc);
+ − 1011 if(!datetime || inner_loc.iter() == inner_loc.end())
+ − 1012 {
+ − 1013 throw internal_error(format_underline(
+ − 1014 "toml::parse_offset_datetime: invalid datetime format",
+ − 1015 {{source_location(inner_loc), "date, not datetime"}}),
+ − 1016 source_location(inner_loc));
+ − 1017 }
+ − 1018 time_offset offset(0, 0);
+ − 1019 if(const auto ofs = lex_time_numoffset::invoke(inner_loc))
+ − 1020 {
+ − 1021 const auto str = ofs.unwrap().str();
+ − 1022
+ − 1023 const auto hour = from_string<int>(str.substr(1,2), 0);
+ − 1024 const auto minute = from_string<int>(str.substr(4,2), 0);
+ − 1025
+ − 1026 if((hour < 0 || 23 < hour) || (minute < 0 || 59 < minute))
+ − 1027 {
+ − 1028 throw syntax_error(format_underline("toml::parse_offset_datetime: "
+ − 1029 "invalid offset: it does not conform RFC3339.", {{
+ − 1030 source_location(loc), "month should be 01-12, day should be"
+ − 1031 " 01-28,29,30,31, depending on month/year."
+ − 1032 }}), source_location(inner_loc));
+ − 1033 }
+ − 1034
+ − 1035 if(str.front() == '+')
+ − 1036 {
+ − 1037 offset = time_offset(hour, minute);
+ − 1038 }
+ − 1039 else
+ − 1040 {
+ − 1041 offset = time_offset(-hour, -minute);
+ − 1042 }
+ − 1043 }
+ − 1044 else if(*inner_loc.iter() != 'Z' && *inner_loc.iter() != 'z')
+ − 1045 {
+ − 1046 throw internal_error(format_underline(
+ − 1047 "toml::parse_offset_datetime: invalid datetime format",
+ − 1048 {{source_location(inner_loc), "should be `Z` or `+HH:MM`"}}),
+ − 1049 source_location(inner_loc));
+ − 1050 }
+ − 1051 return ok(std::make_pair(offset_datetime(datetime.unwrap().first, offset),
+ − 1052 token.unwrap()));
+ − 1053 }
+ − 1054 else
+ − 1055 {
+ − 1056 loc.reset(first);
+ − 1057 return err(format_underline("toml::parse_offset_datetime: ",
+ − 1058 {{source_location(loc), "the next token is not a offset_datetime"}}));
+ − 1059 }
+ − 1060 }
+ − 1061
+ − 1062 inline result<std::pair<key, region>, std::string>
+ − 1063 parse_simple_key(location& loc)
+ − 1064 {
+ − 1065 if(const auto bstr = parse_basic_string(loc))
+ − 1066 {
+ − 1067 return ok(std::make_pair(bstr.unwrap().first.str, bstr.unwrap().second));
+ − 1068 }
+ − 1069 if(const auto lstr = parse_literal_string(loc))
+ − 1070 {
+ − 1071 return ok(std::make_pair(lstr.unwrap().first.str, lstr.unwrap().second));
+ − 1072 }
+ − 1073 if(const auto bare = lex_unquoted_key::invoke(loc))
+ − 1074 {
+ − 1075 const auto reg = bare.unwrap();
+ − 1076 return ok(std::make_pair(reg.str(), reg));
+ − 1077 }
+ − 1078 return err(format_underline("toml::parse_simple_key: ",
+ − 1079 {{source_location(loc), "the next token is not a simple key"}}));
+ − 1080 }
+ − 1081
+ − 1082 // dotted key become vector of keys
+ − 1083 inline result<std::pair<std::vector<key>, region>, std::string>
+ − 1084 parse_key(location& loc)
+ − 1085 {
+ − 1086 const auto first = loc.iter();
+ − 1087 // dotted key -> `foo.bar.baz` where several single keys are chained by
+ − 1088 // dots. Whitespaces between keys and dots are allowed.
+ − 1089 if(const auto token = lex_dotted_key::invoke(loc))
+ − 1090 {
+ − 1091 const auto reg = token.unwrap();
+ − 1092 location inner_loc(loc.name(), reg.str());
+ − 1093 std::vector<key> keys;
+ − 1094
+ − 1095 while(inner_loc.iter() != inner_loc.end())
+ − 1096 {
+ − 1097 lex_ws::invoke(inner_loc);
+ − 1098 if(const auto k = parse_simple_key(inner_loc))
+ − 1099 {
+ − 1100 keys.push_back(k.unwrap().first);
+ − 1101 }
+ − 1102 else
+ − 1103 {
+ − 1104 throw internal_error(format_underline(
+ − 1105 "toml::parse_key: dotted key contains invalid key",
+ − 1106 {{source_location(inner_loc), k.unwrap_err()}}),
+ − 1107 source_location(inner_loc));
+ − 1108 }
+ − 1109
+ − 1110 lex_ws::invoke(inner_loc);
+ − 1111 if(inner_loc.iter() == inner_loc.end())
+ − 1112 {
+ − 1113 break;
+ − 1114 }
+ − 1115 else if(*inner_loc.iter() == '.')
+ − 1116 {
+ − 1117 inner_loc.advance(); // to skip `.`
+ − 1118 }
+ − 1119 else
+ − 1120 {
+ − 1121 throw internal_error(format_underline("toml::parse_key: "
+ − 1122 "dotted key contains invalid key ",
+ − 1123 {{source_location(inner_loc), "should be `.`"}}),
+ − 1124 source_location(inner_loc));
+ − 1125 }
+ − 1126 }
+ − 1127 return ok(std::make_pair(keys, reg));
+ − 1128 }
+ − 1129 loc.reset(first);
+ − 1130
+ − 1131 // simple_key: a single (basic_string|literal_string|bare key)
+ − 1132 if(const auto smpl = parse_simple_key(loc))
+ − 1133 {
+ − 1134 return ok(std::make_pair(std::vector<key>(1, smpl.unwrap().first),
+ − 1135 smpl.unwrap().second));
+ − 1136 }
+ − 1137 return err(format_underline("toml::parse_key: an invalid key appeared.",
+ − 1138 {{source_location(loc), "is not a valid key"}}, {
+ − 1139 "bare keys : non-empty strings composed only of [A-Za-z0-9_-].",
+ − 1140 "quoted keys: same as \"basic strings\" or 'literal strings'.",
+ − 1141 "dotted keys: sequence of bare or quoted keys joined with a dot."
+ − 1142 }));
+ − 1143 }
+ − 1144
+ − 1145 // Forward declaration of parse_value (its definition is not in this part of
// the file). parse_array and parse_key_value_pair below call it; parse_array
// passes `n_rec + 1` and checks its own `n_rec` against
// TOML11_VALUE_RECURSION_LIMIT.
+ − 1146 template<typename Value>
+ − 1147 result<Value, std::string> parse_value(location&, const std::size_t n_rec);
+ − 1148
+ − 1149 template<typename Value>
+ − 1150 result<std::pair<typename Value::array_type, region>, std::string>
+ − 1151 parse_array(location& loc, const std::size_t n_rec)
+ − 1152 {
+ − 1153 using value_type = Value;
+ − 1154 using array_type = typename value_type::array_type;
+ − 1155
+ − 1156 if(n_rec > TOML11_VALUE_RECURSION_LIMIT)
+ − 1157 {
+ − 1158 // parse_array does not have any way to handle recursive error currently...
+ − 1159 throw syntax_error(std::string("toml::parse_array: recursion limit ("
+ − 1160 TOML11_STRINGIZE(TOML11_VALUE_RECURSION_LIMIT) ") exceeded"),
+ − 1161 source_location(loc));
+ − 1162 }
+ − 1163
+ − 1164 const auto first = loc.iter();
+ − 1165 if(loc.iter() == loc.end())
+ − 1166 {
+ − 1167 return err("toml::parse_array: input is empty");
+ − 1168 }
+ − 1169 if(*loc.iter() != '[')
+ − 1170 {
+ − 1171 return err("toml::parse_array: token is not an array");
+ − 1172 }
+ − 1173 loc.advance();
+ − 1174
+ − 1175 using lex_ws_comment_newline = repeat<
+ − 1176 either<lex_wschar, lex_newline, lex_comment>, unlimited>;
+ − 1177
+ − 1178 array_type retval;
+ − 1179 while(loc.iter() != loc.end())
+ − 1180 {
+ − 1181 lex_ws_comment_newline::invoke(loc); // skip
+ − 1182
+ − 1183 if(loc.iter() != loc.end() && *loc.iter() == ']')
+ − 1184 {
+ − 1185 loc.advance(); // skip ']'
+ − 1186 return ok(std::make_pair(retval,
+ − 1187 region(loc, first, loc.iter())));
+ − 1188 }
+ − 1189
+ − 1190 if(auto val = parse_value<value_type>(loc, n_rec+1))
+ − 1191 {
+ − 1192 // After TOML v1.0.0-rc.1, array becomes to be able to have values
+ − 1193 // with different types. So here we will omit this by default.
+ − 1194 //
+ − 1195 // But some of the test-suite checks if the parser accepts a hetero-
+ − 1196 // geneous arrays, so we keep this for a while.
+ − 1197 #ifdef TOML11_DISALLOW_HETEROGENEOUS_ARRAYS
+ − 1198 if(!retval.empty() && retval.front().type() != val.as_ok().type())
+ − 1199 {
+ − 1200 auto array_start_loc = loc;
+ − 1201 array_start_loc.reset(first);
+ − 1202
+ − 1203 throw syntax_error(format_underline("toml::parse_array: "
+ − 1204 "type of elements should be the same each other.", {
+ − 1205 {source_location(array_start_loc), "array starts here"},
+ − 1206 {
+ − 1207 retval.front().location(),
+ − 1208 "value has type " + stringize(retval.front().type())
+ − 1209 },
+ − 1210 {
+ − 1211 val.unwrap().location(),
+ − 1212 "value has different type, " + stringize(val.unwrap().type())
+ − 1213 }
+ − 1214 }), source_location(loc));
+ − 1215 }
+ − 1216 #endif
+ − 1217 retval.push_back(std::move(val.unwrap()));
+ − 1218 }
+ − 1219 else
+ − 1220 {
+ − 1221 auto array_start_loc = loc;
+ − 1222 array_start_loc.reset(first);
+ − 1223
+ − 1224 throw syntax_error(format_underline("toml::parse_array: "
+ − 1225 "value having invalid format appeared in an array", {
+ − 1226 {source_location(array_start_loc), "array starts here"},
+ − 1227 {source_location(loc), "it is not a valid value."}
+ − 1228 }), source_location(loc));
+ − 1229 }
+ − 1230
+ − 1231 using lex_array_separator = sequence<maybe<lex_ws_comment_newline>, character<','>>;
+ − 1232 const auto sp = lex_array_separator::invoke(loc);
+ − 1233 if(!sp)
+ − 1234 {
+ − 1235 lex_ws_comment_newline::invoke(loc);
+ − 1236 if(loc.iter() != loc.end() && *loc.iter() == ']')
+ − 1237 {
+ − 1238 loc.advance(); // skip ']'
+ − 1239 return ok(std::make_pair(retval,
+ − 1240 region(loc, first, loc.iter())));
+ − 1241 }
+ − 1242 else
+ − 1243 {
+ − 1244 auto array_start_loc = loc;
+ − 1245 array_start_loc.reset(first);
+ − 1246
+ − 1247 throw syntax_error(format_underline("toml::parse_array:"
+ − 1248 " missing array separator `,` after a value", {
+ − 1249 {source_location(array_start_loc), "array starts here"},
+ − 1250 {source_location(loc), "should be `,`"}
+ − 1251 }), source_location(loc));
+ − 1252 }
+ − 1253 }
+ − 1254 }
+ − 1255 loc.reset(first);
+ − 1256 throw syntax_error(format_underline("toml::parse_array: "
+ − 1257 "array did not closed by `]`",
+ − 1258 {{source_location(loc), "should be closed"}}),
+ − 1259 source_location(loc));
+ − 1260 }
+ − 1261
+ − 1262 template<typename Value>
+ − 1263 result<std::pair<std::pair<std::vector<key>, region>, Value>, std::string>
+ − 1264 parse_key_value_pair(location& loc, const std::size_t n_rec)
+ − 1265 {
+ − 1266 using value_type = Value;
+ − 1267
+ − 1268 const auto first = loc.iter();
+ − 1269 auto key_reg = parse_key(loc);
+ − 1270 if(!key_reg)
+ − 1271 {
+ − 1272 std::string msg = std::move(key_reg.unwrap_err());
+ − 1273 // if the next token is keyvalue-separator, it means that there are no
+ − 1274 // key. then we need to show error as "empty key is not allowed".
+ − 1275 if(const auto keyval_sep = lex_keyval_sep::invoke(loc))
+ − 1276 {
+ − 1277 loc.reset(first);
+ − 1278 msg = format_underline("toml::parse_key_value_pair: "
+ − 1279 "empty key is not allowed.",
+ − 1280 {{source_location(loc), "key expected before '='"}});
+ − 1281 }
+ − 1282 return err(std::move(msg));
+ − 1283 }
+ − 1284
+ − 1285 const auto kvsp = lex_keyval_sep::invoke(loc);
+ − 1286 if(!kvsp)
+ − 1287 {
+ − 1288 std::string msg;
+ − 1289 // if the line contains '=' after the invalid sequence, possibly the
+ − 1290 // error is in the key (like, invalid character in bare key).
+ − 1291 const auto line_end = std::find(loc.iter(), loc.end(), '\n');
+ − 1292 if(std::find(loc.iter(), line_end, '=') != line_end)
+ − 1293 {
+ − 1294 msg = format_underline("toml::parse_key_value_pair: "
+ − 1295 "invalid format for key",
+ − 1296 {{source_location(loc), "invalid character in key"}},
+ − 1297 {"Did you forget '.' to separate dotted-key?",
+ − 1298 "Allowed characters for bare key are [0-9a-zA-Z_-]."});
+ − 1299 }
+ − 1300 else // if not, the error is lack of key-value separator.
+ − 1301 {
+ − 1302 msg = format_underline("toml::parse_key_value_pair: "
+ − 1303 "missing key-value separator `=`",
+ − 1304 {{source_location(loc), "should be `=`"}});
+ − 1305 }
+ − 1306 loc.reset(first);
+ − 1307 return err(std::move(msg));
+ − 1308 }
+ − 1309
+ − 1310 const auto after_kvsp = loc.iter(); // err msg
+ − 1311 auto val = parse_value<value_type>(loc, n_rec);
+ − 1312 if(!val)
+ − 1313 {
+ − 1314 std::string msg;
+ − 1315 loc.reset(after_kvsp);
+ − 1316 // check there is something not a comment/whitespace after `=`
+ − 1317 if(sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>::invoke(loc))
+ − 1318 {
+ − 1319 loc.reset(after_kvsp);
+ − 1320 msg = format_underline("toml::parse_key_value_pair: "
+ − 1321 "missing value after key-value separator '='",
+ − 1322 {{source_location(loc), "expected value, but got nothing"}});
+ − 1323 }
+ − 1324 else // there is something not a comment/whitespace, so invalid format.
+ − 1325 {
+ − 1326 msg = std::move(val.unwrap_err());
+ − 1327 }
+ − 1328 loc.reset(first);
+ − 1329 return err(msg);
+ − 1330 }
+ − 1331 return ok(std::make_pair(std::move(key_reg.unwrap()),
+ − 1332 std::move(val.unwrap())));
+ − 1333 }
+ − 1334
+ − 1335 // for error messages.
+ − 1336 template<typename InputIterator>
+ − 1337 std::string format_dotted_keys(InputIterator first, const InputIterator last)
+ − 1338 {
+ − 1339 static_assert(std::is_same<key,
+ − 1340 typename std::iterator_traits<InputIterator>::value_type>::value,"");
+ − 1341
+ − 1342 std::string retval(*first++);
+ − 1343 for(; first != last; ++first)
+ − 1344 {
+ − 1345 retval += '.';
+ − 1346 retval += *first;
+ − 1347 }
+ − 1348 return retval;
+ − 1349 }
+ − 1350
+ − 1351 // Forward declarations needed by is_valid_forward_table_definition below
// (the definitions are not in this part of the file). It runs
// parse_table_key and parse_array_table_key on the text of an existing
// value's region, and parse_inline_table / parse_table_key on the text of
// the region of the value being inserted.
+ − 1352 result<std::pair<std::vector<key>, region>, std::string>
+ − 1353 parse_table_key(location& loc);
+ − 1354 result<std::pair<std::vector<key>, region>, std::string>
+ − 1355 parse_array_table_key(location& loc);
+ − 1356 template<typename Value>
+ − 1357 result<std::pair<typename Value::table_type, region>, std::string>
+ − 1358 parse_inline_table(location& loc, const std::size_t n_rec);
+ − 1359
+ − 1360 // The following toml file is allowed.
+ − 1361 // ```toml
+ − 1362 // [a.b.c] # here, table `a` has element `b`.
+ − 1363 // foo = "bar"
+ − 1364 // [a] # merge a = {baz = "qux"} to a = {b = {...}}
+ − 1365 // baz = "qux"
+ − 1366 // ```
+ − 1367 // But the following is not allowed.
+ − 1368 // ```toml
+ − 1369 // [a]
+ − 1370 // b.c.foo = "bar"
+ − 1371 // [a] # error! the same table [a] defined!
+ − 1372 // baz = "qux"
+ − 1373 // ```
+ − 1374 // The following is neither allowed.
+ − 1375 // ```toml
+ − 1376 // a = { b.c.foo = "bar"}
+ − 1377 // [a] # error! the same table [a] defined!
+ − 1378 // baz = "qux"
+ − 1379 // ```
+ − 1380 // Here, it parses region of `tab->at(k)` as a table key and check the depth
+ − 1381 // of the key. If the key region points deeper node, it would be allowed.
+ − 1382 // Otherwise, the key points the same node. It would be rejected.
+ − 1383 template<typename Value, typename Iterator>
+ − 1384 bool is_valid_forward_table_definition(const Value& fwd, const Value& inserting,
+ − 1385 Iterator key_first, Iterator key_curr, Iterator key_last)
+ − 1386 {
+ − 1387 // ------------------------------------------------------------------------
+ − 1388 // check type of the value to be inserted/merged
+ − 1389
+ − 1390 std::string inserting_reg = "";
+ − 1391 if(const auto ptr = detail::get_region(inserting))
+ − 1392 {
+ − 1393 inserting_reg = ptr->str();
+ − 1394 }
+ − 1395 location inserting_def("internal", std::move(inserting_reg));
+ − 1396 if(const auto inlinetable = parse_inline_table<Value>(inserting_def, 0))
+ − 1397 {
+ − 1398 // check if we are overwriting existing table.
+ − 1399 // ```toml
+ − 1400 // # NG
+ − 1401 // a.b = 42
+ − 1402 // a = {d = 3.14}
+ − 1403 // ```
+ − 1404 // Inserting an inline table to a existing super-table is not allowed in
+ − 1405 // any case. If we found it, we can reject it without further checking.
+ − 1406 return false;
+ − 1407 }
+ − 1408
+ − 1409 // Valid and invalid cases when inserting to the [a.b] table:
+ − 1410 //
+ − 1411 // ## Invalid
+ − 1412 //
+ − 1413 // ```toml
+ − 1414 // # invalid
+ − 1415 // [a]
+ − 1416 // b.c.d = "foo"
+ − 1417 // [a.b] # a.b is already defined and closed
+ − 1418 // d = "bar"
+ − 1419 // ```
+ − 1420 // ```toml
+ − 1421 // # invalid
+ − 1422 // a = {b.c.d = "foo"}
+ − 1423 // [a.b] # a is already defined and inline table is closed
+ − 1424 // d = "bar"
+ − 1425 // ```
+ − 1426 // ```toml
+ − 1427 // # invalid
+ − 1428 // a.b.c.d = "foo"
+ − 1429 // [a.b] # a.b is already defined and dotted-key table is closed
+ − 1430 // d = "bar"
+ − 1431 // ```
+ − 1432 //
+ − 1433 // ## Valid
+ − 1434 //
+ − 1435 // ```toml
+ − 1436 // # OK. a.b is defined, but is *overwritable*
+ − 1437 // [a.b.c]
+ − 1438 // d = "foo"
+ − 1439 // [a.b]
+ − 1440 // d = "bar"
+ − 1441 // ```
+ − 1442 // ```toml
+ − 1443 // # OK. a.b is defined, but is *overwritable*
+ − 1444 // [a]
+ − 1445 // b.c.d = "foo"
+ − 1446 // b.e = "bar"
+ − 1447 // ```
+ − 1448
+ − 1449 // ------------------------------------------------------------------------
+ − 1450 // check table defined before
+ − 1451
+ − 1452 std::string internal = "";
+ − 1453 if(const auto ptr = detail::get_region(fwd))
+ − 1454 {
+ − 1455 internal = ptr->str();
+ − 1456 }
+ − 1457 location def("internal", std::move(internal));
+ − 1458 if(const auto tabkeys = parse_table_key(def)) // [table.key]
+ − 1459 {
+ − 1460 // table keys always contains all the nodes from the root.
+ − 1461 const auto& tks = tabkeys.unwrap().first;
+ − 1462 if(std::size_t(std::distance(key_first, key_last)) == tks.size() &&
+ − 1463 std::equal(tks.begin(), tks.end(), key_first))
+ − 1464 {
+ − 1465 // the keys are equivalent. it is not allowed.
+ − 1466 return false;
+ − 1467 }
+ − 1468 // the keys are not equivalent. it is allowed.
+ − 1469 return true;
+ − 1470 }
+ − 1471 // nested array-of-table definition implicitly defines tables.
+ − 1472 // those tables can be reopened.
+ − 1473 if(const auto atabkeys = parse_array_table_key(def))
+ − 1474 {
+ − 1475 // table keys always contains all the nodes from the root.
+ − 1476 const auto& tks = atabkeys.unwrap().first;
+ − 1477 if(std::size_t(std::distance(key_first, key_last)) == tks.size() &&
+ − 1478 std::equal(tks.begin(), tks.end(), key_first))
+ − 1479 {
+ − 1480 // the keys are equivalent. it is not allowed.
+ − 1481 return false;
+ − 1482 }
+ − 1483 // the keys are not equivalent. it is allowed.
+ − 1484 return true;
+ − 1485 }
+ − 1486 if(const auto dotkeys = parse_key(def)) // a.b.c = "foo"
+ − 1487 {
+ − 1488 // consider the following case.
+ − 1489 // [a]
+ − 1490 // b.c = {d = 42}
+ − 1491 // [a.b.c]
+ − 1492 // e = 2.71
+ − 1493 // this defines the table [a.b.c] twice. no?
+ − 1494 if(const auto reopening_dotkey_by_table = parse_table_key(inserting_def))
+ − 1495 {
+ − 1496 // re-opening a dotkey-defined table by a table is invalid.
+ − 1497 // only dotkey can append a key-val. Like:
+ − 1498 // ```toml
+ − 1499 // a.b.c = "foo"
+ − 1500 // a.b.d = "bar" # OK. reopen `a.b` by dotkey
+ − 1501 // [a.b]
+ − 1502 // e = "bar" # Invalid. re-opening `a.b` by [a.b] is not allowed.
+ − 1503 // ```
+ − 1504 return false;
+ − 1505 }
+ − 1506
+ − 1507 // a dotted key starts from the node representing a table in which the
+ − 1508 // dotted key belongs to.
+ − 1509 const auto& dks = dotkeys.unwrap().first;
+ − 1510 if(std::size_t(std::distance(key_curr, key_last)) == dks.size() &&
+ − 1511 std::equal(dks.begin(), dks.end(), key_curr))
+ − 1512 {
+ − 1513 // the keys are equivalent. it is not allowed.
+ − 1514 return false;
+ − 1515 }
+ − 1516 // the keys are not equivalent. it is allowed.
+ − 1517 return true;
+ − 1518 }
+ − 1519 return false;
+ − 1520 }
+ − 1521
+ − 1522 template<typename Value, typename InputIterator>
+ − 1523 result<bool, std::string>
+ − 1524 insert_nested_key(typename Value::table_type& root, const Value& v,
+ − 1525 InputIterator iter, const InputIterator last,
+ − 1526 region key_reg,
+ − 1527 const bool is_array_of_table = false)
+ − 1528 {
+ − 1529 static_assert(std::is_same<key,
+ − 1530 typename std::iterator_traits<InputIterator>::value_type>::value,"");
+ − 1531
+ − 1532 using value_type = Value;
+ − 1533 using table_type = typename value_type::table_type;
+ − 1534 using array_type = typename value_type::array_type;
+ − 1535
+ − 1536 const auto first = iter;
+ − 1537 assert(iter != last);
+ − 1538
+ − 1539 table_type* tab = std::addressof(root);
+ − 1540 for(; iter != last; ++iter) // search recursively
+ − 1541 {
+ − 1542 const key& k = *iter;
+ − 1543 if(std::next(iter) == last) // k is the last key
+ − 1544 {
+ − 1545 // XXX if the value is array-of-tables, there can be several
+ − 1546 // tables that are in the same array. in that case, we need to
+ − 1547 // find the last element and insert it to there.
+ − 1548 if(is_array_of_table)
+ − 1549 {
+ − 1550 if(tab->count(k) == 1) // there is already an array of table
+ − 1551 {
+ − 1552 if(tab->at(k).is_table())
+ − 1553 {
+ − 1554 // show special err msg for conflicting table
+ − 1555 throw syntax_error(format_underline(concat_to_string(
+ − 1556 "toml::insert_value: array of table (\"",
+ − 1557 format_dotted_keys(first, last),
+ − 1558 "\") cannot be defined"), {
+ − 1559 {tab->at(k).location(), "table already defined"},
+ − 1560 {v.location(), "this conflicts with the previous table"}
+ − 1561 }), v.location());
+ − 1562 }
+ − 1563 else if(!(tab->at(k).is_array()))
+ − 1564 {
+ − 1565 throw syntax_error(format_underline(concat_to_string(
+ − 1566 "toml::insert_value: array of table (\"",
+ − 1567 format_dotted_keys(first, last), "\") collides with"
+ − 1568 " existing value"), {
+ − 1569 {tab->at(k).location(),
+ − 1570 concat_to_string("this ", tab->at(k).type(),
+ − 1571 " value already exists")},
+ − 1572 {v.location(),
+ − 1573 "while inserting this array-of-tables"}
+ − 1574 }), v.location());
+ − 1575 }
+ − 1576 // the above if-else-if checks tab->at(k) is an array
+ − 1577 auto& a = tab->at(k).as_array();
+ − 1578 // If table element is defined as [[array_of_tables]], it
+ − 1579 // cannot be an empty array. If an array of tables is
+ − 1580 // defined as `aot = []`, it cannot be appended.
+ − 1581 if(a.empty() || !(a.front().is_table()))
+ − 1582 {
+ − 1583 throw syntax_error(format_underline(concat_to_string(
+ − 1584 "toml::insert_value: array of table (\"",
+ − 1585 format_dotted_keys(first, last), "\") collides with"
+ − 1586 " existing value"), {
+ − 1587 {tab->at(k).location(),
+ − 1588 concat_to_string("this ", tab->at(k).type(),
+ − 1589 " value already exists")},
+ − 1590 {v.location(),
+ − 1591 "while inserting this array-of-tables"}
+ − 1592 }), v.location());
+ − 1593 }
+ − 1594 // avoid conflicting array of table like the following.
+ − 1595 // ```toml
+ − 1596 // a = [{b = 42}] # define a as an array of *inline* tables
+ − 1597 // [[a]] # a is an array of *multi-line* tables
+ − 1598 // b = 54
+ − 1599 // ```
+ − 1600 // Here, from the type information, these cannot be detected
+ − 1601 // because inline table is also a table.
+ − 1602 // But toml v0.5.0 explicitly says it is invalid. The above
+ − 1603 // array-of-tables has a static size and appending to the
+ − 1604 // array is invalid.
+ − 1605 // In this library, multi-line table value has a region
+ − 1606 // that points to the key of the table (e.g. [[a]]). By
+ − 1607 // comparing the first two letters in key, we can detect
+ − 1608 // the array-of-table is inline or multiline.
+ − 1609 if(const auto ptr = detail::get_region(a.front()))
+ − 1610 {
+ − 1611 if(ptr->str().substr(0,2) != "[[")
+ − 1612 {
+ − 1613 throw syntax_error(format_underline(concat_to_string(
+ − 1614 "toml::insert_value: array of table (\"",
+ − 1615 format_dotted_keys(first, last), "\") collides "
+ − 1616 "with existing array-of-tables"), {
+ − 1617 {tab->at(k).location(),
+ − 1618 concat_to_string("this ", tab->at(k).type(),
+ − 1619 " value has static size")},
+ − 1620 {v.location(),
+ − 1621 "appending it to the statically sized array"}
+ − 1622 }), v.location());
+ − 1623 }
+ − 1624 }
+ − 1625 a.push_back(v);
+ − 1626 return ok(true);
+ − 1627 }
+ − 1628 else // if not, we need to create the array of table
+ − 1629 {
+ − 1630 // XXX: Consider the following array of tables.
+ − 1631 // ```toml
+ − 1632 // # This is a comment.
+ − 1633 // [[aot]]
+ − 1634 // foo = "bar"
+ − 1635 // ```
+ − 1636 // Here, the comment is for `aot`. But here, actually two
+ − 1637 // values are defined. An array that contains tables, named
+ − 1638 // `aot`, and the 0th element of the `aot`, `{foo = "bar"}`.
+ − 1639 // Those two are different from each other. But both of them
+ − 1640 // points to the same portion of the TOML file, `[[aot]]`,
+ − 1641 // so `key_reg.comments()` returns `# This is a comment`.
+ − 1642 // If it is assigned as a comment of `aot` defined here, the
+ − 1643 // comment will be duplicated. Both the `aot` itself and
+ − 1644 // the 0-th element will have the same comment. This causes
+ − 1645 // "duplication of the same comments" bug when the data is
+ − 1646 // serialized.
+ − 1647 // Next, consider the following.
+ − 1648 // ```toml
+ − 1649 // # comment 1
+ − 1650 // aot = [
+ − 1651 // # comment 2
+ − 1652 // {foo = "bar"},
+ − 1653 // ]
+ − 1654 // ```
+ − 1655 // In this case, we can distinguish those two comments. So
+ − 1656 // here we need to add "comment 1" to the `aot` and
+ − 1657 // "comment 2" to the 0th element of that.
+ − 1658 // To distinguish those two, we check the key region.
+ − 1659 std::vector<std::string> comments{/* empty by default */};
+ − 1660 if(key_reg.str().substr(0, 2) != "[[")
+ − 1661 {
+ − 1662 comments = key_reg.comments();
+ − 1663 }
+ − 1664 value_type aot(array_type(1, v), key_reg, std::move(comments));
+ − 1665 tab->insert(std::make_pair(k, aot));
+ − 1666 return ok(true);
+ − 1667 }
+ − 1668 } // end if(array of table)
+ − 1669
+ − 1670 if(tab->count(k) == 1)
+ − 1671 {
+ − 1672 if(tab->at(k).is_table() && v.is_table())
+ − 1673 {
+ − 1674 if(!is_valid_forward_table_definition(
+ − 1675 tab->at(k), v, first, iter, last))
+ − 1676 {
+ − 1677 throw syntax_error(format_underline(concat_to_string(
+ − 1678 "toml::insert_value: table (\"",
+ − 1679 format_dotted_keys(first, last),
+ − 1680 "\") already exists."), {
+ − 1681 {tab->at(k).location(), "table already exists here"},
+ − 1682 {v.location(), "table defined twice"}
+ − 1683 }), v.location());
+ − 1684 }
+ − 1685 // to allow the following toml file.
+ − 1686 // [a.b.c]
+ − 1687 // d = 42
+ − 1688 // [a]
+ − 1689 // e = 2.71
+ − 1690 auto& t = tab->at(k).as_table();
+ − 1691 for(const auto& kv : v.as_table())
+ − 1692 {
+ − 1693 if(tab->at(k).contains(kv.first))
+ − 1694 {
+ − 1695 throw syntax_error(format_underline(concat_to_string(
+ − 1696 "toml::insert_value: value (\"",
+ − 1697 format_dotted_keys(first, last),
+ − 1698 "\") already exists."), {
+ − 1699 {t.at(kv.first).location(), "already exists here"},
+ − 1700 {v.location(), "this defined twice"}
+ − 1701 }), v.location());
+ − 1702 }
+ − 1703 t[kv.first] = kv.second;
+ − 1704 }
+ − 1705 detail::change_region(tab->at(k), key_reg);
+ − 1706 return ok(true);
+ − 1707 }
+ − 1708 else if(v.is_table() &&
+ − 1709 tab->at(k).is_array() &&
+ − 1710 tab->at(k).as_array().size() > 0 &&
+ − 1711 tab->at(k).as_array().front().is_table())
+ − 1712 {
+ − 1713 throw syntax_error(format_underline(concat_to_string(
+ − 1714 "toml::insert_value: array of tables (\"",
+ − 1715 format_dotted_keys(first, last), "\") already exists."), {
+ − 1716 {tab->at(k).location(), "array of tables defined here"},
+ − 1717 {v.location(), "table conflicts with the previous array of table"}
+ − 1718 }), v.location());
+ − 1719 }
+ − 1720 else
+ − 1721 {
+ − 1722 throw syntax_error(format_underline(concat_to_string(
+ − 1723 "toml::insert_value: value (\"",
+ − 1724 format_dotted_keys(first, last), "\") already exists."), {
+ − 1725 {tab->at(k).location(), "value already exists here"},
+ − 1726 {v.location(), "value defined twice"}
+ − 1727 }), v.location());
+ − 1728 }
+ − 1729 }
+ − 1730 tab->insert(std::make_pair(k, v));
+ − 1731 return ok(true);
+ − 1732 }
+ − 1733 else // k is not the last one, we should insert recursively
+ − 1734 {
+ − 1735 // if there is no corresponding value, insert it first.
+ − 1736 // related: you don't need to write
+ − 1737 // # [x]
+ − 1738 // # [x.y]
+ − 1739 // to write
+ − 1740 // [x.y.z]
+ − 1741 if(tab->count(k) == 0)
+ − 1742 {
+ − 1743 // a table that is defined implicitly doesn't have any comments.
+ − 1744 (*tab)[k] = value_type(table_type{}, key_reg, {/*no comment*/});
+ − 1745 }
+ − 1746
+ − 1747 // type checking...
+ − 1748 if(tab->at(k).is_table())
+ − 1749 {
+ − 1750 // According to toml-lang/toml:36d3091b3 "Clarify that inline
+ − 1751 // tables are immutable", check if it adds key-value pair to an
+ − 1752 // inline table.
+ − 1753 if(const auto* ptr = get_region(tab->at(k)))
+ − 1754 {
+ − 1755 // here, if the value is a (multi-line) table, the region
+ − 1756 // should be something like `[table-name]`.
+ − 1757 if(ptr->front() == '{')
+ − 1758 {
+ − 1759 throw syntax_error(format_underline(concat_to_string(
+ − 1760 "toml::insert_value: inserting to an inline table (",
+ − 1761 format_dotted_keys(first, std::next(iter)),
+ − 1762 ") but inline tables are immutable"), {
+ − 1763 {tab->at(k).location(), "inline tables are immutable"},
+ − 1764 {v.location(), "inserting this"}
+ − 1765 }), v.location());
+ − 1766 }
+ − 1767 }
+ − 1768 tab = std::addressof((*tab)[k].as_table());
+ − 1769 }
+ − 1770 else if(tab->at(k).is_array()) // inserting to array-of-tables?
+ − 1771 {
+ − 1772 auto& a = (*tab)[k].as_array();
+ − 1773 if(!a.back().is_table())
+ − 1774 {
+ − 1775 throw syntax_error(format_underline(concat_to_string(
+ − 1776 "toml::insert_value: target (",
+ − 1777 format_dotted_keys(first, std::next(iter)),
+ − 1778 ") is neither table nor an array of tables"), {
+ − 1779 {a.back().location(), concat_to_string(
+ − 1780 "actual type is ", a.back().type())},
+ − 1781 {v.location(), "inserting this"}
+ − 1782 }), v.location());
+ − 1783 }
+ − 1784 if(a.empty())
+ − 1785 {
+ − 1786 throw syntax_error(format_underline(concat_to_string(
+ − 1787 "toml::insert_value: table (\"",
+ − 1788 format_dotted_keys(first, last), "\") conflicts with"
+ − 1789 " existing value"), {
+ − 1790 {tab->at(k).location(), std::string("this array is not insertable")},
+ − 1791 {v.location(), std::string("appending it to the statically sized array")}
+ − 1792 }), v.location());
+ − 1793 }
+ − 1794 if(const auto ptr = detail::get_region(a.at(0)))
+ − 1795 {
+ − 1796 if(ptr->str().substr(0,2) != "[[")
+ − 1797 {
+ − 1798 throw syntax_error(format_underline(concat_to_string(
+ − 1799 "toml::insert_value: a table (\"",
+ − 1800 format_dotted_keys(first, last), "\") cannot be "
+ − 1801 "inserted to an existing inline array-of-tables"), {
+ − 1802 {tab->at(k).location(), std::string("this array of table has a static size")},
+ − 1803 {v.location(), std::string("appending it to the statically sized array")}
+ − 1804 }), v.location());
+ − 1805 }
+ − 1806 }
+ − 1807 tab = std::addressof(a.back().as_table());
+ − 1808 }
+ − 1809 else
+ − 1810 {
+ − 1811 throw syntax_error(format_underline(concat_to_string(
+ − 1812 "toml::insert_value: target (",
+ − 1813 format_dotted_keys(first, std::next(iter)),
+ − 1814 ") is neither table nor an array of tables"), {
+ − 1815 {tab->at(k).location(), concat_to_string(
+ − 1816 "actual type is ", tab->at(k).type())},
+ − 1817 {v.location(), "inserting this"}
+ − 1818 }), v.location());
+ − 1819 }
+ − 1820 }
+ − 1821 }
+ − 1822 return err(std::string("toml::detail::insert_nested_key: never reach here"));
+ − 1823 }
+ − 1824
+ − 1825 template<typename Value>
+ − 1826 result<std::pair<typename Value::table_type, region>, std::string>
+ − 1827 parse_inline_table(location& loc, const std::size_t n_rec)
+ − 1828 {
+ − 1829 using value_type = Value;
+ − 1830 using table_type = typename value_type::table_type;
+ − 1831
+ − 1832 if(n_rec > TOML11_VALUE_RECURSION_LIMIT)
+ − 1833 {
+ − 1834 throw syntax_error(std::string("toml::parse_inline_table: recursion limit ("
+ − 1835 TOML11_STRINGIZE(TOML11_VALUE_RECURSION_LIMIT) ") exceeded"),
+ − 1836 source_location(loc));
+ − 1837 }
+ − 1838
+ − 1839 const auto first = loc.iter();
+ − 1840 table_type retval;
+ − 1841 if(!(loc.iter() != loc.end() && *loc.iter() == '{'))
+ − 1842 {
+ − 1843 return err(format_underline("toml::parse_inline_table: ",
+ − 1844 {{source_location(loc), "the next token is not an inline table"}}));
+ − 1845 }
+ − 1846 loc.advance();
+ − 1847
+ − 1848 // check if the inline table is an empty table = { }
+ − 1849 maybe<lex_ws>::invoke(loc);
+ − 1850 if(loc.iter() != loc.end() && *loc.iter() == '}')
+ − 1851 {
+ − 1852 loc.advance(); // skip `}`
+ − 1853 return ok(std::make_pair(retval, region(loc, first, loc.iter())));
+ − 1854 }
+ − 1855
+ − 1856 // it starts from "{". it should be formatted as inline-table
+ − 1857 while(loc.iter() != loc.end())
+ − 1858 {
+ − 1859 const auto kv_r = parse_key_value_pair<value_type>(loc, n_rec+1);
+ − 1860 if(!kv_r)
+ − 1861 {
+ − 1862 return err(kv_r.unwrap_err());
+ − 1863 }
+ − 1864
+ − 1865 const auto& kvpair = kv_r.unwrap();
+ − 1866 const std::vector<key>& keys = kvpair.first.first;
+ − 1867 const auto& key_reg = kvpair.first.second;
+ − 1868 const value_type& val = kvpair.second;
+ − 1869
+ − 1870 const auto inserted =
+ − 1871 insert_nested_key(retval, val, keys.begin(), keys.end(), key_reg);
+ − 1872 if(!inserted)
+ − 1873 {
+ − 1874 throw internal_error("toml::parse_inline_table: "
+ − 1875 "failed to insert value into table: " + inserted.unwrap_err(),
+ − 1876 source_location(loc));
+ − 1877 }
+ − 1878
+ − 1879 using lex_table_separator = sequence<maybe<lex_ws>, character<','>>;
+ − 1880 const auto sp = lex_table_separator::invoke(loc);
+ − 1881
+ − 1882 if(!sp)
+ − 1883 {
+ − 1884 maybe<lex_ws>::invoke(loc);
+ − 1885
+ − 1886 if(loc.iter() == loc.end())
+ − 1887 {
+ − 1888 throw syntax_error(format_underline(
+ − 1889 "toml::parse_inline_table: missing table separator `}` ",
+ − 1890 {{source_location(loc), "should be `}`"}}),
+ − 1891 source_location(loc));
+ − 1892 }
+ − 1893 else if(*loc.iter() == '}')
+ − 1894 {
+ − 1895 loc.advance(); // skip `}`
+ − 1896 return ok(std::make_pair(
+ − 1897 retval, region(loc, first, loc.iter())));
+ − 1898 }
+ − 1899 else if(*loc.iter() == '#' || *loc.iter() == '\r' || *loc.iter() == '\n')
+ − 1900 {
+ − 1901 throw syntax_error(format_underline(
+ − 1902 "toml::parse_inline_table: missing curly brace `}`",
+ − 1903 {{source_location(loc), "should be `}`"}}),
+ − 1904 source_location(loc));
+ − 1905 }
+ − 1906 else
+ − 1907 {
+ − 1908 throw syntax_error(format_underline(
+ − 1909 "toml::parse_inline_table: missing table separator `,` ",
+ − 1910 {{source_location(loc), "should be `,`"}}),
+ − 1911 source_location(loc));
+ − 1912 }
+ − 1913 }
+ − 1914 else // `,` is found
+ − 1915 {
+ − 1916 maybe<lex_ws>::invoke(loc);
+ − 1917 if(loc.iter() != loc.end() && *loc.iter() == '}')
+ − 1918 {
+ − 1919 throw syntax_error(format_underline(
+ − 1920 "toml::parse_inline_table: trailing comma is not allowed in"
+ − 1921 " an inline table",
+ − 1922 {{source_location(loc), "should be `}`"}}),
+ − 1923 source_location(loc));
+ − 1924 }
+ − 1925 }
+ − 1926 }
+ − 1927 loc.reset(first);
+ − 1928 throw syntax_error(format_underline("toml::parse_inline_table: "
+ − 1929 "inline table did not closed by `}`",
+ − 1930 {{source_location(loc), "should be closed"}}),
+ − 1931 source_location(loc));
+ − 1932 }
+ − 1933
+ − 1934 inline result<value_t, std::string> guess_number_type(const location& l)
+ − 1935 {
+ − 1936 // This function tries to find some (common) mistakes by checking characters
+ − 1937 // that follows the last character of a value. But it is often difficult
+ − 1938 // because some non-newline characters can appear after a value. E.g.
+ − 1939 // spaces, tabs, commas (in an array or inline table), closing brackets
+ − 1940 // (of an array or inline table), comment-sign (#). Since this function
+ − 1941 // does not parse further, those characters are always allowed to be there.
+ − 1942 location loc = l;
+ − 1943
+ − 1944 if(lex_offset_date_time::invoke(loc)) {return ok(value_t::offset_datetime);}
+ − 1945 loc.reset(l.iter());
+ − 1946
+ − 1947 if(lex_local_date_time::invoke(loc))
+ − 1948 {
+ − 1949 // bad offset may appear after this.
+ − 1950 if(loc.iter() != loc.end() && (*loc.iter() == '+' || *loc.iter() == '-'
+ − 1951 || *loc.iter() == 'Z' || *loc.iter() == 'z'))
+ − 1952 {
+ − 1953 return err(format_underline("bad offset: should be [+-]HH:MM or Z",
+ − 1954 {{source_location(loc), "[+-]HH:MM or Z"}},
+ − 1955 {"pass: +09:00, -05:30", "fail: +9:00, -5:30"}));
+ − 1956 }
+ − 1957 return ok(value_t::local_datetime);
+ − 1958 }
+ − 1959 loc.reset(l.iter());
+ − 1960
+ − 1961 if(lex_local_date::invoke(loc))
+ − 1962 {
+ − 1963 // bad time may appear after this.
+ − 1964 // A space is allowed as a delimiter between local time. But there are
+ − 1965 // both cases in which a space becomes valid or invalid.
+ − 1966 // - invalid: 2019-06-16 7:00:00
+ − 1967 // - valid : 2019-06-16 07:00:00
+ − 1968 if(loc.iter() != loc.end())
+ − 1969 {
+ − 1970 const auto c = *loc.iter();
+ − 1971 if(c == 'T' || c == 't')
+ − 1972 {
+ − 1973 return err(format_underline("bad time: should be HH:MM:SS.subsec",
+ − 1974 {{source_location(loc), "HH:MM:SS.subsec"}},
+ − 1975 {"pass: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999",
+ − 1976 "fail: 1979-05-27T7:32:00, 1979-05-27 17:32"}));
+ − 1977 }
+ − 1978 if('0' <= c && c <= '9')
+ − 1979 {
+ − 1980 return err(format_underline("bad time: missing T",
+ − 1981 {{source_location(loc), "T or space required here"}},
+ − 1982 {"pass: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999",
+ − 1983 "fail: 1979-05-27T7:32:00, 1979-05-27 7:32"}));
+ − 1984 }
+ − 1985 if(c == ' ' && std::next(loc.iter()) != loc.end() &&
+ − 1986 ('0' <= *std::next(loc.iter()) && *std::next(loc.iter())<= '9'))
+ − 1987 {
+ − 1988 loc.advance();
+ − 1989 return err(format_underline("bad time: should be HH:MM:SS.subsec",
+ − 1990 {{source_location(loc), "HH:MM:SS.subsec"}},
+ − 1991 {"pass: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999",
+ − 1992 "fail: 1979-05-27T7:32:00, 1979-05-27 7:32"}));
+ − 1993 }
+ − 1994 }
+ − 1995 return ok(value_t::local_date);
+ − 1996 }
+ − 1997 loc.reset(l.iter());
+ − 1998
+ − 1999 if(lex_local_time::invoke(loc)) {return ok(value_t::local_time);}
+ − 2000 loc.reset(l.iter());
+ − 2001
+ − 2002 if(lex_float::invoke(loc))
+ − 2003 {
+ − 2004 if(loc.iter() != loc.end() && *loc.iter() == '_')
+ − 2005 {
+ − 2006 return err(format_underline("bad float: `_` should be surrounded by digits",
+ − 2007 {{source_location(loc), "here"}},
+ − 2008 {"pass: +1.0, -2e-2, 3.141_592_653_589, inf, nan",
+ − 2009 "fail: .0, 1., _1.0, 1.0_, 1_.0, 1.0__0"}));
+ − 2010 }
+ − 2011 return ok(value_t::floating);
+ − 2012 }
+ − 2013 loc.reset(l.iter());
+ − 2014
+ − 2015 if(lex_integer::invoke(loc))
+ − 2016 {
+ − 2017 if(loc.iter() != loc.end())
+ − 2018 {
+ − 2019 const auto c = *loc.iter();
+ − 2020 if(c == '_')
+ − 2021 {
+ − 2022 return err(format_underline("bad integer: `_` should be surrounded by digits",
+ − 2023 {{source_location(loc), "here"}},
+ − 2024 {"pass: -42, 1_000, 1_2_3_4_5, 0xC0FFEE, 0b0010, 0o755",
+ − 2025 "fail: 1__000, 0123"}));
+ − 2026 }
+ − 2027 if('0' <= c && c <= '9')
+ − 2028 {
+ − 2029 // leading zero. point '0'
+ − 2030 loc.retrace();
+ − 2031 return err(format_underline("bad integer: leading zero",
+ − 2032 {{source_location(loc), "here"}},
+ − 2033 {"pass: -42, 1_000, 1_2_3_4_5, 0xC0FFEE, 0b0010, 0o755",
+ − 2034 "fail: 1__000, 0123"}));
+ − 2035 }
+ − 2036 if(c == ':' || c == '-')
+ − 2037 {
+ − 2038 return err(format_underline("bad datetime: invalid format",
+ − 2039 {{source_location(loc), "here"}},
+ − 2040 {"pass: 1979-05-27T07:32:00-07:00, 1979-05-27 07:32:00.999999Z",
+ − 2041 "fail: 1979-05-27T7:32:00-7:00, 1979-05-27 7:32-00:30"}));
+ − 2042 }
+ − 2043 if(c == '.' || c == 'e' || c == 'E')
+ − 2044 {
+ − 2045 return err(format_underline("bad float: invalid format",
+ − 2046 {{source_location(loc), "here"}},
+ − 2047 {"pass: +1.0, -2e-2, 3.141_592_653_589, inf, nan",
+ − 2048 "fail: .0, 1., _1.0, 1.0_, 1_.0, 1.0__0"}));
+ − 2049 }
+ − 2050 }
+ − 2051 return ok(value_t::integer);
+ − 2052 }
+ − 2053 if(loc.iter() != loc.end() && *loc.iter() == '.')
+ − 2054 {
+ − 2055 return err(format_underline("bad float: invalid format",
+ − 2056 {{source_location(loc), "integer part required before this"}},
+ − 2057 {"pass: +1.0, -2e-2, 3.141_592_653_589, inf, nan",
+ − 2058 "fail: .0, 1., _1.0, 1.0_, 1_.0, 1.0__0"}));
+ − 2059 }
+ − 2060 if(loc.iter() != loc.end() && *loc.iter() == '_')
+ − 2061 {
+ − 2062 return err(format_underline("bad number: `_` should be surrounded by digits",
+ − 2063 {{source_location(loc), "`_` is not surrounded by digits"}},
+ − 2064 {"pass: -42, 1_000, 1_2_3_4_5, 0xC0FFEE, 0b0010, 0o755",
+ − 2065 "fail: 1__000, 0123"}));
+ − 2066 }
+ − 2067 return err(format_underline("bad format: unknown value appeared",
+ − 2068 {{source_location(loc), "here"}}));
+ − 2069 }
+ − 2070
+ − 2071 inline result<value_t, std::string> guess_value_type(const location& loc)
+ − 2072 {
+ − 2073 switch(*loc.iter())
+ − 2074 {
+ − 2075 case '"' : {return ok(value_t::string); }
+ − 2076 case '\'': {return ok(value_t::string); }
+ − 2077 case 't' : {return ok(value_t::boolean); }
+ − 2078 case 'f' : {return ok(value_t::boolean); }
+ − 2079 case '[' : {return ok(value_t::array); }
+ − 2080 case '{' : {return ok(value_t::table); }
+ − 2081 case 'i' : {return ok(value_t::floating);} // inf.
+ − 2082 case 'n' : {return ok(value_t::floating);} // nan.
+ − 2083 default : {return guess_number_type(loc);}
+ − 2084 }
+ − 2085 }
+ − 2086
+ − 2087 template<typename Value, typename T>
+ − 2088 result<Value, std::string>
+ − 2089 parse_value_helper(result<std::pair<T, region>, std::string> rslt)
+ − 2090 {
+ − 2091 if(rslt.is_ok())
+ − 2092 {
+ − 2093 auto comments = rslt.as_ok().second.comments();
+ − 2094 return ok(Value(std::move(rslt.as_ok()), std::move(comments)));
+ − 2095 }
+ − 2096 else
+ − 2097 {
+ − 2098 return err(std::move(rslt.as_err()));
+ − 2099 }
+ − 2100 }
+ − 2101
+ − 2102 template<typename Value>
+ − 2103 result<Value, std::string> parse_value(location& loc, const std::size_t n_rec)
+ − 2104 {
+ − 2105 const auto first = loc.iter();
+ − 2106 if(first == loc.end())
+ − 2107 {
+ − 2108 return err(format_underline("toml::parse_value: input is empty",
+ − 2109 {{source_location(loc), ""}}));
+ − 2110 }
+ − 2111
+ − 2112 const auto type = guess_value_type(loc);
+ − 2113 if(!type)
+ − 2114 {
+ − 2115 return err(type.unwrap_err());
+ − 2116 }
+ − 2117
+ − 2118 switch(type.unwrap())
+ − 2119 {
+ − 2120 case value_t::boolean : {return parse_value_helper<Value>(parse_boolean(loc) );}
+ − 2121 case value_t::integer : {return parse_value_helper<Value>(parse_integer(loc) );}
+ − 2122 case value_t::floating : {return parse_value_helper<Value>(parse_floating(loc) );}
+ − 2123 case value_t::string : {return parse_value_helper<Value>(parse_string(loc) );}
+ − 2124 case value_t::offset_datetime: {return parse_value_helper<Value>(parse_offset_datetime(loc) );}
+ − 2125 case value_t::local_datetime : {return parse_value_helper<Value>(parse_local_datetime(loc) );}
+ − 2126 case value_t::local_date : {return parse_value_helper<Value>(parse_local_date(loc) );}
+ − 2127 case value_t::local_time : {return parse_value_helper<Value>(parse_local_time(loc) );}
+ − 2128 case value_t::array : {return parse_value_helper<Value>(parse_array<Value>(loc, n_rec));}
+ − 2129 case value_t::table : {return parse_value_helper<Value>(parse_inline_table<Value>(loc, n_rec));}
+ − 2130 default:
+ − 2131 {
+ − 2132 const auto msg = format_underline("toml::parse_value: "
+ − 2133 "unknown token appeared", {{source_location(loc), "unknown"}});
+ − 2134 loc.reset(first);
+ − 2135 return err(msg);
+ − 2136 }
+ − 2137 }
+ − 2138 }
+ − 2139
+ − 2140 inline result<std::pair<std::vector<key>, region>, std::string>
+ − 2141 parse_table_key(location& loc)
+ − 2142 {
+ − 2143 if(auto token = lex_std_table::invoke(loc))
+ − 2144 {
+ − 2145 location inner_loc(loc.name(), token.unwrap().str());
+ − 2146
+ − 2147 const auto open = lex_std_table_open::invoke(inner_loc);
+ − 2148 if(!open || inner_loc.iter() == inner_loc.end())
+ − 2149 {
+ − 2150 throw internal_error(format_underline(
+ − 2151 "toml::parse_table_key: no `[`",
+ − 2152 {{source_location(inner_loc), "should be `[`"}}),
+ − 2153 source_location(inner_loc));
+ − 2154 }
+ − 2155 // to skip [ a . b . c ]
+ − 2156 // ^----------- this whitespace
+ − 2157 lex_ws::invoke(inner_loc);
+ − 2158 const auto keys = parse_key(inner_loc);
+ − 2159 if(!keys)
+ − 2160 {
+ − 2161 throw internal_error(format_underline(
+ − 2162 "toml::parse_table_key: invalid key",
+ − 2163 {{source_location(inner_loc), "not key"}}),
+ − 2164 source_location(inner_loc));
+ − 2165 }
+ − 2166 // to skip [ a . b . c ]
+ − 2167 // ^-- this whitespace
+ − 2168 lex_ws::invoke(inner_loc);
+ − 2169 const auto close = lex_std_table_close::invoke(inner_loc);
+ − 2170 if(!close)
+ − 2171 {
+ − 2172 throw internal_error(format_underline(
+ − 2173 "toml::parse_table_key: no `]`",
+ − 2174 {{source_location(inner_loc), "should be `]`"}}),
+ − 2175 source_location(inner_loc));
+ − 2176 }
+ − 2177
+ − 2178 // after [table.key], newline or EOF(empty table) required.
+ − 2179 if(loc.iter() != loc.end())
+ − 2180 {
+ − 2181 using lex_newline_after_table_key =
+ − 2182 sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>;
+ − 2183 const auto nl = lex_newline_after_table_key::invoke(loc);
+ − 2184 if(!nl)
+ − 2185 {
+ − 2186 throw syntax_error(format_underline(
+ − 2187 "toml::parse_table_key: newline required after [table.key]",
+ − 2188 {{source_location(loc), "expected newline"}}),
+ − 2189 source_location(loc));
+ − 2190 }
+ − 2191 }
+ − 2192 return ok(std::make_pair(keys.unwrap().first, token.unwrap()));
+ − 2193 }
+ − 2194 else
+ − 2195 {
+ − 2196 return err(format_underline("toml::parse_table_key: "
+ − 2197 "not a valid table key", {{source_location(loc), "here"}}));
+ − 2198 }
+ − 2199 }
+ − 2200
+ − 2201 inline result<std::pair<std::vector<key>, region>, std::string>
+ − 2202 parse_array_table_key(location& loc)
+ − 2203 {
+ − 2204 if(auto token = lex_array_table::invoke(loc))
+ − 2205 {
+ − 2206 location inner_loc(loc.name(), token.unwrap().str());
+ − 2207
+ − 2208 const auto open = lex_array_table_open::invoke(inner_loc);
+ − 2209 if(!open || inner_loc.iter() == inner_loc.end())
+ − 2210 {
+ − 2211 throw internal_error(format_underline(
+ − 2212 "toml::parse_array_table_key: no `[[`",
+ − 2213 {{source_location(inner_loc), "should be `[[`"}}),
+ − 2214 source_location(inner_loc));
+ − 2215 }
+ − 2216 lex_ws::invoke(inner_loc);
+ − 2217 const auto keys = parse_key(inner_loc);
+ − 2218 if(!keys)
+ − 2219 {
+ − 2220 throw internal_error(format_underline(
+ − 2221 "toml::parse_array_table_key: invalid key",
+ − 2222 {{source_location(inner_loc), "not a key"}}),
+ − 2223 source_location(inner_loc));
+ − 2224 }
+ − 2225 lex_ws::invoke(inner_loc);
+ − 2226 const auto close = lex_array_table_close::invoke(inner_loc);
+ − 2227 if(!close)
+ − 2228 {
+ − 2229 throw internal_error(format_underline(
+ − 2230 "toml::parse_array_table_key: no `]]`",
+ − 2231 {{source_location(inner_loc), "should be `]]`"}}),
+ − 2232 source_location(inner_loc));
+ − 2233 }
+ − 2234
+ − 2235 // after [[table.key]], newline or EOF(empty table) required.
+ − 2236 if(loc.iter() != loc.end())
+ − 2237 {
+ − 2238 using lex_newline_after_table_key =
+ − 2239 sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>;
+ − 2240 const auto nl = lex_newline_after_table_key::invoke(loc);
+ − 2241 if(!nl)
+ − 2242 {
+ − 2243 throw syntax_error(format_underline("toml::"
+ − 2244 "parse_array_table_key: newline required after [[table.key]]",
+ − 2245 {{source_location(loc), "expected newline"}}),
+ − 2246 source_location(loc));
+ − 2247 }
+ − 2248 }
+ − 2249 return ok(std::make_pair(keys.unwrap().first, token.unwrap()));
+ − 2250 }
+ − 2251 else
+ − 2252 {
+ − 2253 return err(format_underline("toml::parse_array_table_key: "
+ − 2254 "not a valid table key", {{source_location(loc), "here"}}));
+ − 2255 }
+ − 2256 }
+ − 2257
+ − 2258 // parse table body (key-value pairs until the iter hits the next [tablekey])
+ − 2259 template<typename Value>
+ − 2260 result<typename Value::table_type, std::string>
+ − 2261 parse_ml_table(location& loc)
+ − 2262 {
+ − 2263 using value_type = Value;
+ − 2264 using table_type = typename value_type::table_type;
+ − 2265
+ − 2266 const auto first = loc.iter();
+ − 2267 if(first == loc.end())
+ − 2268 {
+ − 2269 return ok(table_type{});
+ − 2270 }
+ − 2271
+ − 2272 // XXX at lest one newline is needed.
+ − 2273 using skip_line = repeat<
+ − 2274 sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>, at_least<1>>;
+ − 2275 skip_line::invoke(loc);
+ − 2276 lex_ws::invoke(loc);
+ − 2277
+ − 2278 table_type tab;
+ − 2279 while(loc.iter() != loc.end())
+ − 2280 {
+ − 2281 lex_ws::invoke(loc);
+ − 2282 const auto before = loc.iter();
+ − 2283 if(const auto tmp = parse_array_table_key(loc)) // next table found
+ − 2284 {
+ − 2285 loc.reset(before);
+ − 2286 return ok(tab);
+ − 2287 }
+ − 2288 if(const auto tmp = parse_table_key(loc)) // next table found
+ − 2289 {
+ − 2290 loc.reset(before);
+ − 2291 return ok(tab);
+ − 2292 }
+ − 2293
+ − 2294 if(const auto kv = parse_key_value_pair<value_type>(loc, 0))
+ − 2295 {
+ − 2296 const auto& kvpair = kv.unwrap();
+ − 2297 const std::vector<key>& keys = kvpair.first.first;
+ − 2298 const auto& key_reg = kvpair.first.second;
+ − 2299 const value_type& val = kvpair.second;
+ − 2300 const auto inserted =
+ − 2301 insert_nested_key(tab, val, keys.begin(), keys.end(), key_reg);
+ − 2302 if(!inserted)
+ − 2303 {
+ − 2304 return err(inserted.unwrap_err());
+ − 2305 }
+ − 2306 }
+ − 2307 else
+ − 2308 {
+ − 2309 return err(kv.unwrap_err());
+ − 2310 }
+ − 2311
+ − 2312 // comment lines are skipped by the above function call.
+ − 2313 // However, since the `skip_line` requires at least 1 newline, it fails
+ − 2314 // if the file ends with ws and/or comment without newline.
+ − 2315 // `skip_line` matches `ws? + comment? + newline`, not `ws` or `comment`
+ − 2316 // itself. To skip the last ws and/or comment, call lexers.
+ − 2317 // It does not matter if these fails, so the return value is discarded.
+ − 2318 lex_ws::invoke(loc);
+ − 2319 lex_comment::invoke(loc);
+ − 2320
+ − 2321 // skip_line is (whitespace? comment? newline)_{1,}. multiple empty lines
+ − 2322 // and comments after the last key-value pairs are allowed.
+ − 2323 const auto newline = skip_line::invoke(loc);
+ − 2324 if(!newline && loc.iter() != loc.end())
+ − 2325 {
+ − 2326 const auto before2 = loc.iter();
+ − 2327 lex_ws::invoke(loc); // skip whitespace
+ − 2328 const auto msg = format_underline("toml::parse_table: "
+ − 2329 "invalid line format", {{source_location(loc), concat_to_string(
+ − 2330 "expected newline, but got '", show_char(*loc.iter()), "'.")}});
+ − 2331 loc.reset(before2);
+ − 2332 return err(msg);
+ − 2333 }
+ − 2334
+ − 2335 // the skip_lines only matches with lines that includes newline.
+ − 2336 // to skip the last line that includes comment and/or whitespace
+ − 2337 // but no newline, call them one more time.
+ − 2338 lex_ws::invoke(loc);
+ − 2339 lex_comment::invoke(loc);
+ − 2340 }
+ − 2341 return ok(tab);
+ − 2342 }
+ − 2343
+ − 2344 template<typename Value>
+ − 2345 result<Value, std::string> parse_toml_file(location& loc)
+ − 2346 {
+ − 2347 using value_type = Value;
+ − 2348 using table_type = typename value_type::table_type;
+ − 2349
+ − 2350 const auto first = loc.iter();
+ − 2351 if(first == loc.end())
+ − 2352 {
+ − 2353 // For empty files, return an empty table with an empty region (zero-length).
+ − 2354 // Without the region, error messages would miss the filename.
+ − 2355 return ok(value_type(table_type{}, region(loc, first, first), {}));
+ − 2356 }
+ − 2357
+ − 2358 // put the first line as a region of a file
+ − 2359 // Here first != loc.end(), so taking std::next is okay
+ − 2360 const region file(loc, first, std::next(loc.iter()));
+ − 2361
+ − 2362 // The first successive comments that are separated from the first value
+ − 2363 // by an empty line are for a file itself.
+ − 2364 // ```toml
+ − 2365 // # this is a comment for a file.
+ − 2366 //
+ − 2367 // key = "the first value"
+ − 2368 // ```
+ − 2369 // ```toml
+ − 2370 // # this is a comment for "the first value".
+ − 2371 // key = "the first value"
+ − 2372 // ```
+ − 2373 std::vector<std::string> comments;
+ − 2374 using lex_first_comments = sequence<
+ − 2375 repeat<sequence<maybe<lex_ws>, lex_comment, lex_newline>, at_least<1>>,
+ − 2376 sequence<maybe<lex_ws>, lex_newline>
+ − 2377 >;
+ − 2378 if(const auto token = lex_first_comments::invoke(loc))
+ − 2379 {
+ − 2380 location inner_loc(loc.name(), token.unwrap().str());
+ − 2381 while(inner_loc.iter() != inner_loc.end())
+ − 2382 {
+ − 2383 maybe<lex_ws>::invoke(inner_loc); // remove ws if exists
+ − 2384 if(lex_newline::invoke(inner_loc))
+ − 2385 {
+ − 2386 assert(inner_loc.iter() == inner_loc.end());
+ − 2387 break; // empty line found.
+ − 2388 }
+ − 2389 auto com = lex_comment::invoke(inner_loc).unwrap().str();
+ − 2390 com.erase(com.begin()); // remove # sign
+ − 2391 comments.push_back(std::move(com));
+ − 2392 lex_newline::invoke(inner_loc);
+ − 2393 }
+ − 2394 }
+ − 2395
+ − 2396 table_type data;
+ − 2397 // root object is also a table, but without [tablename]
+ − 2398 if(const auto tab = parse_ml_table<value_type>(loc))
+ − 2399 {
+ − 2400 data = std::move(tab.unwrap());
+ − 2401 }
+ − 2402 else // failed (empty table is regarded as success in parse_ml_table)
+ − 2403 {
+ − 2404 return err(tab.unwrap_err());
+ − 2405 }
+ − 2406 while(loc.iter() != loc.end())
+ − 2407 {
+ − 2408 // here, the region of [table] is regarded as the table-key because
+ − 2409 // the table body is normally too big and it is not so informative
+ − 2410 // if the first key-value pair of the table is shown in the error
+ − 2411 // message.
+ − 2412 if(const auto tabkey = parse_array_table_key(loc))
+ − 2413 {
+ − 2414 const auto tab = parse_ml_table<value_type>(loc);
+ − 2415 if(!tab){return err(tab.unwrap_err());}
+ − 2416
+ − 2417 const auto& tk = tabkey.unwrap();
+ − 2418 const auto& keys = tk.first;
+ − 2419 const auto& reg = tk.second;
+ − 2420
+ − 2421 const auto inserted = insert_nested_key(data,
+ − 2422 value_type(tab.unwrap(), reg, reg.comments()),
+ − 2423 keys.begin(), keys.end(), reg,
+ − 2424 /*is_array_of_table=*/ true);
+ − 2425 if(!inserted) {return err(inserted.unwrap_err());}
+ − 2426
+ − 2427 continue;
+ − 2428 }
+ − 2429 if(const auto tabkey = parse_table_key(loc))
+ − 2430 {
+ − 2431 const auto tab = parse_ml_table<value_type>(loc);
+ − 2432 if(!tab){return err(tab.unwrap_err());}
+ − 2433
+ − 2434 const auto& tk = tabkey.unwrap();
+ − 2435 const auto& keys = tk.first;
+ − 2436 const auto& reg = tk.second;
+ − 2437
+ − 2438 const auto inserted = insert_nested_key(data,
+ − 2439 value_type(tab.unwrap(), reg, reg.comments()),
+ − 2440 keys.begin(), keys.end(), reg);
+ − 2441 if(!inserted) {return err(inserted.unwrap_err());}
+ − 2442
+ − 2443 continue;
+ − 2444 }
+ − 2445 return err(format_underline("toml::parse_toml_file: "
+ − 2446 "unknown line appeared", {{source_location(loc), "unknown format"}}));
+ − 2447 }
+ − 2448
+ − 2449 return ok(Value(std::move(data), file, comments));
+ − 2450 }
+ − 2451
+ − 2452 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY,
+ − 2453 template<typename ...> class Table = std::unordered_map,
+ − 2454 template<typename ...> class Array = std::vector>
+ − 2455 basic_value<Comment, Table, Array>
+ − 2456 parse(std::vector<char>& letters, const std::string& fname)
+ − 2457 {
+ − 2458 using value_type = basic_value<Comment, Table, Array>;
+ − 2459
+ − 2460 // append LF.
+ − 2461 // Although TOML does not require LF at the EOF, to make parsing logic
+ − 2462 // simpler, we "normalize" the content by adding LF if it does not exist.
+ − 2463 // It also checks if the last char is CR, to avoid changing the meaning.
+ − 2464 // This is not the *best* way to deal with the last character, but is a
+ − 2465 // simple and quick fix.
+ − 2466 if(!letters.empty() && letters.back() != '\n' && letters.back() != '\r')
+ − 2467 {
+ − 2468 letters.push_back('\n');
+ − 2469 }
+ − 2470
+ − 2471 detail::location loc(std::move(fname), std::move(letters));
+ − 2472
+ − 2473 // skip BOM if exists.
+ − 2474 // XXX component of BOM (like 0xEF) exceeds the representable range of
+ − 2475 // signed char, so on some (actually, most) of the environment, these cannot
+ − 2476 // be compared to char. However, since we are always out of luck, we need to
+ − 2477 // check our chars are equivalent to BOM. To do this, first we need to
+ − 2478 // convert char to unsigned char to guarantee the comparability.
+ − 2479 if(loc.source()->size() >= 3)
+ − 2480 {
+ − 2481 std::array<unsigned char, 3> BOM;
+ − 2482 std::memcpy(BOM.data(), loc.source()->data(), 3);
+ − 2483 if(BOM[0] == 0xEF && BOM[1] == 0xBB && BOM[2] == 0xBF)
+ − 2484 {
+ − 2485 loc.advance(3); // BOM found. skip.
+ − 2486 }
+ − 2487 }
+ − 2488
+ − 2489 if (auto data = detail::parse_toml_file<value_type>(loc))
+ − 2490 {
+ − 2491 return std::move(data).unwrap();
+ − 2492 }
+ − 2493 else
+ − 2494 {
+ − 2495 throw syntax_error(std::move(data).unwrap_err(), source_location(loc));
+ − 2496 }
+ − 2497 }
+ − 2498
+ − 2499 } // detail
+ − 2500
+ − 2501 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY,
+ − 2502 template<typename ...> class Table = std::unordered_map,
+ − 2503 template<typename ...> class Array = std::vector>
+ − 2504 basic_value<Comment, Table, Array>
+ − 2505 parse(FILE * file, const std::string& fname)
+ − 2506 {
+ − 2507 const long beg = std::ftell(file);
+ − 2508 if (beg == -1l)
+ − 2509 {
+ − 2510 throw file_io_error(errno, "Failed to access", fname);
+ − 2511 }
+ − 2512
+ − 2513 const int res_seekend = std::fseek(file, 0, SEEK_END);
+ − 2514 if (res_seekend != 0)
+ − 2515 {
+ − 2516 throw file_io_error(errno, "Failed to seek", fname);
+ − 2517 }
+ − 2518
+ − 2519 const long end = std::ftell(file);
+ − 2520 if (end == -1l)
+ − 2521 {
+ − 2522 throw file_io_error(errno, "Failed to access", fname);
+ − 2523 }
+ − 2524
+ − 2525 const auto fsize = end - beg;
+ − 2526
+ − 2527 const auto res_seekbeg = std::fseek(file, beg, SEEK_SET);
+ − 2528 if (res_seekbeg != 0)
+ − 2529 {
+ − 2530 throw file_io_error(errno, "Failed to seek", fname);
+ − 2531 }
+ − 2532
+ − 2533 // read whole file as a sequence of char
+ − 2534 assert(fsize >= 0);
+ − 2535 std::vector<char> letters(static_cast<std::size_t>(fsize));
+ − 2536 std::fread(letters.data(), sizeof(char), static_cast<std::size_t>(fsize), file);
+ − 2537
+ − 2538 return detail::parse<Comment, Table, Array>(letters, fname);
+ − 2539 }
+ − 2540
+ − 2541 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY,
+ − 2542 template<typename ...> class Table = std::unordered_map,
+ − 2543 template<typename ...> class Array = std::vector>
+ − 2544 basic_value<Comment, Table, Array>
+ − 2545 parse(std::istream& is, std::string fname = "unknown file")
+ − 2546 {
+ − 2547 const auto beg = is.tellg();
+ − 2548 is.seekg(0, std::ios::end);
+ − 2549 const auto end = is.tellg();
+ − 2550 const auto fsize = end - beg;
+ − 2551 is.seekg(beg);
+ − 2552
+ − 2553 // read whole file as a sequence of char
+ − 2554 assert(fsize >= 0);
+ − 2555 std::vector<char> letters(static_cast<std::size_t>(fsize));
+ − 2556 is.read(letters.data(), fsize);
+ − 2557
+ − 2558 return detail::parse<Comment, Table, Array>(letters, fname);
+ − 2559 }
+ − 2560
+ − 2561 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY,
+ − 2562 template<typename ...> class Table = std::unordered_map,
+ − 2563 template<typename ...> class Array = std::vector>
+ − 2564 basic_value<Comment, Table, Array> parse(std::string fname)
+ − 2565 {
+ − 2566 std::ifstream ifs(fname, std::ios_base::binary);
+ − 2567 if(!ifs.good())
+ − 2568 {
+ − 2569 throw std::ios_base::failure(
+ − 2570 "toml::parse: Error opening file \"" + fname + "\"");
+ − 2571 }
+ − 2572 ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
+ − 2573 return parse<Comment, Table, Array>(ifs, std::move(fname));
+ − 2574 }
+ − 2575
+ − 2576 #ifdef TOML11_HAS_STD_FILESYSTEM
// This function just forwards `parse("filename.toml")` to the std::string
// version to avoid an ambiguity in overload resolution.
//
// Both std::string and std::filesystem::path are convertible from const char*.
// Without this overload, both parse(std::string) and
// parse(std::filesystem::path) match parse("filename.toml"), which would
// break existing code.
//
// This function exactly matches an invocation with a C string, so it is
// preferred over the others and the ambiguity disappears.
+ − 2586 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY,
+ − 2587 template<typename ...> class Table = std::unordered_map,
+ − 2588 template<typename ...> class Array = std::vector>
+ − 2589 basic_value<Comment, Table, Array> parse(const char* fname)
+ − 2590 {
+ − 2591 return parse<Comment, Table, Array>(std::string(fname));
+ − 2592 }
+ − 2593
+ − 2594 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY,
+ − 2595 template<typename ...> class Table = std::unordered_map,
+ − 2596 template<typename ...> class Array = std::vector>
+ − 2597 basic_value<Comment, Table, Array> parse(const std::filesystem::path& fpath)
+ − 2598 {
+ − 2599 std::ifstream ifs(fpath, std::ios_base::binary);
+ − 2600 if(!ifs.good())
+ − 2601 {
+ − 2602 throw std::ios_base::failure(
+ − 2603 "toml::parse: Error opening file \"" + fpath.string() + "\"");
+ − 2604 }
+ − 2605 ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
+ − 2606 return parse<Comment, Table, Array>(ifs, fpath.string());
+ − 2607 }
+ − 2608 #endif // TOML11_HAS_STD_FILESYSTEM
+ − 2609
+ − 2610 } // toml
+ − 2611 #endif// TOML11_PARSER_HPP