Mercurial > minori
comparison dep/toml11/toml/parser.hpp @ 318:3b355fa948c7
config: use TOML instead of INI
Unfortunately, INI is not enough: paths that include semicolons
break with our current storage of the library folders.
So I decided to switch to TOML, which does support real arrays.
author | Paper <paper@paper.us.eu.org> |
---|---|
date | Wed, 12 Jun 2024 05:25:41 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
317:b1f4d1867ab1 | 318:3b355fa948c7 |
---|---|
1 // Copyright Toru Niina 2017. | |
2 // Distributed under the MIT License. | |
3 #ifndef TOML11_PARSER_HPP | |
4 #define TOML11_PARSER_HPP | |
5 #include <cstring> | |
6 #include <fstream> | |
7 #include <sstream> | |
8 | |
9 #include "combinator.hpp" | |
10 #include "lexer.hpp" | |
11 #include "macros.hpp" | |
12 #include "region.hpp" | |
13 #include "result.hpp" | |
14 #include "types.hpp" | |
15 #include "value.hpp" | |
16 | |
17 #ifndef TOML11_DISABLE_STD_FILESYSTEM | |
18 #ifdef __cpp_lib_filesystem | |
19 #if __has_include(<filesystem>) | |
20 #define TOML11_HAS_STD_FILESYSTEM | |
21 #include <filesystem> | |
22 #endif // __has_include(<filesystem>) | |
23 #endif // __cpp_lib_filesystem | |
24 #endif // TOML11_DISABLE_STD_FILESYSTEM | |
25 | |
26 // the previous commit works with 500+ recursions. so it may be too small. | |
27 // but in most cases, i think we don't need such a deep recursion of | |
28 // arrays or inline-tables. | |
29 #define TOML11_VALUE_RECURSION_LIMIT 64 | |
30 | |
31 namespace toml | |
32 { | |
33 namespace detail | |
34 { | |
35 | |
36 inline result<std::pair<boolean, region>, std::string> | |
37 parse_boolean(location& loc) | |
38 { | |
39 const auto first = loc.iter(); | |
40 if(const auto token = lex_boolean::invoke(loc)) | |
41 { | |
42 const auto reg = token.unwrap(); | |
43 if (reg.str() == "true") {return ok(std::make_pair(true, reg));} | |
44 else if(reg.str() == "false") {return ok(std::make_pair(false, reg));} | |
45 else // internal error. | |
46 { | |
47 throw internal_error(format_underline( | |
48 "toml::parse_boolean: internal error", | |
49 {{source_location(reg), "invalid token"}}), | |
50 source_location(reg)); | |
51 } | |
52 } | |
53 loc.reset(first); //rollback | |
54 return err(format_underline("toml::parse_boolean: ", | |
55 {{source_location(loc), "the next token is not a boolean"}})); | |
56 } | |
57 | |
58 inline result<std::pair<integer, region>, std::string> | |
59 parse_binary_integer(location& loc) | |
60 { | |
61 const auto first = loc.iter(); | |
62 if(const auto token = lex_bin_int::invoke(loc)) | |
63 { | |
64 auto str = token.unwrap().str(); | |
65 assert(str.size() > 2); // minimum -> 0b1 | |
66 assert(str.at(0) == '0' && str.at(1) == 'b'); | |
67 | |
68 // skip all the zeros and `_` locating at the MSB | |
69 str.erase(str.begin(), std::find_if( | |
70 str.begin() + 2, // to skip prefix `0b` | |
71 str.end(), | |
72 [](const char c) { return c == '1'; }) | |
73 ); | |
74 assert(str.empty() || str.front() == '1'); | |
75 | |
76 // since toml11 uses int64_t, 64bit (unsigned) input cannot be read. | |
77 const auto max_length = 63 + std::count(str.begin(), str.end(), '_'); | |
78 if(static_cast<std::string::size_type>(max_length) < str.size()) | |
79 { | |
80 loc.reset(first); | |
81 return err(format_underline("toml::parse_binary_integer: " | |
82 "only signed 64bit integer is available", | |
83 {{source_location(loc), "too large input (> int64_t)"}})); | |
84 } | |
85 | |
86 integer retval(0), base(1); | |
87 for(auto i(str.rbegin()), e(str.rend()); i!=e; ++i) | |
88 { | |
89 assert(base != 0); // means overflow, checked in the above code | |
90 if(*i == '1') | |
91 { | |
92 retval += base; | |
93 if( (std::numeric_limits<integer>::max)() / 2 < base ) | |
94 { | |
95 base = 0; | |
96 } | |
97 base *= 2; | |
98 } | |
99 else if(*i == '0') | |
100 { | |
101 if( (std::numeric_limits<integer>::max)() / 2 < base ) | |
102 { | |
103 base = 0; | |
104 } | |
105 base *= 2; | |
106 } | |
107 else if(*i == '_') | |
108 { | |
109 // do nothing. | |
110 } | |
111 else // should be detected by lex_bin_int. [[unlikely]] | |
112 { | |
113 throw internal_error(format_underline( | |
114 "toml::parse_binary_integer: internal error", | |
115 {{source_location(token.unwrap()), "invalid token"}}), | |
116 source_location(loc)); | |
117 } | |
118 } | |
119 return ok(std::make_pair(retval, token.unwrap())); | |
120 } | |
121 loc.reset(first); | |
122 return err(format_underline("toml::parse_binary_integer:", | |
123 {{source_location(loc), "the next token is not an integer"}})); | |
124 } | |
125 | |
126 inline result<std::pair<integer, region>, std::string> | |
127 parse_octal_integer(location& loc) | |
128 { | |
129 const auto first = loc.iter(); | |
130 if(const auto token = lex_oct_int::invoke(loc)) | |
131 { | |
132 auto str = token.unwrap().str(); | |
133 str.erase(std::remove(str.begin(), str.end(), '_'), str.end()); | |
134 str.erase(str.begin()); str.erase(str.begin()); // remove `0o` prefix | |
135 | |
136 std::istringstream iss(str); | |
137 integer retval(0); | |
138 iss >> std::oct >> retval; | |
139 if(iss.fail()) | |
140 { | |
141 // `istream` sets `failbit` if internally-called `std::num_get::get` | |
142 // fails. | |
143 // `std::num_get::get` calls `std::strtoll` if the argument type is | |
144 // signed. | |
145 // `std::strtoll` fails if | |
146 // - the value is out_of_range or | |
147 // - no conversion is possible. | |
148 // since we already checked that the string is valid octal integer, | |
149 // so the error reason is out_of_range. | |
150 loc.reset(first); | |
151 return err(format_underline("toml::parse_octal_integer:", | |
152 {{source_location(loc), "out of range"}})); | |
153 } | |
154 return ok(std::make_pair(retval, token.unwrap())); | |
155 } | |
156 loc.reset(first); | |
157 return err(format_underline("toml::parse_octal_integer:", | |
158 {{source_location(loc), "the next token is not an integer"}})); | |
159 } | |
160 | |
161 inline result<std::pair<integer, region>, std::string> | |
162 parse_hexadecimal_integer(location& loc) | |
163 { | |
164 const auto first = loc.iter(); | |
165 if(const auto token = lex_hex_int::invoke(loc)) | |
166 { | |
167 auto str = token.unwrap().str(); | |
168 str.erase(std::remove(str.begin(), str.end(), '_'), str.end()); | |
169 str.erase(str.begin()); str.erase(str.begin()); // remove `0x` prefix | |
170 | |
171 std::istringstream iss(str); | |
172 integer retval(0); | |
173 iss >> std::hex >> retval; | |
174 if(iss.fail()) | |
175 { | |
176 // see parse_octal_integer for detail of this error message. | |
177 loc.reset(first); | |
178 return err(format_underline("toml::parse_hexadecimal_integer:", | |
179 {{source_location(loc), "out of range"}})); | |
180 } | |
181 return ok(std::make_pair(retval, token.unwrap())); | |
182 } | |
183 loc.reset(first); | |
184 return err(format_underline("toml::parse_hexadecimal_integer", | |
185 {{source_location(loc), "the next token is not an integer"}})); | |
186 } | |
187 | |
188 inline result<std::pair<integer, region>, std::string> | |
189 parse_integer(location& loc) | |
190 { | |
191 const auto first = loc.iter(); | |
192 if(first != loc.end() && *first == '0') | |
193 { | |
194 const auto second = std::next(first); | |
195 if(second == loc.end()) // the token is just zero. | |
196 { | |
197 loc.advance(); | |
198 return ok(std::make_pair(0, region(loc, first, second))); | |
199 } | |
200 | |
201 if(*second == 'b') {return parse_binary_integer (loc);} // 0b1100 | |
202 if(*second == 'o') {return parse_octal_integer (loc);} // 0o775 | |
203 if(*second == 'x') {return parse_hexadecimal_integer(loc);} // 0xC0FFEE | |
204 | |
205 if(std::isdigit(*second)) | |
206 { | |
207 return err(format_underline("toml::parse_integer: " | |
208 "leading zero in an Integer is not allowed.", | |
209 {{source_location(loc), "leading zero"}})); | |
210 } | |
211 else if(std::isalpha(*second)) | |
212 { | |
213 return err(format_underline("toml::parse_integer: " | |
214 "unknown integer prefix appeared.", | |
215 {{source_location(loc), "none of 0x, 0o, 0b"}})); | |
216 } | |
217 } | |
218 | |
219 if(const auto token = lex_dec_int::invoke(loc)) | |
220 { | |
221 auto str = token.unwrap().str(); | |
222 str.erase(std::remove(str.begin(), str.end(), '_'), str.end()); | |
223 | |
224 std::istringstream iss(str); | |
225 integer retval(0); | |
226 iss >> retval; | |
227 if(iss.fail()) | |
228 { | |
229 // see parse_octal_integer for detail of this error message. | |
230 loc.reset(first); | |
231 return err(format_underline("toml::parse_integer:", | |
232 {{source_location(loc), "out of range"}})); | |
233 } | |
234 return ok(std::make_pair(retval, token.unwrap())); | |
235 } | |
236 loc.reset(first); | |
237 return err(format_underline("toml::parse_integer: ", | |
238 {{source_location(loc), "the next token is not an integer"}})); | |
239 } | |
240 | |
241 inline result<std::pair<floating, region>, std::string> | |
242 parse_floating(location& loc) | |
243 { | |
244 const auto first = loc.iter(); | |
245 if(const auto token = lex_float::invoke(loc)) | |
246 { | |
247 auto str = token.unwrap().str(); | |
248 if(str == "inf" || str == "+inf") | |
249 { | |
250 if(std::numeric_limits<floating>::has_infinity) | |
251 { | |
252 return ok(std::make_pair( | |
253 std::numeric_limits<floating>::infinity(), token.unwrap())); | |
254 } | |
255 else | |
256 { | |
257 throw std::domain_error("toml::parse_floating: inf value found" | |
258 " but the current environment does not support inf. Please" | |
259 " make sure that the floating-point implementation conforms" | |
260 " IEEE 754/ISO 60559 international standard."); | |
261 } | |
262 } | |
263 else if(str == "-inf") | |
264 { | |
265 if(std::numeric_limits<floating>::has_infinity) | |
266 { | |
267 return ok(std::make_pair( | |
268 -std::numeric_limits<floating>::infinity(), token.unwrap())); | |
269 } | |
270 else | |
271 { | |
272 throw std::domain_error("toml::parse_floating: inf value found" | |
273 " but the current environment does not support inf. Please" | |
274 " make sure that the floating-point implementation conforms" | |
275 " IEEE 754/ISO 60559 international standard."); | |
276 } | |
277 } | |
278 else if(str == "nan" || str == "+nan") | |
279 { | |
280 if(std::numeric_limits<floating>::has_quiet_NaN) | |
281 { | |
282 return ok(std::make_pair( | |
283 std::numeric_limits<floating>::quiet_NaN(), token.unwrap())); | |
284 } | |
285 else if(std::numeric_limits<floating>::has_signaling_NaN) | |
286 { | |
287 return ok(std::make_pair( | |
288 std::numeric_limits<floating>::signaling_NaN(), token.unwrap())); | |
289 } | |
290 else | |
291 { | |
292 throw std::domain_error("toml::parse_floating: NaN value found" | |
293 " but the current environment does not support NaN. Please" | |
294 " make sure that the floating-point implementation conforms" | |
295 " IEEE 754/ISO 60559 international standard."); | |
296 } | |
297 } | |
298 else if(str == "-nan") | |
299 { | |
300 if(std::numeric_limits<floating>::has_quiet_NaN) | |
301 { | |
302 return ok(std::make_pair( | |
303 -std::numeric_limits<floating>::quiet_NaN(), token.unwrap())); | |
304 } | |
305 else if(std::numeric_limits<floating>::has_signaling_NaN) | |
306 { | |
307 return ok(std::make_pair( | |
308 -std::numeric_limits<floating>::signaling_NaN(), token.unwrap())); | |
309 } | |
310 else | |
311 { | |
312 throw std::domain_error("toml::parse_floating: NaN value found" | |
313 " but the current environment does not support NaN. Please" | |
314 " make sure that the floating-point implementation conforms" | |
315 " IEEE 754/ISO 60559 international standard."); | |
316 } | |
317 } | |
318 str.erase(std::remove(str.begin(), str.end(), '_'), str.end()); | |
319 std::istringstream iss(str); | |
320 floating v(0.0); | |
321 iss >> v; | |
322 if(iss.fail()) | |
323 { | |
324 // see parse_octal_integer for detail of this error message. | |
325 loc.reset(first); | |
326 return err(format_underline("toml::parse_floating:", | |
327 {{source_location(loc), "out of range"}})); | |
328 } | |
329 return ok(std::make_pair(v, token.unwrap())); | |
330 } | |
331 loc.reset(first); | |
332 return err(format_underline("toml::parse_floating: ", | |
333 {{source_location(loc), "the next token is not a float"}})); | |
334 } | |
335 | |
336 inline std::string read_utf8_codepoint(const region& reg, const location& loc) | |
337 { | |
338 const auto str = reg.str().substr(1); | |
339 std::uint_least32_t codepoint; | |
340 std::istringstream iss(str); | |
341 iss >> std::hex >> codepoint; | |
342 | |
343 const auto to_char = [](const std::uint_least32_t i) noexcept -> char { | |
344 const auto uc = static_cast<unsigned char>(i); | |
345 return *reinterpret_cast<const char*>(std::addressof(uc)); | |
346 }; | |
347 | |
348 std::string character; | |
349 if(codepoint < 0x80) // U+0000 ... U+0079 ; just an ASCII. | |
350 { | |
351 character += static_cast<char>(codepoint); | |
352 } | |
353 else if(codepoint < 0x800) //U+0080 ... U+07FF | |
354 { | |
355 // 110yyyyx 10xxxxxx; 0x3f == 0b0011'1111 | |
356 character += to_char(0xC0| codepoint >> 6); | |
357 character += to_char(0x80|(codepoint & 0x3F)); | |
358 } | |
359 else if(codepoint < 0x10000) // U+0800...U+FFFF | |
360 { | |
361 if(0xD800 <= codepoint && codepoint <= 0xDFFF) | |
362 { | |
363 throw syntax_error(format_underline( | |
364 "toml::read_utf8_codepoint: codepoints in the range " | |
365 "[0xD800, 0xDFFF] are not valid UTF-8.", {{ | |
366 source_location(loc), "not a valid UTF-8 codepoint" | |
367 }}), source_location(loc)); | |
368 } | |
369 assert(codepoint < 0xD800 || 0xDFFF < codepoint); | |
370 // 1110yyyy 10yxxxxx 10xxxxxx | |
371 character += to_char(0xE0| codepoint >> 12); | |
372 character += to_char(0x80|(codepoint >> 6 & 0x3F)); | |
373 character += to_char(0x80|(codepoint & 0x3F)); | |
374 } | |
375 else if(codepoint < 0x110000) // U+010000 ... U+10FFFF | |
376 { | |
377 // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx | |
378 character += to_char(0xF0| codepoint >> 18); | |
379 character += to_char(0x80|(codepoint >> 12 & 0x3F)); | |
380 character += to_char(0x80|(codepoint >> 6 & 0x3F)); | |
381 character += to_char(0x80|(codepoint & 0x3F)); | |
382 } | |
383 else // out of UTF-8 region | |
384 { | |
385 throw syntax_error(format_underline("toml::read_utf8_codepoint:" | |
386 " input codepoint is too large.", | |
387 {{source_location(loc), "should be in [0x00..0x10FFFF]"}}), | |
388 source_location(loc)); | |
389 } | |
390 return character; | |
391 } | |
392 | |
393 inline result<std::string, std::string> parse_escape_sequence(location& loc) | |
394 { | |
395 const auto first = loc.iter(); | |
396 if(first == loc.end() || *first != '\\') | |
397 { | |
398 return err(format_underline("toml::parse_escape_sequence: ", {{ | |
399 source_location(loc), "the next token is not a backslash \"\\\""}})); | |
400 } | |
401 loc.advance(); | |
402 switch(*loc.iter()) | |
403 { | |
404 case '\\':{loc.advance(); return ok(std::string("\\"));} | |
405 case '"' :{loc.advance(); return ok(std::string("\""));} | |
406 case 'b' :{loc.advance(); return ok(std::string("\b"));} | |
407 case 't' :{loc.advance(); return ok(std::string("\t"));} | |
408 case 'n' :{loc.advance(); return ok(std::string("\n"));} | |
409 case 'f' :{loc.advance(); return ok(std::string("\f"));} | |
410 case 'r' :{loc.advance(); return ok(std::string("\r"));} | |
411 #ifdef TOML11_USE_UNRELEASED_TOML_FEATURES | |
412 case 'e' :{loc.advance(); return ok(std::string("\x1b"));} // ESC | |
413 #endif | |
414 case 'u' : | |
415 { | |
416 if(const auto token = lex_escape_unicode_short::invoke(loc)) | |
417 { | |
418 return ok(read_utf8_codepoint(token.unwrap(), loc)); | |
419 } | |
420 else | |
421 { | |
422 return err(format_underline("parse_escape_sequence: " | |
423 "invalid token found in UTF-8 codepoint uXXXX.", | |
424 {{source_location(loc), "here"}})); | |
425 } | |
426 } | |
427 case 'U': | |
428 { | |
429 if(const auto token = lex_escape_unicode_long::invoke(loc)) | |
430 { | |
431 return ok(read_utf8_codepoint(token.unwrap(), loc)); | |
432 } | |
433 else | |
434 { | |
435 return err(format_underline("parse_escape_sequence: " | |
436 "invalid token found in UTF-8 codepoint Uxxxxxxxx", | |
437 {{source_location(loc), "here"}})); | |
438 } | |
439 } | |
440 } | |
441 | |
442 const auto msg = format_underline("parse_escape_sequence: " | |
443 "unknown escape sequence appeared.", {{source_location(loc), | |
444 "escape sequence is one of \\, \", b, t, n, f, r, uxxxx, Uxxxxxxxx"}}, | |
445 /* Hints = */{"if you want to write backslash as just one backslash, " | |
446 "use literal string like: regex = '<\\i\\c*\\s*>'"}); | |
447 loc.reset(first); | |
448 return err(msg); | |
449 } | |
450 | |
451 inline std::ptrdiff_t check_utf8_validity(const std::string& reg) | |
452 { | |
453 location loc("tmp", reg); | |
454 const auto u8 = repeat<lex_utf8_code, unlimited>::invoke(loc); | |
455 if(!u8 || loc.iter() != loc.end()) | |
456 { | |
457 const auto error_location = std::distance(loc.begin(), loc.iter()); | |
458 assert(0 <= error_location); | |
459 return error_location; | |
460 } | |
461 return -1; | |
462 } | |
463 | |
464 inline result<std::pair<toml::string, region>, std::string> | |
465 parse_ml_basic_string(location& loc) | |
466 { | |
467 const auto first = loc.iter(); | |
468 if(const auto token = lex_ml_basic_string::invoke(loc)) | |
469 { | |
470 auto inner_loc = loc; | |
471 inner_loc.reset(first); | |
472 | |
473 std::string retval; | |
474 retval.reserve(token.unwrap().size()); | |
475 | |
476 auto delim = lex_ml_basic_string_open::invoke(inner_loc); | |
477 if(!delim) | |
478 { | |
479 throw internal_error(format_underline( | |
480 "parse_ml_basic_string: invalid token", | |
481 {{source_location(inner_loc), "should be \"\"\""}}), | |
482 source_location(inner_loc)); | |
483 } | |
484 // immediate newline is ignored (if exists) | |
485 /* discard return value */ lex_newline::invoke(inner_loc); | |
486 | |
487 delim = none(); | |
488 while(!delim) | |
489 { | |
490 using lex_unescaped_seq = repeat< | |
491 either<lex_ml_basic_unescaped, lex_newline>, unlimited>; | |
492 if(auto unescaped = lex_unescaped_seq::invoke(inner_loc)) | |
493 { | |
494 retval += unescaped.unwrap().str(); | |
495 } | |
496 if(auto escaped = parse_escape_sequence(inner_loc)) | |
497 { | |
498 retval += escaped.unwrap(); | |
499 } | |
500 if(auto esc_nl = lex_ml_basic_escaped_newline::invoke(inner_loc)) | |
501 { | |
502 // ignore newline after escape until next non-ws char | |
503 } | |
504 if(inner_loc.iter() == inner_loc.end()) | |
505 { | |
506 throw internal_error(format_underline( | |
507 "parse_ml_basic_string: unexpected end of region", | |
508 {{source_location(inner_loc), "not sufficient token"}}), | |
509 source_location(inner_loc)); | |
510 } | |
511 delim = lex_ml_basic_string_close::invoke(inner_loc); | |
512 } | |
513 // `lex_ml_basic_string_close` allows 3 to 5 `"`s to allow 1 or 2 `"`s | |
514 // at just before the delimiter. Here, we need to attach `"`s at the | |
515 // end of the string body, if it exists. | |
516 // For detail, see the definition of `lex_ml_basic_string_close`. | |
517 assert(std::all_of(delim.unwrap().first(), delim.unwrap().last(), | |
518 [](const char c) noexcept {return c == '\"';})); | |
519 switch(delim.unwrap().size()) | |
520 { | |
521 case 3: {break;} | |
522 case 4: {retval += "\""; break;} | |
523 case 5: {retval += "\"\""; break;} | |
524 default: | |
525 { | |
526 throw internal_error(format_underline( | |
527 "parse_ml_basic_string: closing delimiter has invalid length", | |
528 {{source_location(inner_loc), "end of this"}}), | |
529 source_location(inner_loc)); | |
530 } | |
531 } | |
532 | |
533 const auto err_loc = check_utf8_validity(token.unwrap().str()); | |
534 if(err_loc == -1) | |
535 { | |
536 return ok(std::make_pair(toml::string(retval), token.unwrap())); | |
537 } | |
538 else | |
539 { | |
540 inner_loc.reset(first); | |
541 inner_loc.advance(err_loc); | |
542 throw syntax_error(format_underline( | |
543 "parse_ml_basic_string: invalid utf8 sequence found", | |
544 {{source_location(inner_loc), "here"}}), | |
545 source_location(inner_loc)); | |
546 } | |
547 } | |
548 else | |
549 { | |
550 loc.reset(first); | |
551 return err(format_underline("toml::parse_ml_basic_string: " | |
552 "the next token is not a valid multiline string", | |
553 {{source_location(loc), "here"}})); | |
554 } | |
555 } | |
556 | |
557 inline result<std::pair<toml::string, region>, std::string> | |
558 parse_basic_string(location& loc) | |
559 { | |
560 const auto first = loc.iter(); | |
561 if(const auto token = lex_basic_string::invoke(loc)) | |
562 { | |
563 auto inner_loc = loc; | |
564 inner_loc.reset(first); | |
565 | |
566 auto quot = lex_quotation_mark::invoke(inner_loc); | |
567 if(!quot) | |
568 { | |
569 throw internal_error(format_underline("parse_basic_string: " | |
570 "invalid token", {{source_location(inner_loc), "should be \""}}), | |
571 source_location(inner_loc)); | |
572 } | |
573 | |
574 std::string retval; | |
575 retval.reserve(token.unwrap().size()); | |
576 | |
577 quot = none(); | |
578 while(!quot) | |
579 { | |
580 using lex_unescaped_seq = repeat<lex_basic_unescaped, unlimited>; | |
581 if(auto unescaped = lex_unescaped_seq::invoke(inner_loc)) | |
582 { | |
583 retval += unescaped.unwrap().str(); | |
584 } | |
585 if(auto escaped = parse_escape_sequence(inner_loc)) | |
586 { | |
587 retval += escaped.unwrap(); | |
588 } | |
589 if(inner_loc.iter() == inner_loc.end()) | |
590 { | |
591 throw internal_error(format_underline( | |
592 "parse_basic_string: unexpected end of region", | |
593 {{source_location(inner_loc), "not sufficient token"}}), | |
594 source_location(inner_loc)); | |
595 } | |
596 quot = lex_quotation_mark::invoke(inner_loc); | |
597 } | |
598 | |
599 const auto err_loc = check_utf8_validity(token.unwrap().str()); | |
600 if(err_loc == -1) | |
601 { | |
602 return ok(std::make_pair(toml::string(retval), token.unwrap())); | |
603 } | |
604 else | |
605 { | |
606 inner_loc.reset(first); | |
607 inner_loc.advance(err_loc); | |
608 throw syntax_error(format_underline( | |
609 "parse_basic_string: invalid utf8 sequence found", | |
610 {{source_location(inner_loc), "here"}}), | |
611 source_location(inner_loc)); | |
612 } | |
613 } | |
614 else | |
615 { | |
616 loc.reset(first); // rollback | |
617 return err(format_underline("toml::parse_basic_string: " | |
618 "the next token is not a valid string", | |
619 {{source_location(loc), "here"}})); | |
620 } | |
621 } | |
622 | |
623 inline result<std::pair<toml::string, region>, std::string> | |
624 parse_ml_literal_string(location& loc) | |
625 { | |
626 const auto first = loc.iter(); | |
627 if(const auto token = lex_ml_literal_string::invoke(loc)) | |
628 { | |
629 auto inner_loc = loc; | |
630 inner_loc.reset(first); | |
631 | |
632 const auto open = lex_ml_literal_string_open::invoke(inner_loc); | |
633 if(!open) | |
634 { | |
635 throw internal_error(format_underline( | |
636 "parse_ml_literal_string: invalid token", | |
637 {{source_location(inner_loc), "should be '''"}}), | |
638 source_location(inner_loc)); | |
639 } | |
640 // immediate newline is ignored (if exists) | |
641 /* discard return value */ lex_newline::invoke(inner_loc); | |
642 | |
643 const auto body = lex_ml_literal_body::invoke(inner_loc); | |
644 | |
645 const auto close = lex_ml_literal_string_close::invoke(inner_loc); | |
646 if(!close) | |
647 { | |
648 throw internal_error(format_underline( | |
649 "parse_ml_literal_string: invalid token", | |
650 {{source_location(inner_loc), "should be '''"}}), | |
651 source_location(inner_loc)); | |
652 } | |
653 // `lex_ml_literal_string_close` allows 3 to 5 `'`s to allow 1 or 2 `'`s | |
654 // at just before the delimiter. Here, we need to attach `'`s at the | |
655 // end of the string body, if it exists. | |
656 // For detail, see the definition of `lex_ml_basic_string_close`. | |
657 | |
658 std::string retval = body.unwrap().str(); | |
659 assert(std::all_of(close.unwrap().first(), close.unwrap().last(), | |
660 [](const char c) noexcept {return c == '\'';})); | |
661 switch(close.unwrap().size()) | |
662 { | |
663 case 3: {break;} | |
664 case 4: {retval += "'"; break;} | |
665 case 5: {retval += "''"; break;} | |
666 default: | |
667 { | |
668 throw internal_error(format_underline( | |
669 "parse_ml_literal_string: closing delimiter has invalid length", | |
670 {{source_location(inner_loc), "end of this"}}), | |
671 source_location(inner_loc)); | |
672 } | |
673 } | |
674 | |
675 const auto err_loc = check_utf8_validity(token.unwrap().str()); | |
676 if(err_loc == -1) | |
677 { | |
678 return ok(std::make_pair(toml::string(retval, toml::string_t::literal), | |
679 token.unwrap())); | |
680 } | |
681 else | |
682 { | |
683 inner_loc.reset(first); | |
684 inner_loc.advance(err_loc); | |
685 throw syntax_error(format_underline( | |
686 "parse_ml_literal_string: invalid utf8 sequence found", | |
687 {{source_location(inner_loc), "here"}}), | |
688 source_location(inner_loc)); | |
689 } | |
690 } | |
691 else | |
692 { | |
693 loc.reset(first); // rollback | |
694 return err(format_underline("toml::parse_ml_literal_string: " | |
695 "the next token is not a valid multiline literal string", | |
696 {{source_location(loc), "here"}})); | |
697 } | |
698 } | |
699 | |
700 inline result<std::pair<toml::string, region>, std::string> | |
701 parse_literal_string(location& loc) | |
702 { | |
703 const auto first = loc.iter(); | |
704 if(const auto token = lex_literal_string::invoke(loc)) | |
705 { | |
706 auto inner_loc = loc; | |
707 inner_loc.reset(first); | |
708 | |
709 const auto open = lex_apostrophe::invoke(inner_loc); | |
710 if(!open) | |
711 { | |
712 throw internal_error(format_underline( | |
713 "parse_literal_string: invalid token", | |
714 {{source_location(inner_loc), "should be '"}}), | |
715 source_location(inner_loc)); | |
716 } | |
717 | |
718 const auto body = repeat<lex_literal_char, unlimited>::invoke(inner_loc); | |
719 | |
720 const auto close = lex_apostrophe::invoke(inner_loc); | |
721 if(!close) | |
722 { | |
723 throw internal_error(format_underline( | |
724 "parse_literal_string: invalid token", | |
725 {{source_location(inner_loc), "should be '"}}), | |
726 source_location(inner_loc)); | |
727 } | |
728 | |
729 const auto err_loc = check_utf8_validity(token.unwrap().str()); | |
730 if(err_loc == -1) | |
731 { | |
732 return ok(std::make_pair( | |
733 toml::string(body.unwrap().str(), toml::string_t::literal), | |
734 token.unwrap())); | |
735 } | |
736 else | |
737 { | |
738 inner_loc.reset(first); | |
739 inner_loc.advance(err_loc); | |
740 throw syntax_error(format_underline( | |
741 "parse_literal_string: invalid utf8 sequence found", | |
742 {{source_location(inner_loc), "here"}}), | |
743 source_location(inner_loc)); | |
744 } | |
745 } | |
746 else | |
747 { | |
748 loc.reset(first); // rollback | |
749 return err(format_underline("toml::parse_literal_string: " | |
750 "the next token is not a valid literal string", | |
751 {{source_location(loc), "here"}})); | |
752 } | |
753 } | |
754 | |
755 inline result<std::pair<toml::string, region>, std::string> | |
756 parse_string(location& loc) | |
757 { | |
758 if(loc.iter() != loc.end() && *(loc.iter()) == '"') | |
759 { | |
760 if(loc.iter() + 1 != loc.end() && *(loc.iter() + 1) == '"' && | |
761 loc.iter() + 2 != loc.end() && *(loc.iter() + 2) == '"') | |
762 { | |
763 return parse_ml_basic_string(loc); | |
764 } | |
765 else | |
766 { | |
767 return parse_basic_string(loc); | |
768 } | |
769 } | |
770 else if(loc.iter() != loc.end() && *(loc.iter()) == '\'') | |
771 { | |
772 if(loc.iter() + 1 != loc.end() && *(loc.iter() + 1) == '\'' && | |
773 loc.iter() + 2 != loc.end() && *(loc.iter() + 2) == '\'') | |
774 { | |
775 return parse_ml_literal_string(loc); | |
776 } | |
777 else | |
778 { | |
779 return parse_literal_string(loc); | |
780 } | |
781 } | |
782 return err(format_underline("toml::parse_string: ", | |
783 {{source_location(loc), "the next token is not a string"}})); | |
784 } | |
785 | |
786 inline result<std::pair<local_date, region>, std::string> | |
787 parse_local_date(location& loc) | |
788 { | |
789 const auto first = loc.iter(); | |
790 if(const auto token = lex_local_date::invoke(loc)) | |
791 { | |
792 location inner_loc(loc.name(), token.unwrap().str()); | |
793 | |
794 const auto y = lex_date_fullyear::invoke(inner_loc); | |
795 if(!y || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-') | |
796 { | |
797 throw internal_error(format_underline( | |
798 "toml::parse_local_date: invalid year format", | |
799 {{source_location(inner_loc), "should be `-`"}}), | |
800 source_location(inner_loc)); | |
801 } | |
802 inner_loc.advance(); | |
803 const auto m = lex_date_month::invoke(inner_loc); | |
804 if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-') | |
805 { | |
806 throw internal_error(format_underline( | |
807 "toml::parse_local_date: invalid month format", | |
808 {{source_location(inner_loc), "should be `-`"}}), | |
809 source_location(inner_loc)); | |
810 } | |
811 inner_loc.advance(); | |
812 const auto d = lex_date_mday::invoke(inner_loc); | |
813 if(!d) | |
814 { | |
815 throw internal_error(format_underline( | |
816 "toml::parse_local_date: invalid day format", | |
817 {{source_location(inner_loc), "here"}}), | |
818 source_location(inner_loc)); | |
819 } | |
820 | |
821 const auto year = static_cast<std::int16_t>(from_string<int>(y.unwrap().str(), 0)); | |
822 const auto month = static_cast<std::int8_t >(from_string<int>(m.unwrap().str(), 0)); | |
823 const auto day = static_cast<std::int8_t >(from_string<int>(d.unwrap().str(), 0)); | |
824 | |
825 // We briefly check whether the input date is valid or not. But here, we | |
826 // only check if the RFC3339 compliance. | |
827 // Actually there are several special date that does not exist, | |
828 // because of historical reasons, such as 1582/10/5-1582/10/14 (only in | |
829 // several countries). But here, we do not care about such a complicated | |
830 // rule. It makes the code complicated and there is only low probability | |
831 // that such a specific date is needed in practice. If someone need to | |
832 // validate date accurately, that means that the one need a specialized | |
833 // library for their purpose in a different layer. | |
834 { | |
835 const bool is_leap = (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0)); | |
836 const auto max_day = (month == 2) ? (is_leap ? 29 : 28) : | |
837 ((month == 4 || month == 6 || month == 9 || month == 11) ? 30 : 31); | |
838 | |
839 if((month < 1 || 12 < month) || (day < 1 || max_day < day)) | |
840 { | |
841 throw syntax_error(format_underline("toml::parse_date: " | |
842 "invalid date: it does not conform RFC3339.", {{ | |
843 source_location(loc), "month should be 01-12, day should be" | |
844 " 01-28,29,30,31, depending on month/year." | |
845 }}), source_location(inner_loc)); | |
846 } | |
847 } | |
848 return ok(std::make_pair(local_date(year, static_cast<month_t>(month - 1), day), | |
849 token.unwrap())); | |
850 } | |
851 else | |
852 { | |
853 loc.reset(first); | |
854 return err(format_underline("toml::parse_local_date: ", | |
855 {{source_location(loc), "the next token is not a local_date"}})); | |
856 } | |
857 } | |
858 | |
859 inline result<std::pair<local_time, region>, std::string> | |
860 parse_local_time(location& loc) | |
861 { | |
862 const auto first = loc.iter(); | |
863 if(const auto token = lex_local_time::invoke(loc)) | |
864 { | |
865 location inner_loc(loc.name(), token.unwrap().str()); | |
866 | |
867 const auto h = lex_time_hour::invoke(inner_loc); | |
868 if(!h || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':') | |
869 { | |
870 throw internal_error(format_underline( | |
871 "toml::parse_local_time: invalid year format", | |
872 {{source_location(inner_loc), "should be `:`"}}), | |
873 source_location(inner_loc)); | |
874 } | |
875 inner_loc.advance(); | |
876 const auto m = lex_time_minute::invoke(inner_loc); | |
877 if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':') | |
878 { | |
879 throw internal_error(format_underline( | |
880 "toml::parse_local_time: invalid month format", | |
881 {{source_location(inner_loc), "should be `:`"}}), | |
882 source_location(inner_loc)); | |
883 } | |
884 inner_loc.advance(); | |
885 const auto s = lex_time_second::invoke(inner_loc); | |
886 if(!s) | |
887 { | |
888 throw internal_error(format_underline( | |
889 "toml::parse_local_time: invalid second format", | |
890 {{source_location(inner_loc), "here"}}), | |
891 source_location(inner_loc)); | |
892 } | |
893 | |
894 const int hour = from_string<int>(h.unwrap().str(), 0); | |
895 const int minute = from_string<int>(m.unwrap().str(), 0); | |
896 const int second = from_string<int>(s.unwrap().str(), 0); | |
897 | |
898 if((hour < 0 || 23 < hour) || (minute < 0 || 59 < minute) || | |
899 (second < 0 || 60 < second)) // it may be leap second | |
900 { | |
901 throw syntax_error(format_underline("toml::parse_local_time: " | |
902 "invalid time: it does not conform RFC3339.", {{ | |
903 source_location(loc), "hour should be 00-23, minute should be" | |
904 " 00-59, second should be 00-60 (depending on the leap" | |
905 " second rules.)"}}), source_location(inner_loc)); | |
906 } | |
907 | |
908 local_time time(hour, minute, second, 0, 0); | |
909 | |
910 const auto before_secfrac = inner_loc.iter(); | |
911 if(const auto secfrac = lex_time_secfrac::invoke(inner_loc)) | |
912 { | |
913 auto sf = secfrac.unwrap().str(); | |
914 sf.erase(sf.begin()); // sf.front() == '.' | |
915 switch(sf.size() % 3) | |
916 { | |
917 case 2: sf += '0'; break; | |
918 case 1: sf += "00"; break; | |
919 case 0: break; | |
920 default: break; | |
921 } | |
922 if(sf.size() >= 9) | |
923 { | |
924 time.millisecond = from_string<std::uint16_t>(sf.substr(0, 3), 0u); | |
925 time.microsecond = from_string<std::uint16_t>(sf.substr(3, 3), 0u); | |
926 time.nanosecond = from_string<std::uint16_t>(sf.substr(6, 3), 0u); | |
927 } | |
928 else if(sf.size() >= 6) | |
929 { | |
930 time.millisecond = from_string<std::uint16_t>(sf.substr(0, 3), 0u); | |
931 time.microsecond = from_string<std::uint16_t>(sf.substr(3, 3), 0u); | |
932 } | |
933 else if(sf.size() >= 3) | |
934 { | |
935 time.millisecond = from_string<std::uint16_t>(sf, 0u); | |
936 time.microsecond = 0u; | |
937 } | |
938 } | |
939 else | |
940 { | |
941 if(before_secfrac != inner_loc.iter()) | |
942 { | |
943 throw internal_error(format_underline( | |
944 "toml::parse_local_time: invalid subsecond format", | |
945 {{source_location(inner_loc), "here"}}), | |
946 source_location(inner_loc)); | |
947 } | |
948 } | |
949 return ok(std::make_pair(time, token.unwrap())); | |
950 } | |
951 else | |
952 { | |
953 loc.reset(first); | |
954 return err(format_underline("toml::parse_local_time: ", | |
955 {{source_location(loc), "the next token is not a local_time"}})); | |
956 } | |
957 } | |
958 | |
959 inline result<std::pair<local_datetime, region>, std::string> | |
960 parse_local_datetime(location& loc) | |
961 { | |
962 const auto first = loc.iter(); | |
963 if(const auto token = lex_local_date_time::invoke(loc)) | |
964 { | |
965 location inner_loc(loc.name(), token.unwrap().str()); | |
966 const auto date = parse_local_date(inner_loc); | |
967 if(!date || inner_loc.iter() == inner_loc.end()) | |
968 { | |
969 throw internal_error(format_underline( | |
970 "toml::parse_local_datetime: invalid datetime format", | |
971 {{source_location(inner_loc), "date, not datetime"}}), | |
972 source_location(inner_loc)); | |
973 } | |
974 const char delim = *(inner_loc.iter()); | |
975 if(delim != 'T' && delim != 't' && delim != ' ') | |
976 { | |
977 throw internal_error(format_underline( | |
978 "toml::parse_local_datetime: invalid datetime format", | |
979 {{source_location(inner_loc), "should be `T` or ` ` (space)"}}), | |
980 source_location(inner_loc)); | |
981 } | |
982 inner_loc.advance(); | |
983 const auto time = parse_local_time(inner_loc); | |
984 if(!time) | |
985 { | |
986 throw internal_error(format_underline( | |
987 "toml::parse_local_datetime: invalid datetime format", | |
988 {{source_location(inner_loc), "invalid time format"}}), | |
989 source_location(inner_loc)); | |
990 } | |
991 return ok(std::make_pair( | |
992 local_datetime(date.unwrap().first, time.unwrap().first), | |
993 token.unwrap())); | |
994 } | |
995 else | |
996 { | |
997 loc.reset(first); | |
998 return err(format_underline("toml::parse_local_datetime: ", | |
999 {{source_location(loc), "the next token is not a local_datetime"}})); | |
1000 } | |
1001 } | |
1002 | |
1003 inline result<std::pair<offset_datetime, region>, std::string> | |
1004 parse_offset_datetime(location& loc) | |
1005 { | |
1006 const auto first = loc.iter(); | |
1007 if(const auto token = lex_offset_date_time::invoke(loc)) | |
1008 { | |
1009 location inner_loc(loc.name(), token.unwrap().str()); | |
1010 const auto datetime = parse_local_datetime(inner_loc); | |
1011 if(!datetime || inner_loc.iter() == inner_loc.end()) | |
1012 { | |
1013 throw internal_error(format_underline( | |
1014 "toml::parse_offset_datetime: invalid datetime format", | |
1015 {{source_location(inner_loc), "date, not datetime"}}), | |
1016 source_location(inner_loc)); | |
1017 } | |
1018 time_offset offset(0, 0); | |
1019 if(const auto ofs = lex_time_numoffset::invoke(inner_loc)) | |
1020 { | |
1021 const auto str = ofs.unwrap().str(); | |
1022 | |
1023 const auto hour = from_string<int>(str.substr(1,2), 0); | |
1024 const auto minute = from_string<int>(str.substr(4,2), 0); | |
1025 | |
1026 if((hour < 0 || 23 < hour) || (minute < 0 || 59 < minute)) | |
1027 { | |
1028 throw syntax_error(format_underline("toml::parse_offset_datetime: " | |
1029 "invalid offset: it does not conform RFC3339.", {{ | |
1030 source_location(loc), "month should be 01-12, day should be" | |
1031 " 01-28,29,30,31, depending on month/year." | |
1032 }}), source_location(inner_loc)); | |
1033 } | |
1034 | |
1035 if(str.front() == '+') | |
1036 { | |
1037 offset = time_offset(hour, minute); | |
1038 } | |
1039 else | |
1040 { | |
1041 offset = time_offset(-hour, -minute); | |
1042 } | |
1043 } | |
1044 else if(*inner_loc.iter() != 'Z' && *inner_loc.iter() != 'z') | |
1045 { | |
1046 throw internal_error(format_underline( | |
1047 "toml::parse_offset_datetime: invalid datetime format", | |
1048 {{source_location(inner_loc), "should be `Z` or `+HH:MM`"}}), | |
1049 source_location(inner_loc)); | |
1050 } | |
1051 return ok(std::make_pair(offset_datetime(datetime.unwrap().first, offset), | |
1052 token.unwrap())); | |
1053 } | |
1054 else | |
1055 { | |
1056 loc.reset(first); | |
1057 return err(format_underline("toml::parse_offset_datetime: ", | |
1058 {{source_location(loc), "the next token is not a offset_datetime"}})); | |
1059 } | |
1060 } | |
1061 | |
1062 inline result<std::pair<key, region>, std::string> | |
1063 parse_simple_key(location& loc) | |
1064 { | |
1065 if(const auto bstr = parse_basic_string(loc)) | |
1066 { | |
1067 return ok(std::make_pair(bstr.unwrap().first.str, bstr.unwrap().second)); | |
1068 } | |
1069 if(const auto lstr = parse_literal_string(loc)) | |
1070 { | |
1071 return ok(std::make_pair(lstr.unwrap().first.str, lstr.unwrap().second)); | |
1072 } | |
1073 if(const auto bare = lex_unquoted_key::invoke(loc)) | |
1074 { | |
1075 const auto reg = bare.unwrap(); | |
1076 return ok(std::make_pair(reg.str(), reg)); | |
1077 } | |
1078 return err(format_underline("toml::parse_simple_key: ", | |
1079 {{source_location(loc), "the next token is not a simple key"}})); | |
1080 } | |
1081 | |
1082 // dotted key become vector of keys | |
1083 inline result<std::pair<std::vector<key>, region>, std::string> | |
1084 parse_key(location& loc) | |
1085 { | |
1086 const auto first = loc.iter(); | |
1087 // dotted key -> `foo.bar.baz` where several single keys are chained by | |
1088 // dots. Whitespaces between keys and dots are allowed. | |
1089 if(const auto token = lex_dotted_key::invoke(loc)) | |
1090 { | |
1091 const auto reg = token.unwrap(); | |
1092 location inner_loc(loc.name(), reg.str()); | |
1093 std::vector<key> keys; | |
1094 | |
1095 while(inner_loc.iter() != inner_loc.end()) | |
1096 { | |
1097 lex_ws::invoke(inner_loc); | |
1098 if(const auto k = parse_simple_key(inner_loc)) | |
1099 { | |
1100 keys.push_back(k.unwrap().first); | |
1101 } | |
1102 else | |
1103 { | |
1104 throw internal_error(format_underline( | |
1105 "toml::parse_key: dotted key contains invalid key", | |
1106 {{source_location(inner_loc), k.unwrap_err()}}), | |
1107 source_location(inner_loc)); | |
1108 } | |
1109 | |
1110 lex_ws::invoke(inner_loc); | |
1111 if(inner_loc.iter() == inner_loc.end()) | |
1112 { | |
1113 break; | |
1114 } | |
1115 else if(*inner_loc.iter() == '.') | |
1116 { | |
1117 inner_loc.advance(); // to skip `.` | |
1118 } | |
1119 else | |
1120 { | |
1121 throw internal_error(format_underline("toml::parse_key: " | |
1122 "dotted key contains invalid key ", | |
1123 {{source_location(inner_loc), "should be `.`"}}), | |
1124 source_location(inner_loc)); | |
1125 } | |
1126 } | |
1127 return ok(std::make_pair(keys, reg)); | |
1128 } | |
1129 loc.reset(first); | |
1130 | |
1131 // simple_key: a single (basic_string|literal_string|bare key) | |
1132 if(const auto smpl = parse_simple_key(loc)) | |
1133 { | |
1134 return ok(std::make_pair(std::vector<key>(1, smpl.unwrap().first), | |
1135 smpl.unwrap().second)); | |
1136 } | |
1137 return err(format_underline("toml::parse_key: an invalid key appeared.", | |
1138 {{source_location(loc), "is not a valid key"}}, { | |
1139 "bare keys : non-empty strings composed only of [A-Za-z0-9_-].", | |
1140 "quoted keys: same as \"basic strings\" or 'literal strings'.", | |
1141 "dotted keys: sequence of bare or quoted keys joined with a dot." | |
1142 })); | |
1143 } | |
1144 | |
1145 // forward-decl to implement parse_array and parse_table | |
1146 template<typename Value> | |
1147 result<Value, std::string> parse_value(location&, const std::size_t n_rec); | |
1148 | |
1149 template<typename Value> | |
1150 result<std::pair<typename Value::array_type, region>, std::string> | |
1151 parse_array(location& loc, const std::size_t n_rec) | |
1152 { | |
1153 using value_type = Value; | |
1154 using array_type = typename value_type::array_type; | |
1155 | |
1156 if(n_rec > TOML11_VALUE_RECURSION_LIMIT) | |
1157 { | |
1158 // parse_array does not have any way to handle recursive error currently... | |
1159 throw syntax_error(std::string("toml::parse_array: recursion limit (" | |
1160 TOML11_STRINGIZE(TOML11_VALUE_RECURSION_LIMIT) ") exceeded"), | |
1161 source_location(loc)); | |
1162 } | |
1163 | |
1164 const auto first = loc.iter(); | |
1165 if(loc.iter() == loc.end()) | |
1166 { | |
1167 return err("toml::parse_array: input is empty"); | |
1168 } | |
1169 if(*loc.iter() != '[') | |
1170 { | |
1171 return err("toml::parse_array: token is not an array"); | |
1172 } | |
1173 loc.advance(); | |
1174 | |
1175 using lex_ws_comment_newline = repeat< | |
1176 either<lex_wschar, lex_newline, lex_comment>, unlimited>; | |
1177 | |
1178 array_type retval; | |
1179 while(loc.iter() != loc.end()) | |
1180 { | |
1181 lex_ws_comment_newline::invoke(loc); // skip | |
1182 | |
1183 if(loc.iter() != loc.end() && *loc.iter() == ']') | |
1184 { | |
1185 loc.advance(); // skip ']' | |
1186 return ok(std::make_pair(retval, | |
1187 region(loc, first, loc.iter()))); | |
1188 } | |
1189 | |
1190 if(auto val = parse_value<value_type>(loc, n_rec+1)) | |
1191 { | |
1192 // After TOML v1.0.0-rc.1, array becomes to be able to have values | |
1193 // with different types. So here we will omit this by default. | |
1194 // | |
1195 // But some of the test-suite checks if the parser accepts a hetero- | |
1196 // geneous arrays, so we keep this for a while. | |
1197 #ifdef TOML11_DISALLOW_HETEROGENEOUS_ARRAYS | |
1198 if(!retval.empty() && retval.front().type() != val.as_ok().type()) | |
1199 { | |
1200 auto array_start_loc = loc; | |
1201 array_start_loc.reset(first); | |
1202 | |
1203 throw syntax_error(format_underline("toml::parse_array: " | |
1204 "type of elements should be the same each other.", { | |
1205 {source_location(array_start_loc), "array starts here"}, | |
1206 { | |
1207 retval.front().location(), | |
1208 "value has type " + stringize(retval.front().type()) | |
1209 }, | |
1210 { | |
1211 val.unwrap().location(), | |
1212 "value has different type, " + stringize(val.unwrap().type()) | |
1213 } | |
1214 }), source_location(loc)); | |
1215 } | |
1216 #endif | |
1217 retval.push_back(std::move(val.unwrap())); | |
1218 } | |
1219 else | |
1220 { | |
1221 auto array_start_loc = loc; | |
1222 array_start_loc.reset(first); | |
1223 | |
1224 throw syntax_error(format_underline("toml::parse_array: " | |
1225 "value having invalid format appeared in an array", { | |
1226 {source_location(array_start_loc), "array starts here"}, | |
1227 {source_location(loc), "it is not a valid value."} | |
1228 }), source_location(loc)); | |
1229 } | |
1230 | |
1231 using lex_array_separator = sequence<maybe<lex_ws_comment_newline>, character<','>>; | |
1232 const auto sp = lex_array_separator::invoke(loc); | |
1233 if(!sp) | |
1234 { | |
1235 lex_ws_comment_newline::invoke(loc); | |
1236 if(loc.iter() != loc.end() && *loc.iter() == ']') | |
1237 { | |
1238 loc.advance(); // skip ']' | |
1239 return ok(std::make_pair(retval, | |
1240 region(loc, first, loc.iter()))); | |
1241 } | |
1242 else | |
1243 { | |
1244 auto array_start_loc = loc; | |
1245 array_start_loc.reset(first); | |
1246 | |
1247 throw syntax_error(format_underline("toml::parse_array:" | |
1248 " missing array separator `,` after a value", { | |
1249 {source_location(array_start_loc), "array starts here"}, | |
1250 {source_location(loc), "should be `,`"} | |
1251 }), source_location(loc)); | |
1252 } | |
1253 } | |
1254 } | |
1255 loc.reset(first); | |
1256 throw syntax_error(format_underline("toml::parse_array: " | |
1257 "array did not closed by `]`", | |
1258 {{source_location(loc), "should be closed"}}), | |
1259 source_location(loc)); | |
1260 } | |
1261 | |
1262 template<typename Value> | |
1263 result<std::pair<std::pair<std::vector<key>, region>, Value>, std::string> | |
1264 parse_key_value_pair(location& loc, const std::size_t n_rec) | |
1265 { | |
1266 using value_type = Value; | |
1267 | |
1268 const auto first = loc.iter(); | |
1269 auto key_reg = parse_key(loc); | |
1270 if(!key_reg) | |
1271 { | |
1272 std::string msg = std::move(key_reg.unwrap_err()); | |
1273 // if the next token is keyvalue-separator, it means that there are no | |
1274 // key. then we need to show error as "empty key is not allowed". | |
1275 if(const auto keyval_sep = lex_keyval_sep::invoke(loc)) | |
1276 { | |
1277 loc.reset(first); | |
1278 msg = format_underline("toml::parse_key_value_pair: " | |
1279 "empty key is not allowed.", | |
1280 {{source_location(loc), "key expected before '='"}}); | |
1281 } | |
1282 return err(std::move(msg)); | |
1283 } | |
1284 | |
1285 const auto kvsp = lex_keyval_sep::invoke(loc); | |
1286 if(!kvsp) | |
1287 { | |
1288 std::string msg; | |
1289 // if the line contains '=' after the invalid sequence, possibly the | |
1290 // error is in the key (like, invalid character in bare key). | |
1291 const auto line_end = std::find(loc.iter(), loc.end(), '\n'); | |
1292 if(std::find(loc.iter(), line_end, '=') != line_end) | |
1293 { | |
1294 msg = format_underline("toml::parse_key_value_pair: " | |
1295 "invalid format for key", | |
1296 {{source_location(loc), "invalid character in key"}}, | |
1297 {"Did you forget '.' to separate dotted-key?", | |
1298 "Allowed characters for bare key are [0-9a-zA-Z_-]."}); | |
1299 } | |
1300 else // if not, the error is lack of key-value separator. | |
1301 { | |
1302 msg = format_underline("toml::parse_key_value_pair: " | |
1303 "missing key-value separator `=`", | |
1304 {{source_location(loc), "should be `=`"}}); | |
1305 } | |
1306 loc.reset(first); | |
1307 return err(std::move(msg)); | |
1308 } | |
1309 | |
1310 const auto after_kvsp = loc.iter(); // err msg | |
1311 auto val = parse_value<value_type>(loc, n_rec); | |
1312 if(!val) | |
1313 { | |
1314 std::string msg; | |
1315 loc.reset(after_kvsp); | |
1316 // check there is something not a comment/whitespace after `=` | |
1317 if(sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>::invoke(loc)) | |
1318 { | |
1319 loc.reset(after_kvsp); | |
1320 msg = format_underline("toml::parse_key_value_pair: " | |
1321 "missing value after key-value separator '='", | |
1322 {{source_location(loc), "expected value, but got nothing"}}); | |
1323 } | |
1324 else // there is something not a comment/whitespace, so invalid format. | |
1325 { | |
1326 msg = std::move(val.unwrap_err()); | |
1327 } | |
1328 loc.reset(first); | |
1329 return err(msg); | |
1330 } | |
1331 return ok(std::make_pair(std::move(key_reg.unwrap()), | |
1332 std::move(val.unwrap()))); | |
1333 } | |
1334 | |
1335 // for error messages. | |
1336 template<typename InputIterator> | |
1337 std::string format_dotted_keys(InputIterator first, const InputIterator last) | |
1338 { | |
1339 static_assert(std::is_same<key, | |
1340 typename std::iterator_traits<InputIterator>::value_type>::value,""); | |
1341 | |
1342 std::string retval(*first++); | |
1343 for(; first != last; ++first) | |
1344 { | |
1345 retval += '.'; | |
1346 retval += *first; | |
1347 } | |
1348 return retval; | |
1349 } | |
1350 | |
1351 // forward decl for is_valid_forward_table_definition | |
1352 result<std::pair<std::vector<key>, region>, std::string> | |
1353 parse_table_key(location& loc); | |
1354 result<std::pair<std::vector<key>, region>, std::string> | |
1355 parse_array_table_key(location& loc); | |
1356 template<typename Value> | |
1357 result<std::pair<typename Value::table_type, region>, std::string> | |
1358 parse_inline_table(location& loc, const std::size_t n_rec); | |
1359 | |
1360 // The following toml file is allowed. | |
1361 // ```toml | |
1362 // [a.b.c] # here, table `a` has element `b`. | |
1363 // foo = "bar" | |
1364 // [a] # merge a = {baz = "qux"} to a = {b = {...}} | |
1365 // baz = "qux" | |
1366 // ``` | |
1367 // But the following is not allowed. | |
1368 // ```toml | |
1369 // [a] | |
1370 // b.c.foo = "bar" | |
1371 // [a] # error! the same table [a] defined! | |
1372 // baz = "qux" | |
1373 // ``` | |
1374 // The following is neither allowed. | |
1375 // ```toml | |
1376 // a = { b.c.foo = "bar"} | |
1377 // [a] # error! the same table [a] defined! | |
1378 // baz = "qux" | |
1379 // ``` | |
1380 // Here, it parses region of `tab->at(k)` as a table key and check the depth | |
1381 // of the key. If the key region points deeper node, it would be allowed. | |
1382 // Otherwise, the key points the same node. It would be rejected. | |
1383 template<typename Value, typename Iterator> | |
1384 bool is_valid_forward_table_definition(const Value& fwd, const Value& inserting, | |
1385 Iterator key_first, Iterator key_curr, Iterator key_last) | |
1386 { | |
1387 // ------------------------------------------------------------------------ | |
1388 // check type of the value to be inserted/merged | |
1389 | |
1390 std::string inserting_reg = ""; | |
1391 if(const auto ptr = detail::get_region(inserting)) | |
1392 { | |
1393 inserting_reg = ptr->str(); | |
1394 } | |
1395 location inserting_def("internal", std::move(inserting_reg)); | |
1396 if(const auto inlinetable = parse_inline_table<Value>(inserting_def, 0)) | |
1397 { | |
1398 // check if we are overwriting existing table. | |
1399 // ```toml | |
1400 // # NG | |
1401 // a.b = 42 | |
1402 // a = {d = 3.14} | |
1403 // ``` | |
1404 // Inserting an inline table to a existing super-table is not allowed in | |
1405 // any case. If we found it, we can reject it without further checking. | |
1406 return false; | |
1407 } | |
1408 | |
1409 // Valid and invalid cases when inserting to the [a.b] table: | |
1410 // | |
1411 // ## Invalid | |
1412 // | |
1413 // ```toml | |
1414 // # invalid | |
1415 // [a] | |
1416 // b.c.d = "foo" | |
1417 // [a.b] # a.b is already defined and closed | |
1418 // d = "bar" | |
1419 // ``` | |
1420 // ```toml | |
1421 // # invalid | |
1422 // a = {b.c.d = "foo"} | |
1423 // [a.b] # a is already defined and inline table is closed | |
1424 // d = "bar" | |
1425 // ``` | |
1426 // ```toml | |
1427 // # invalid | |
1428 // a.b.c.d = "foo" | |
1429 // [a.b] # a.b is already defined and dotted-key table is closed | |
1430 // d = "bar" | |
1431 // ``` | |
1432 // | |
1433 // ## Valid | |
1434 // | |
1435 // ```toml | |
1436 // # OK. a.b is defined, but is *overwritable* | |
1437 // [a.b.c] | |
1438 // d = "foo" | |
1439 // [a.b] | |
1440 // d = "bar" | |
1441 // ``` | |
1442 // ```toml | |
1443 // # OK. a.b is defined, but is *overwritable* | |
1444 // [a] | |
1445 // b.c.d = "foo" | |
1446 // b.e = "bar" | |
1447 // ``` | |
1448 | |
1449 // ------------------------------------------------------------------------ | |
1450 // check table defined before | |
1451 | |
1452 std::string internal = ""; | |
1453 if(const auto ptr = detail::get_region(fwd)) | |
1454 { | |
1455 internal = ptr->str(); | |
1456 } | |
1457 location def("internal", std::move(internal)); | |
1458 if(const auto tabkeys = parse_table_key(def)) // [table.key] | |
1459 { | |
1460 // table keys always contains all the nodes from the root. | |
1461 const auto& tks = tabkeys.unwrap().first; | |
1462 if(std::size_t(std::distance(key_first, key_last)) == tks.size() && | |
1463 std::equal(tks.begin(), tks.end(), key_first)) | |
1464 { | |
1465 // the keys are equivalent. it is not allowed. | |
1466 return false; | |
1467 } | |
1468 // the keys are not equivalent. it is allowed. | |
1469 return true; | |
1470 } | |
1471 // nested array-of-table definition implicitly defines tables. | |
1472 // those tables can be reopened. | |
1473 if(const auto atabkeys = parse_array_table_key(def)) | |
1474 { | |
1475 // table keys always contains all the nodes from the root. | |
1476 const auto& tks = atabkeys.unwrap().first; | |
1477 if(std::size_t(std::distance(key_first, key_last)) == tks.size() && | |
1478 std::equal(tks.begin(), tks.end(), key_first)) | |
1479 { | |
1480 // the keys are equivalent. it is not allowed. | |
1481 return false; | |
1482 } | |
1483 // the keys are not equivalent. it is allowed. | |
1484 return true; | |
1485 } | |
1486 if(const auto dotkeys = parse_key(def)) // a.b.c = "foo" | |
1487 { | |
1488 // consider the following case. | |
1489 // [a] | |
1490 // b.c = {d = 42} | |
1491 // [a.b.c] | |
1492 // e = 2.71 | |
1493 // this defines the table [a.b.c] twice. no? | |
1494 if(const auto reopening_dotkey_by_table = parse_table_key(inserting_def)) | |
1495 { | |
1496 // re-opening a dotkey-defined table by a table is invalid. | |
1497 // only dotkey can append a key-val. Like: | |
1498 // ```toml | |
1499 // a.b.c = "foo" | |
1500 // a.b.d = "bar" # OK. reopen `a.b` by dotkey | |
1501 // [a.b] | |
1502 // e = "bar" # Invalid. re-opening `a.b` by [a.b] is not allowed. | |
1503 // ``` | |
1504 return false; | |
1505 } | |
1506 | |
1507 // a dotted key starts from the node representing a table in which the | |
1508 // dotted key belongs to. | |
1509 const auto& dks = dotkeys.unwrap().first; | |
1510 if(std::size_t(std::distance(key_curr, key_last)) == dks.size() && | |
1511 std::equal(dks.begin(), dks.end(), key_curr)) | |
1512 { | |
1513 // the keys are equivalent. it is not allowed. | |
1514 return false; | |
1515 } | |
1516 // the keys are not equivalent. it is allowed. | |
1517 return true; | |
1518 } | |
1519 return false; | |
1520 } | |
1521 | |
1522 template<typename Value, typename InputIterator> | |
1523 result<bool, std::string> | |
1524 insert_nested_key(typename Value::table_type& root, const Value& v, | |
1525 InputIterator iter, const InputIterator last, | |
1526 region key_reg, | |
1527 const bool is_array_of_table = false) | |
1528 { | |
1529 static_assert(std::is_same<key, | |
1530 typename std::iterator_traits<InputIterator>::value_type>::value,""); | |
1531 | |
1532 using value_type = Value; | |
1533 using table_type = typename value_type::table_type; | |
1534 using array_type = typename value_type::array_type; | |
1535 | |
1536 const auto first = iter; | |
1537 assert(iter != last); | |
1538 | |
1539 table_type* tab = std::addressof(root); | |
1540 for(; iter != last; ++iter) // search recursively | |
1541 { | |
1542 const key& k = *iter; | |
1543 if(std::next(iter) == last) // k is the last key | |
1544 { | |
1545 // XXX if the value is array-of-tables, there can be several | |
1546 // tables that are in the same array. in that case, we need to | |
1547 // find the last element and insert it to there. | |
1548 if(is_array_of_table) | |
1549 { | |
1550 if(tab->count(k) == 1) // there is already an array of table | |
1551 { | |
1552 if(tab->at(k).is_table()) | |
1553 { | |
1554 // show special err msg for conflicting table | |
1555 throw syntax_error(format_underline(concat_to_string( | |
1556 "toml::insert_value: array of table (\"", | |
1557 format_dotted_keys(first, last), | |
1558 "\") cannot be defined"), { | |
1559 {tab->at(k).location(), "table already defined"}, | |
1560 {v.location(), "this conflicts with the previous table"} | |
1561 }), v.location()); | |
1562 } | |
1563 else if(!(tab->at(k).is_array())) | |
1564 { | |
1565 throw syntax_error(format_underline(concat_to_string( | |
1566 "toml::insert_value: array of table (\"", | |
1567 format_dotted_keys(first, last), "\") collides with" | |
1568 " existing value"), { | |
1569 {tab->at(k).location(), | |
1570 concat_to_string("this ", tab->at(k).type(), | |
1571 " value already exists")}, | |
1572 {v.location(), | |
1573 "while inserting this array-of-tables"} | |
1574 }), v.location()); | |
1575 } | |
1576 // the above if-else-if checks tab->at(k) is an array | |
1577 auto& a = tab->at(k).as_array(); | |
1578 // If table element is defined as [[array_of_tables]], it | |
1579 // cannot be an empty array. If an array of tables is | |
1580 // defined as `aot = []`, it cannot be appended. | |
1581 if(a.empty() || !(a.front().is_table())) | |
1582 { | |
1583 throw syntax_error(format_underline(concat_to_string( | |
1584 "toml::insert_value: array of table (\"", | |
1585 format_dotted_keys(first, last), "\") collides with" | |
1586 " existing value"), { | |
1587 {tab->at(k).location(), | |
1588 concat_to_string("this ", tab->at(k).type(), | |
1589 " value already exists")}, | |
1590 {v.location(), | |
1591 "while inserting this array-of-tables"} | |
1592 }), v.location()); | |
1593 } | |
1594 // avoid conflicting array of table like the following. | |
1595 // ```toml | |
1596 // a = [{b = 42}] # define a as an array of *inline* tables | |
1597 // [[a]] # a is an array of *multi-line* tables | |
1598 // b = 54 | |
1599 // ``` | |
1600 // Here, from the type information, these cannot be detected | |
1601 // because inline table is also a table. | |
1602 // But toml v0.5.0 explicitly says it is invalid. The above | |
1603 // array-of-tables has a static size and appending to the | |
1604 // array is invalid. | |
1605 // In this library, multi-line table value has a region | |
1606 // that points to the key of the table (e.g. [[a]]). By | |
1607 // comparing the first two letters in key, we can detect | |
1608 // the array-of-table is inline or multiline. | |
1609 if(const auto ptr = detail::get_region(a.front())) | |
1610 { | |
1611 if(ptr->str().substr(0,2) != "[[") | |
1612 { | |
1613 throw syntax_error(format_underline(concat_to_string( | |
1614 "toml::insert_value: array of table (\"", | |
1615 format_dotted_keys(first, last), "\") collides " | |
1616 "with existing array-of-tables"), { | |
1617 {tab->at(k).location(), | |
1618 concat_to_string("this ", tab->at(k).type(), | |
1619 " value has static size")}, | |
1620 {v.location(), | |
1621 "appending it to the statically sized array"} | |
1622 }), v.location()); | |
1623 } | |
1624 } | |
1625 a.push_back(v); | |
1626 return ok(true); | |
1627 } | |
1628 else // if not, we need to create the array of table | |
1629 { | |
1630 // XXX: Consider the following array of tables. | |
1631 // ```toml | |
1632 // # This is a comment. | |
1633 // [[aot]] | |
1634 // foo = "bar" | |
1635 // ``` | |
1636 // Here, the comment is for `aot`. But here, actually two | |
1637 // values are defined. An array that contains tables, named | |
1638 // `aot`, and the 0th element of the `aot`, `{foo = "bar"}`. | |
1639 // Those two are different from each other. But both of them | |
1640 // points to the same portion of the TOML file, `[[aot]]`, | |
1641 // so `key_reg.comments()` returns `# This is a comment`. | |
1642 // If it is assigned as a comment of `aot` defined here, the | |
1643 // comment will be duplicated. Both the `aot` itself and | |
1644 // the 0-th element will have the same comment. This causes | |
1645 // "duplication of the same comments" bug when the data is | |
1646 // serialized. | |
1647 // Next, consider the following. | |
1648 // ```toml | |
1649 // # comment 1 | |
1650 // aot = [ | |
1651 // # comment 2 | |
1652 // {foo = "bar"}, | |
1653 // ] | |
1654 // ``` | |
1655 // In this case, we can distinguish those two comments. So | |
1656 // here we need to add "comment 1" to the `aot` and | |
1657 // "comment 2" to the 0th element of that. | |
1658 // To distinguish those two, we check the key region. | |
1659 std::vector<std::string> comments{/* empty by default */}; | |
1660 if(key_reg.str().substr(0, 2) != "[[") | |
1661 { | |
1662 comments = key_reg.comments(); | |
1663 } | |
1664 value_type aot(array_type(1, v), key_reg, std::move(comments)); | |
1665 tab->insert(std::make_pair(k, aot)); | |
1666 return ok(true); | |
1667 } | |
1668 } // end if(array of table) | |
1669 | |
1670 if(tab->count(k) == 1) | |
1671 { | |
1672 if(tab->at(k).is_table() && v.is_table()) | |
1673 { | |
1674 if(!is_valid_forward_table_definition( | |
1675 tab->at(k), v, first, iter, last)) | |
1676 { | |
1677 throw syntax_error(format_underline(concat_to_string( | |
1678 "toml::insert_value: table (\"", | |
1679 format_dotted_keys(first, last), | |
1680 "\") already exists."), { | |
1681 {tab->at(k).location(), "table already exists here"}, | |
1682 {v.location(), "table defined twice"} | |
1683 }), v.location()); | |
1684 } | |
1685 // to allow the following toml file. | |
1686 // [a.b.c] | |
1687 // d = 42 | |
1688 // [a] | |
1689 // e = 2.71 | |
1690 auto& t = tab->at(k).as_table(); | |
1691 for(const auto& kv : v.as_table()) | |
1692 { | |
1693 if(tab->at(k).contains(kv.first)) | |
1694 { | |
1695 throw syntax_error(format_underline(concat_to_string( | |
1696 "toml::insert_value: value (\"", | |
1697 format_dotted_keys(first, last), | |
1698 "\") already exists."), { | |
1699 {t.at(kv.first).location(), "already exists here"}, | |
1700 {v.location(), "this defined twice"} | |
1701 }), v.location()); | |
1702 } | |
1703 t[kv.first] = kv.second; | |
1704 } | |
1705 detail::change_region(tab->at(k), key_reg); | |
1706 return ok(true); | |
1707 } | |
1708 else if(v.is_table() && | |
1709 tab->at(k).is_array() && | |
1710 tab->at(k).as_array().size() > 0 && | |
1711 tab->at(k).as_array().front().is_table()) | |
1712 { | |
1713 throw syntax_error(format_underline(concat_to_string( | |
1714 "toml::insert_value: array of tables (\"", | |
1715 format_dotted_keys(first, last), "\") already exists."), { | |
1716 {tab->at(k).location(), "array of tables defined here"}, | |
1717 {v.location(), "table conflicts with the previous array of table"} | |
1718 }), v.location()); | |
1719 } | |
1720 else | |
1721 { | |
1722 throw syntax_error(format_underline(concat_to_string( | |
1723 "toml::insert_value: value (\"", | |
1724 format_dotted_keys(first, last), "\") already exists."), { | |
1725 {tab->at(k).location(), "value already exists here"}, | |
1726 {v.location(), "value defined twice"} | |
1727 }), v.location()); | |
1728 } | |
1729 } | |
1730 tab->insert(std::make_pair(k, v)); | |
1731 return ok(true); | |
1732 } | |
1733 else // k is not the last one, we should insert recursively | |
1734 { | |
1735 // if there is no corresponding value, insert it first. | |
1736 // related: you don't need to write | |
1737 // # [x] | |
1738 // # [x.y] | |
1739 // to write | |
1740 // [x.y.z] | |
1741 if(tab->count(k) == 0) | |
1742 { | |
1743 // a table that is defined implicitly doesn't have any comments. | |
1744 (*tab)[k] = value_type(table_type{}, key_reg, {/*no comment*/}); | |
1745 } | |
1746 | |
1747 // type checking... | |
1748 if(tab->at(k).is_table()) | |
1749 { | |
1750 // According to toml-lang/toml:36d3091b3 "Clarify that inline | |
1751 // tables are immutable", check if it adds key-value pair to an | |
1752 // inline table. | |
1753 if(const auto* ptr = get_region(tab->at(k))) | |
1754 { | |
1755 // here, if the value is a (multi-line) table, the region | |
1756 // should be something like `[table-name]`. | |
1757 if(ptr->front() == '{') | |
1758 { | |
1759 throw syntax_error(format_underline(concat_to_string( | |
1760 "toml::insert_value: inserting to an inline table (", | |
1761 format_dotted_keys(first, std::next(iter)), | |
1762 ") but inline tables are immutable"), { | |
1763 {tab->at(k).location(), "inline tables are immutable"}, | |
1764 {v.location(), "inserting this"} | |
1765 }), v.location()); | |
1766 } | |
1767 } | |
1768 tab = std::addressof((*tab)[k].as_table()); | |
1769 } | |
1770 else if(tab->at(k).is_array()) // inserting to array-of-tables? | |
1771 { | |
1772 auto& a = (*tab)[k].as_array(); | |
1773 if(!a.back().is_table()) | |
1774 { | |
1775 throw syntax_error(format_underline(concat_to_string( | |
1776 "toml::insert_value: target (", | |
1777 format_dotted_keys(first, std::next(iter)), | |
1778 ") is neither table nor an array of tables"), { | |
1779 {a.back().location(), concat_to_string( | |
1780 "actual type is ", a.back().type())}, | |
1781 {v.location(), "inserting this"} | |
1782 }), v.location()); | |
1783 } | |
1784 if(a.empty()) | |
1785 { | |
1786 throw syntax_error(format_underline(concat_to_string( | |
1787 "toml::insert_value: table (\"", | |
1788 format_dotted_keys(first, last), "\") conflicts with" | |
1789 " existing value"), { | |
1790 {tab->at(k).location(), std::string("this array is not insertable")}, | |
1791 {v.location(), std::string("appending it to the statically sized array")} | |
1792 }), v.location()); | |
1793 } | |
1794 if(const auto ptr = detail::get_region(a.at(0))) | |
1795 { | |
1796 if(ptr->str().substr(0,2) != "[[") | |
1797 { | |
1798 throw syntax_error(format_underline(concat_to_string( | |
1799 "toml::insert_value: a table (\"", | |
1800 format_dotted_keys(first, last), "\") cannot be " | |
1801 "inserted to an existing inline array-of-tables"), { | |
1802 {tab->at(k).location(), std::string("this array of table has a static size")}, | |
1803 {v.location(), std::string("appending it to the statically sized array")} | |
1804 }), v.location()); | |
1805 } | |
1806 } | |
1807 tab = std::addressof(a.back().as_table()); | |
1808 } | |
1809 else | |
1810 { | |
1811 throw syntax_error(format_underline(concat_to_string( | |
1812 "toml::insert_value: target (", | |
1813 format_dotted_keys(first, std::next(iter)), | |
1814 ") is neither table nor an array of tables"), { | |
1815 {tab->at(k).location(), concat_to_string( | |
1816 "actual type is ", tab->at(k).type())}, | |
1817 {v.location(), "inserting this"} | |
1818 }), v.location()); | |
1819 } | |
1820 } | |
1821 } | |
1822 return err(std::string("toml::detail::insert_nested_key: never reach here")); | |
1823 } | |
1824 | |
1825 template<typename Value> | |
1826 result<std::pair<typename Value::table_type, region>, std::string> | |
1827 parse_inline_table(location& loc, const std::size_t n_rec) | |
1828 { | |
1829 using value_type = Value; | |
1830 using table_type = typename value_type::table_type; | |
1831 | |
1832 if(n_rec > TOML11_VALUE_RECURSION_LIMIT) | |
1833 { | |
1834 throw syntax_error(std::string("toml::parse_inline_table: recursion limit (" | |
1835 TOML11_STRINGIZE(TOML11_VALUE_RECURSION_LIMIT) ") exceeded"), | |
1836 source_location(loc)); | |
1837 } | |
1838 | |
1839 const auto first = loc.iter(); | |
1840 table_type retval; | |
1841 if(!(loc.iter() != loc.end() && *loc.iter() == '{')) | |
1842 { | |
1843 return err(format_underline("toml::parse_inline_table: ", | |
1844 {{source_location(loc), "the next token is not an inline table"}})); | |
1845 } | |
1846 loc.advance(); | |
1847 | |
1848 // check if the inline table is an empty table = { } | |
1849 maybe<lex_ws>::invoke(loc); | |
1850 if(loc.iter() != loc.end() && *loc.iter() == '}') | |
1851 { | |
1852 loc.advance(); // skip `}` | |
1853 return ok(std::make_pair(retval, region(loc, first, loc.iter()))); | |
1854 } | |
1855 | |
1856 // it starts from "{". it should be formatted as inline-table | |
1857 while(loc.iter() != loc.end()) | |
1858 { | |
1859 const auto kv_r = parse_key_value_pair<value_type>(loc, n_rec+1); | |
1860 if(!kv_r) | |
1861 { | |
1862 return err(kv_r.unwrap_err()); | |
1863 } | |
1864 | |
1865 const auto& kvpair = kv_r.unwrap(); | |
1866 const std::vector<key>& keys = kvpair.first.first; | |
1867 const auto& key_reg = kvpair.first.second; | |
1868 const value_type& val = kvpair.second; | |
1869 | |
1870 const auto inserted = | |
1871 insert_nested_key(retval, val, keys.begin(), keys.end(), key_reg); | |
1872 if(!inserted) | |
1873 { | |
1874 throw internal_error("toml::parse_inline_table: " | |
1875 "failed to insert value into table: " + inserted.unwrap_err(), | |
1876 source_location(loc)); | |
1877 } | |
1878 | |
1879 using lex_table_separator = sequence<maybe<lex_ws>, character<','>>; | |
1880 const auto sp = lex_table_separator::invoke(loc); | |
1881 | |
1882 if(!sp) | |
1883 { | |
1884 maybe<lex_ws>::invoke(loc); | |
1885 | |
1886 if(loc.iter() == loc.end()) | |
1887 { | |
1888 throw syntax_error(format_underline( | |
1889 "toml::parse_inline_table: missing table separator `}` ", | |
1890 {{source_location(loc), "should be `}`"}}), | |
1891 source_location(loc)); | |
1892 } | |
1893 else if(*loc.iter() == '}') | |
1894 { | |
1895 loc.advance(); // skip `}` | |
1896 return ok(std::make_pair( | |
1897 retval, region(loc, first, loc.iter()))); | |
1898 } | |
1899 else if(*loc.iter() == '#' || *loc.iter() == '\r' || *loc.iter() == '\n') | |
1900 { | |
1901 throw syntax_error(format_underline( | |
1902 "toml::parse_inline_table: missing curly brace `}`", | |
1903 {{source_location(loc), "should be `}`"}}), | |
1904 source_location(loc)); | |
1905 } | |
1906 else | |
1907 { | |
1908 throw syntax_error(format_underline( | |
1909 "toml::parse_inline_table: missing table separator `,` ", | |
1910 {{source_location(loc), "should be `,`"}}), | |
1911 source_location(loc)); | |
1912 } | |
1913 } | |
1914 else // `,` is found | |
1915 { | |
1916 maybe<lex_ws>::invoke(loc); | |
1917 if(loc.iter() != loc.end() && *loc.iter() == '}') | |
1918 { | |
1919 throw syntax_error(format_underline( | |
1920 "toml::parse_inline_table: trailing comma is not allowed in" | |
1921 " an inline table", | |
1922 {{source_location(loc), "should be `}`"}}), | |
1923 source_location(loc)); | |
1924 } | |
1925 } | |
1926 } | |
1927 loc.reset(first); | |
1928 throw syntax_error(format_underline("toml::parse_inline_table: " | |
1929 "inline table did not closed by `}`", | |
1930 {{source_location(loc), "should be closed"}}), | |
1931 source_location(loc)); | |
1932 } | |
1933 | |
1934 inline result<value_t, std::string> guess_number_type(const location& l) | |
1935 { | |
1936 // This function tries to find some (common) mistakes by checking characters | |
1937 // that follows the last character of a value. But it is often difficult | |
1938 // because some non-newline characters can appear after a value. E.g. | |
1939 // spaces, tabs, commas (in an array or inline table), closing brackets | |
1940 // (of an array or inline table), comment-sign (#). Since this function | |
1941 // does not parse further, those characters are always allowed to be there. | |
1942 location loc = l; | |
1943 | |
1944 if(lex_offset_date_time::invoke(loc)) {return ok(value_t::offset_datetime);} | |
1945 loc.reset(l.iter()); | |
1946 | |
1947 if(lex_local_date_time::invoke(loc)) | |
1948 { | |
1949 // bad offset may appear after this. | |
1950 if(loc.iter() != loc.end() && (*loc.iter() == '+' || *loc.iter() == '-' | |
1951 || *loc.iter() == 'Z' || *loc.iter() == 'z')) | |
1952 { | |
1953 return err(format_underline("bad offset: should be [+-]HH:MM or Z", | |
1954 {{source_location(loc), "[+-]HH:MM or Z"}}, | |
1955 {"pass: +09:00, -05:30", "fail: +9:00, -5:30"})); | |
1956 } | |
1957 return ok(value_t::local_datetime); | |
1958 } | |
1959 loc.reset(l.iter()); | |
1960 | |
1961 if(lex_local_date::invoke(loc)) | |
1962 { | |
1963 // bad time may appear after this. | |
1964 // A space is allowed as a delimiter between local time. But there are | |
1965 // both cases in which a space becomes valid or invalid. | |
1966 // - invalid: 2019-06-16 7:00:00 | |
1967 // - valid : 2019-06-16 07:00:00 | |
1968 if(loc.iter() != loc.end()) | |
1969 { | |
1970 const auto c = *loc.iter(); | |
1971 if(c == 'T' || c == 't') | |
1972 { | |
1973 return err(format_underline("bad time: should be HH:MM:SS.subsec", | |
1974 {{source_location(loc), "HH:MM:SS.subsec"}}, | |
1975 {"pass: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999", | |
1976 "fail: 1979-05-27T7:32:00, 1979-05-27 17:32"})); | |
1977 } | |
1978 if('0' <= c && c <= '9') | |
1979 { | |
1980 return err(format_underline("bad time: missing T", | |
1981 {{source_location(loc), "T or space required here"}}, | |
1982 {"pass: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999", | |
1983 "fail: 1979-05-27T7:32:00, 1979-05-27 7:32"})); | |
1984 } | |
1985 if(c == ' ' && std::next(loc.iter()) != loc.end() && | |
1986 ('0' <= *std::next(loc.iter()) && *std::next(loc.iter())<= '9')) | |
1987 { | |
1988 loc.advance(); | |
1989 return err(format_underline("bad time: should be HH:MM:SS.subsec", | |
1990 {{source_location(loc), "HH:MM:SS.subsec"}}, | |
1991 {"pass: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999", | |
1992 "fail: 1979-05-27T7:32:00, 1979-05-27 7:32"})); | |
1993 } | |
1994 } | |
1995 return ok(value_t::local_date); | |
1996 } | |
1997 loc.reset(l.iter()); | |
1998 | |
1999 if(lex_local_time::invoke(loc)) {return ok(value_t::local_time);} | |
2000 loc.reset(l.iter()); | |
2001 | |
2002 if(lex_float::invoke(loc)) | |
2003 { | |
2004 if(loc.iter() != loc.end() && *loc.iter() == '_') | |
2005 { | |
2006 return err(format_underline("bad float: `_` should be surrounded by digits", | |
2007 {{source_location(loc), "here"}}, | |
2008 {"pass: +1.0, -2e-2, 3.141_592_653_589, inf, nan", | |
2009 "fail: .0, 1., _1.0, 1.0_, 1_.0, 1.0__0"})); | |
2010 } | |
2011 return ok(value_t::floating); | |
2012 } | |
2013 loc.reset(l.iter()); | |
2014 | |
2015 if(lex_integer::invoke(loc)) | |
2016 { | |
2017 if(loc.iter() != loc.end()) | |
2018 { | |
2019 const auto c = *loc.iter(); | |
2020 if(c == '_') | |
2021 { | |
2022 return err(format_underline("bad integer: `_` should be surrounded by digits", | |
2023 {{source_location(loc), "here"}}, | |
2024 {"pass: -42, 1_000, 1_2_3_4_5, 0xC0FFEE, 0b0010, 0o755", | |
2025 "fail: 1__000, 0123"})); | |
2026 } | |
2027 if('0' <= c && c <= '9') | |
2028 { | |
2029 // leading zero. point '0' | |
2030 loc.retrace(); | |
2031 return err(format_underline("bad integer: leading zero", | |
2032 {{source_location(loc), "here"}}, | |
2033 {"pass: -42, 1_000, 1_2_3_4_5, 0xC0FFEE, 0b0010, 0o755", | |
2034 "fail: 1__000, 0123"})); | |
2035 } | |
2036 if(c == ':' || c == '-') | |
2037 { | |
2038 return err(format_underline("bad datetime: invalid format", | |
2039 {{source_location(loc), "here"}}, | |
2040 {"pass: 1979-05-27T07:32:00-07:00, 1979-05-27 07:32:00.999999Z", | |
2041 "fail: 1979-05-27T7:32:00-7:00, 1979-05-27 7:32-00:30"})); | |
2042 } | |
2043 if(c == '.' || c == 'e' || c == 'E') | |
2044 { | |
2045 return err(format_underline("bad float: invalid format", | |
2046 {{source_location(loc), "here"}}, | |
2047 {"pass: +1.0, -2e-2, 3.141_592_653_589, inf, nan", | |
2048 "fail: .0, 1., _1.0, 1.0_, 1_.0, 1.0__0"})); | |
2049 } | |
2050 } | |
2051 return ok(value_t::integer); | |
2052 } | |
2053 if(loc.iter() != loc.end() && *loc.iter() == '.') | |
2054 { | |
2055 return err(format_underline("bad float: invalid format", | |
2056 {{source_location(loc), "integer part required before this"}}, | |
2057 {"pass: +1.0, -2e-2, 3.141_592_653_589, inf, nan", | |
2058 "fail: .0, 1., _1.0, 1.0_, 1_.0, 1.0__0"})); | |
2059 } | |
2060 if(loc.iter() != loc.end() && *loc.iter() == '_') | |
2061 { | |
2062 return err(format_underline("bad number: `_` should be surrounded by digits", | |
2063 {{source_location(loc), "`_` is not surrounded by digits"}}, | |
2064 {"pass: -42, 1_000, 1_2_3_4_5, 0xC0FFEE, 0b0010, 0o755", | |
2065 "fail: 1__000, 0123"})); | |
2066 } | |
2067 return err(format_underline("bad format: unknown value appeared", | |
2068 {{source_location(loc), "here"}})); | |
2069 } | |
2070 | |
2071 inline result<value_t, std::string> guess_value_type(const location& loc) | |
2072 { | |
2073 switch(*loc.iter()) | |
2074 { | |
2075 case '"' : {return ok(value_t::string); } | |
2076 case '\'': {return ok(value_t::string); } | |
2077 case 't' : {return ok(value_t::boolean); } | |
2078 case 'f' : {return ok(value_t::boolean); } | |
2079 case '[' : {return ok(value_t::array); } | |
2080 case '{' : {return ok(value_t::table); } | |
2081 case 'i' : {return ok(value_t::floating);} // inf. | |
2082 case 'n' : {return ok(value_t::floating);} // nan. | |
2083 default : {return guess_number_type(loc);} | |
2084 } | |
2085 } | |
2086 | |
2087 template<typename Value, typename T> | |
2088 result<Value, std::string> | |
2089 parse_value_helper(result<std::pair<T, region>, std::string> rslt) | |
2090 { | |
2091 if(rslt.is_ok()) | |
2092 { | |
2093 auto comments = rslt.as_ok().second.comments(); | |
2094 return ok(Value(std::move(rslt.as_ok()), std::move(comments))); | |
2095 } | |
2096 else | |
2097 { | |
2098 return err(std::move(rslt.as_err())); | |
2099 } | |
2100 } | |
2101 | |
2102 template<typename Value> | |
2103 result<Value, std::string> parse_value(location& loc, const std::size_t n_rec) | |
2104 { | |
2105 const auto first = loc.iter(); | |
2106 if(first == loc.end()) | |
2107 { | |
2108 return err(format_underline("toml::parse_value: input is empty", | |
2109 {{source_location(loc), ""}})); | |
2110 } | |
2111 | |
2112 const auto type = guess_value_type(loc); | |
2113 if(!type) | |
2114 { | |
2115 return err(type.unwrap_err()); | |
2116 } | |
2117 | |
2118 switch(type.unwrap()) | |
2119 { | |
2120 case value_t::boolean : {return parse_value_helper<Value>(parse_boolean(loc) );} | |
2121 case value_t::integer : {return parse_value_helper<Value>(parse_integer(loc) );} | |
2122 case value_t::floating : {return parse_value_helper<Value>(parse_floating(loc) );} | |
2123 case value_t::string : {return parse_value_helper<Value>(parse_string(loc) );} | |
2124 case value_t::offset_datetime: {return parse_value_helper<Value>(parse_offset_datetime(loc) );} | |
2125 case value_t::local_datetime : {return parse_value_helper<Value>(parse_local_datetime(loc) );} | |
2126 case value_t::local_date : {return parse_value_helper<Value>(parse_local_date(loc) );} | |
2127 case value_t::local_time : {return parse_value_helper<Value>(parse_local_time(loc) );} | |
2128 case value_t::array : {return parse_value_helper<Value>(parse_array<Value>(loc, n_rec));} | |
2129 case value_t::table : {return parse_value_helper<Value>(parse_inline_table<Value>(loc, n_rec));} | |
2130 default: | |
2131 { | |
2132 const auto msg = format_underline("toml::parse_value: " | |
2133 "unknown token appeared", {{source_location(loc), "unknown"}}); | |
2134 loc.reset(first); | |
2135 return err(msg); | |
2136 } | |
2137 } | |
2138 } | |
2139 | |
2140 inline result<std::pair<std::vector<key>, region>, std::string> | |
2141 parse_table_key(location& loc) | |
2142 { | |
2143 if(auto token = lex_std_table::invoke(loc)) | |
2144 { | |
2145 location inner_loc(loc.name(), token.unwrap().str()); | |
2146 | |
2147 const auto open = lex_std_table_open::invoke(inner_loc); | |
2148 if(!open || inner_loc.iter() == inner_loc.end()) | |
2149 { | |
2150 throw internal_error(format_underline( | |
2151 "toml::parse_table_key: no `[`", | |
2152 {{source_location(inner_loc), "should be `[`"}}), | |
2153 source_location(inner_loc)); | |
2154 } | |
2155 // to skip [ a . b . c ] | |
2156 // ^----------- this whitespace | |
2157 lex_ws::invoke(inner_loc); | |
2158 const auto keys = parse_key(inner_loc); | |
2159 if(!keys) | |
2160 { | |
2161 throw internal_error(format_underline( | |
2162 "toml::parse_table_key: invalid key", | |
2163 {{source_location(inner_loc), "not key"}}), | |
2164 source_location(inner_loc)); | |
2165 } | |
2166 // to skip [ a . b . c ] | |
2167 // ^-- this whitespace | |
2168 lex_ws::invoke(inner_loc); | |
2169 const auto close = lex_std_table_close::invoke(inner_loc); | |
2170 if(!close) | |
2171 { | |
2172 throw internal_error(format_underline( | |
2173 "toml::parse_table_key: no `]`", | |
2174 {{source_location(inner_loc), "should be `]`"}}), | |
2175 source_location(inner_loc)); | |
2176 } | |
2177 | |
2178 // after [table.key], newline or EOF(empty table) required. | |
2179 if(loc.iter() != loc.end()) | |
2180 { | |
2181 using lex_newline_after_table_key = | |
2182 sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>; | |
2183 const auto nl = lex_newline_after_table_key::invoke(loc); | |
2184 if(!nl) | |
2185 { | |
2186 throw syntax_error(format_underline( | |
2187 "toml::parse_table_key: newline required after [table.key]", | |
2188 {{source_location(loc), "expected newline"}}), | |
2189 source_location(loc)); | |
2190 } | |
2191 } | |
2192 return ok(std::make_pair(keys.unwrap().first, token.unwrap())); | |
2193 } | |
2194 else | |
2195 { | |
2196 return err(format_underline("toml::parse_table_key: " | |
2197 "not a valid table key", {{source_location(loc), "here"}})); | |
2198 } | |
2199 } | |
2200 | |
2201 inline result<std::pair<std::vector<key>, region>, std::string> | |
2202 parse_array_table_key(location& loc) | |
2203 { | |
2204 if(auto token = lex_array_table::invoke(loc)) | |
2205 { | |
2206 location inner_loc(loc.name(), token.unwrap().str()); | |
2207 | |
2208 const auto open = lex_array_table_open::invoke(inner_loc); | |
2209 if(!open || inner_loc.iter() == inner_loc.end()) | |
2210 { | |
2211 throw internal_error(format_underline( | |
2212 "toml::parse_array_table_key: no `[[`", | |
2213 {{source_location(inner_loc), "should be `[[`"}}), | |
2214 source_location(inner_loc)); | |
2215 } | |
2216 lex_ws::invoke(inner_loc); | |
2217 const auto keys = parse_key(inner_loc); | |
2218 if(!keys) | |
2219 { | |
2220 throw internal_error(format_underline( | |
2221 "toml::parse_array_table_key: invalid key", | |
2222 {{source_location(inner_loc), "not a key"}}), | |
2223 source_location(inner_loc)); | |
2224 } | |
2225 lex_ws::invoke(inner_loc); | |
2226 const auto close = lex_array_table_close::invoke(inner_loc); | |
2227 if(!close) | |
2228 { | |
2229 throw internal_error(format_underline( | |
2230 "toml::parse_array_table_key: no `]]`", | |
2231 {{source_location(inner_loc), "should be `]]`"}}), | |
2232 source_location(inner_loc)); | |
2233 } | |
2234 | |
2235 // after [[table.key]], newline or EOF(empty table) required. | |
2236 if(loc.iter() != loc.end()) | |
2237 { | |
2238 using lex_newline_after_table_key = | |
2239 sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>; | |
2240 const auto nl = lex_newline_after_table_key::invoke(loc); | |
2241 if(!nl) | |
2242 { | |
2243 throw syntax_error(format_underline("toml::" | |
2244 "parse_array_table_key: newline required after [[table.key]]", | |
2245 {{source_location(loc), "expected newline"}}), | |
2246 source_location(loc)); | |
2247 } | |
2248 } | |
2249 return ok(std::make_pair(keys.unwrap().first, token.unwrap())); | |
2250 } | |
2251 else | |
2252 { | |
2253 return err(format_underline("toml::parse_array_table_key: " | |
2254 "not a valid table key", {{source_location(loc), "here"}})); | |
2255 } | |
2256 } | |
2257 | |
2258 // parse table body (key-value pairs until the iter hits the next [tablekey]) | |
2259 template<typename Value> | |
2260 result<typename Value::table_type, std::string> | |
2261 parse_ml_table(location& loc) | |
2262 { | |
2263 using value_type = Value; | |
2264 using table_type = typename value_type::table_type; | |
2265 | |
2266 const auto first = loc.iter(); | |
2267 if(first == loc.end()) | |
2268 { | |
2269 return ok(table_type{}); | |
2270 } | |
2271 | |
2272 // XXX at lest one newline is needed. | |
2273 using skip_line = repeat< | |
2274 sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>, at_least<1>>; | |
2275 skip_line::invoke(loc); | |
2276 lex_ws::invoke(loc); | |
2277 | |
2278 table_type tab; | |
2279 while(loc.iter() != loc.end()) | |
2280 { | |
2281 lex_ws::invoke(loc); | |
2282 const auto before = loc.iter(); | |
2283 if(const auto tmp = parse_array_table_key(loc)) // next table found | |
2284 { | |
2285 loc.reset(before); | |
2286 return ok(tab); | |
2287 } | |
2288 if(const auto tmp = parse_table_key(loc)) // next table found | |
2289 { | |
2290 loc.reset(before); | |
2291 return ok(tab); | |
2292 } | |
2293 | |
2294 if(const auto kv = parse_key_value_pair<value_type>(loc, 0)) | |
2295 { | |
2296 const auto& kvpair = kv.unwrap(); | |
2297 const std::vector<key>& keys = kvpair.first.first; | |
2298 const auto& key_reg = kvpair.first.second; | |
2299 const value_type& val = kvpair.second; | |
2300 const auto inserted = | |
2301 insert_nested_key(tab, val, keys.begin(), keys.end(), key_reg); | |
2302 if(!inserted) | |
2303 { | |
2304 return err(inserted.unwrap_err()); | |
2305 } | |
2306 } | |
2307 else | |
2308 { | |
2309 return err(kv.unwrap_err()); | |
2310 } | |
2311 | |
2312 // comment lines are skipped by the above function call. | |
2313 // However, since the `skip_line` requires at least 1 newline, it fails | |
2314 // if the file ends with ws and/or comment without newline. | |
2315 // `skip_line` matches `ws? + comment? + newline`, not `ws` or `comment` | |
2316 // itself. To skip the last ws and/or comment, call lexers. | |
2317 // It does not matter if these fails, so the return value is discarded. | |
2318 lex_ws::invoke(loc); | |
2319 lex_comment::invoke(loc); | |
2320 | |
2321 // skip_line is (whitespace? comment? newline)_{1,}. multiple empty lines | |
2322 // and comments after the last key-value pairs are allowed. | |
2323 const auto newline = skip_line::invoke(loc); | |
2324 if(!newline && loc.iter() != loc.end()) | |
2325 { | |
2326 const auto before2 = loc.iter(); | |
2327 lex_ws::invoke(loc); // skip whitespace | |
2328 const auto msg = format_underline("toml::parse_table: " | |
2329 "invalid line format", {{source_location(loc), concat_to_string( | |
2330 "expected newline, but got '", show_char(*loc.iter()), "'.")}}); | |
2331 loc.reset(before2); | |
2332 return err(msg); | |
2333 } | |
2334 | |
2335 // the skip_lines only matches with lines that includes newline. | |
2336 // to skip the last line that includes comment and/or whitespace | |
2337 // but no newline, call them one more time. | |
2338 lex_ws::invoke(loc); | |
2339 lex_comment::invoke(loc); | |
2340 } | |
2341 return ok(tab); | |
2342 } | |
2343 | |
2344 template<typename Value> | |
2345 result<Value, std::string> parse_toml_file(location& loc) | |
2346 { | |
2347 using value_type = Value; | |
2348 using table_type = typename value_type::table_type; | |
2349 | |
2350 const auto first = loc.iter(); | |
2351 if(first == loc.end()) | |
2352 { | |
2353 // For empty files, return an empty table with an empty region (zero-length). | |
2354 // Without the region, error messages would miss the filename. | |
2355 return ok(value_type(table_type{}, region(loc, first, first), {})); | |
2356 } | |
2357 | |
2358 // put the first line as a region of a file | |
2359 // Here first != loc.end(), so taking std::next is okay | |
2360 const region file(loc, first, std::next(loc.iter())); | |
2361 | |
2362 // The first successive comments that are separated from the first value | |
2363 // by an empty line are for a file itself. | |
2364 // ```toml | |
2365 // # this is a comment for a file. | |
2366 // | |
2367 // key = "the first value" | |
2368 // ``` | |
2369 // ```toml | |
2370 // # this is a comment for "the first value". | |
2371 // key = "the first value" | |
2372 // ``` | |
2373 std::vector<std::string> comments; | |
2374 using lex_first_comments = sequence< | |
2375 repeat<sequence<maybe<lex_ws>, lex_comment, lex_newline>, at_least<1>>, | |
2376 sequence<maybe<lex_ws>, lex_newline> | |
2377 >; | |
2378 if(const auto token = lex_first_comments::invoke(loc)) | |
2379 { | |
2380 location inner_loc(loc.name(), token.unwrap().str()); | |
2381 while(inner_loc.iter() != inner_loc.end()) | |
2382 { | |
2383 maybe<lex_ws>::invoke(inner_loc); // remove ws if exists | |
2384 if(lex_newline::invoke(inner_loc)) | |
2385 { | |
2386 assert(inner_loc.iter() == inner_loc.end()); | |
2387 break; // empty line found. | |
2388 } | |
2389 auto com = lex_comment::invoke(inner_loc).unwrap().str(); | |
2390 com.erase(com.begin()); // remove # sign | |
2391 comments.push_back(std::move(com)); | |
2392 lex_newline::invoke(inner_loc); | |
2393 } | |
2394 } | |
2395 | |
2396 table_type data; | |
2397 // root object is also a table, but without [tablename] | |
2398 if(const auto tab = parse_ml_table<value_type>(loc)) | |
2399 { | |
2400 data = std::move(tab.unwrap()); | |
2401 } | |
2402 else // failed (empty table is regarded as success in parse_ml_table) | |
2403 { | |
2404 return err(tab.unwrap_err()); | |
2405 } | |
2406 while(loc.iter() != loc.end()) | |
2407 { | |
2408 // here, the region of [table] is regarded as the table-key because | |
2409 // the table body is normally too big and it is not so informative | |
2410 // if the first key-value pair of the table is shown in the error | |
2411 // message. | |
2412 if(const auto tabkey = parse_array_table_key(loc)) | |
2413 { | |
2414 const auto tab = parse_ml_table<value_type>(loc); | |
2415 if(!tab){return err(tab.unwrap_err());} | |
2416 | |
2417 const auto& tk = tabkey.unwrap(); | |
2418 const auto& keys = tk.first; | |
2419 const auto& reg = tk.second; | |
2420 | |
2421 const auto inserted = insert_nested_key(data, | |
2422 value_type(tab.unwrap(), reg, reg.comments()), | |
2423 keys.begin(), keys.end(), reg, | |
2424 /*is_array_of_table=*/ true); | |
2425 if(!inserted) {return err(inserted.unwrap_err());} | |
2426 | |
2427 continue; | |
2428 } | |
2429 if(const auto tabkey = parse_table_key(loc)) | |
2430 { | |
2431 const auto tab = parse_ml_table<value_type>(loc); | |
2432 if(!tab){return err(tab.unwrap_err());} | |
2433 | |
2434 const auto& tk = tabkey.unwrap(); | |
2435 const auto& keys = tk.first; | |
2436 const auto& reg = tk.second; | |
2437 | |
2438 const auto inserted = insert_nested_key(data, | |
2439 value_type(tab.unwrap(), reg, reg.comments()), | |
2440 keys.begin(), keys.end(), reg); | |
2441 if(!inserted) {return err(inserted.unwrap_err());} | |
2442 | |
2443 continue; | |
2444 } | |
2445 return err(format_underline("toml::parse_toml_file: " | |
2446 "unknown line appeared", {{source_location(loc), "unknown format"}})); | |
2447 } | |
2448 | |
2449 return ok(Value(std::move(data), file, comments)); | |
2450 } | |
2451 | |
2452 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY, | |
2453 template<typename ...> class Table = std::unordered_map, | |
2454 template<typename ...> class Array = std::vector> | |
2455 basic_value<Comment, Table, Array> | |
2456 parse(std::vector<char>& letters, const std::string& fname) | |
2457 { | |
2458 using value_type = basic_value<Comment, Table, Array>; | |
2459 | |
2460 // append LF. | |
2461 // Although TOML does not require LF at the EOF, to make parsing logic | |
2462 // simpler, we "normalize" the content by adding LF if it does not exist. | |
2463 // It also checks if the last char is CR, to avoid changing the meaning. | |
2464 // This is not the *best* way to deal with the last character, but is a | |
2465 // simple and quick fix. | |
2466 if(!letters.empty() && letters.back() != '\n' && letters.back() != '\r') | |
2467 { | |
2468 letters.push_back('\n'); | |
2469 } | |
2470 | |
2471 detail::location loc(std::move(fname), std::move(letters)); | |
2472 | |
2473 // skip BOM if exists. | |
2474 // XXX component of BOM (like 0xEF) exceeds the representable range of | |
2475 // signed char, so on some (actually, most) of the environment, these cannot | |
2476 // be compared to char. However, since we are always out of luck, we need to | |
2477 // check our chars are equivalent to BOM. To do this, first we need to | |
2478 // convert char to unsigned char to guarantee the comparability. | |
2479 if(loc.source()->size() >= 3) | |
2480 { | |
2481 std::array<unsigned char, 3> BOM; | |
2482 std::memcpy(BOM.data(), loc.source()->data(), 3); | |
2483 if(BOM[0] == 0xEF && BOM[1] == 0xBB && BOM[2] == 0xBF) | |
2484 { | |
2485 loc.advance(3); // BOM found. skip. | |
2486 } | |
2487 } | |
2488 | |
2489 if (auto data = detail::parse_toml_file<value_type>(loc)) | |
2490 { | |
2491 return std::move(data).unwrap(); | |
2492 } | |
2493 else | |
2494 { | |
2495 throw syntax_error(std::move(data).unwrap_err(), source_location(loc)); | |
2496 } | |
2497 } | |
2498 | |
2499 } // detail | |
2500 | |
2501 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY, | |
2502 template<typename ...> class Table = std::unordered_map, | |
2503 template<typename ...> class Array = std::vector> | |
2504 basic_value<Comment, Table, Array> | |
2505 parse(FILE * file, const std::string& fname) | |
2506 { | |
2507 const long beg = std::ftell(file); | |
2508 if (beg == -1l) | |
2509 { | |
2510 throw file_io_error(errno, "Failed to access", fname); | |
2511 } | |
2512 | |
2513 const int res_seekend = std::fseek(file, 0, SEEK_END); | |
2514 if (res_seekend != 0) | |
2515 { | |
2516 throw file_io_error(errno, "Failed to seek", fname); | |
2517 } | |
2518 | |
2519 const long end = std::ftell(file); | |
2520 if (end == -1l) | |
2521 { | |
2522 throw file_io_error(errno, "Failed to access", fname); | |
2523 } | |
2524 | |
2525 const auto fsize = end - beg; | |
2526 | |
2527 const auto res_seekbeg = std::fseek(file, beg, SEEK_SET); | |
2528 if (res_seekbeg != 0) | |
2529 { | |
2530 throw file_io_error(errno, "Failed to seek", fname); | |
2531 } | |
2532 | |
2533 // read whole file as a sequence of char | |
2534 assert(fsize >= 0); | |
2535 std::vector<char> letters(static_cast<std::size_t>(fsize)); | |
2536 std::fread(letters.data(), sizeof(char), static_cast<std::size_t>(fsize), file); | |
2537 | |
2538 return detail::parse<Comment, Table, Array>(letters, fname); | |
2539 } | |
2540 | |
2541 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY, | |
2542 template<typename ...> class Table = std::unordered_map, | |
2543 template<typename ...> class Array = std::vector> | |
2544 basic_value<Comment, Table, Array> | |
2545 parse(std::istream& is, std::string fname = "unknown file") | |
2546 { | |
2547 const auto beg = is.tellg(); | |
2548 is.seekg(0, std::ios::end); | |
2549 const auto end = is.tellg(); | |
2550 const auto fsize = end - beg; | |
2551 is.seekg(beg); | |
2552 | |
2553 // read whole file as a sequence of char | |
2554 assert(fsize >= 0); | |
2555 std::vector<char> letters(static_cast<std::size_t>(fsize)); | |
2556 is.read(letters.data(), fsize); | |
2557 | |
2558 return detail::parse<Comment, Table, Array>(letters, fname); | |
2559 } | |
2560 | |
2561 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY, | |
2562 template<typename ...> class Table = std::unordered_map, | |
2563 template<typename ...> class Array = std::vector> | |
2564 basic_value<Comment, Table, Array> parse(std::string fname) | |
2565 { | |
2566 std::ifstream ifs(fname, std::ios_base::binary); | |
2567 if(!ifs.good()) | |
2568 { | |
2569 throw std::ios_base::failure( | |
2570 "toml::parse: Error opening file \"" + fname + "\""); | |
2571 } | |
2572 ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit); | |
2573 return parse<Comment, Table, Array>(ifs, std::move(fname)); | |
2574 } | |
2575 | |
2576 #ifdef TOML11_HAS_STD_FILESYSTEM | |
2577 // This function just forwards `parse("filename.toml")` to std::string version | |
2578 // to avoid the ambiguity in overload resolution. | |
2579 // | |
2580 // Both std::string and std::filesystem::path are convertible from const char*. | |
2581 // Without this, both parse(std::string) and parse(std::filesystem::path) | |
2582 // matches to parse("filename.toml"). This breaks the existing code. | |
2583 // | |
2584 // This function exactly matches to the invocation with c-string. | |
2585 // So this function is preferred than others and the ambiguity disappears. | |
2586 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY, | |
2587 template<typename ...> class Table = std::unordered_map, | |
2588 template<typename ...> class Array = std::vector> | |
2589 basic_value<Comment, Table, Array> parse(const char* fname) | |
2590 { | |
2591 return parse<Comment, Table, Array>(std::string(fname)); | |
2592 } | |
2593 | |
2594 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY, | |
2595 template<typename ...> class Table = std::unordered_map, | |
2596 template<typename ...> class Array = std::vector> | |
2597 basic_value<Comment, Table, Array> parse(const std::filesystem::path& fpath) | |
2598 { | |
2599 std::ifstream ifs(fpath, std::ios_base::binary); | |
2600 if(!ifs.good()) | |
2601 { | |
2602 throw std::ios_base::failure( | |
2603 "toml::parse: Error opening file \"" + fpath.string() + "\""); | |
2604 } | |
2605 ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit); | |
2606 return parse<Comment, Table, Array>(ifs, fpath.string()); | |
2607 } | |
2608 #endif // TOML11_HAS_STD_FILESYSTEM | |
2609 | |
2610 } // toml | |
2611 #endif// TOML11_PARSER_HPP |