318
|
1 // Copyright Toru Niina 2017.
|
|
2 // Distributed under the MIT License.
|
|
3 #ifndef TOML11_PARSER_HPP
|
|
4 #define TOML11_PARSER_HPP
|
|
#include <cstring>
#include <fstream>
#include <locale>
#include <sstream>
|
|
8
|
|
9 #include "combinator.hpp"
|
|
10 #include "lexer.hpp"
|
|
11 #include "macros.hpp"
|
|
12 #include "region.hpp"
|
|
13 #include "result.hpp"
|
|
14 #include "types.hpp"
|
|
15 #include "value.hpp"
|
|
16
|
|
17 #ifndef TOML11_DISABLE_STD_FILESYSTEM
|
|
18 #ifdef __cpp_lib_filesystem
|
|
19 #if __has_include(<filesystem>)
|
|
20 #define TOML11_HAS_STD_FILESYSTEM
|
|
21 #include <filesystem>
|
|
#endif // __has_include(<filesystem>)
|
|
23 #endif // __cpp_lib_filesystem
|
|
24 #endif // TOML11_DISABLE_STD_FILESYSTEM
|
|
25
|
|
26 // the previous commit works with 500+ recursions. so it may be too small.
|
|
27 // but in most cases, i think we don't need such a deep recursion of
|
|
28 // arrays or inline-tables.
|
|
29 #define TOML11_VALUE_RECURSION_LIMIT 64
|
|
30
|
|
31 namespace toml
|
|
32 {
|
|
33 namespace detail
|
|
34 {
|
|
35
|
|
36 inline result<std::pair<boolean, region>, std::string>
|
|
37 parse_boolean(location& loc)
|
|
38 {
|
|
39 const auto first = loc.iter();
|
|
40 if(const auto token = lex_boolean::invoke(loc))
|
|
41 {
|
|
42 const auto reg = token.unwrap();
|
|
43 if (reg.str() == "true") {return ok(std::make_pair(true, reg));}
|
|
44 else if(reg.str() == "false") {return ok(std::make_pair(false, reg));}
|
|
45 else // internal error.
|
|
46 {
|
|
47 throw internal_error(format_underline(
|
|
48 "toml::parse_boolean: internal error",
|
|
49 {{source_location(reg), "invalid token"}}),
|
|
50 source_location(reg));
|
|
51 }
|
|
52 }
|
|
53 loc.reset(first); //rollback
|
|
54 return err(format_underline("toml::parse_boolean: ",
|
|
55 {{source_location(loc), "the next token is not a boolean"}}));
|
|
56 }
|
|
57
|
|
58 inline result<std::pair<integer, region>, std::string>
|
|
59 parse_binary_integer(location& loc)
|
|
60 {
|
|
61 const auto first = loc.iter();
|
|
62 if(const auto token = lex_bin_int::invoke(loc))
|
|
63 {
|
|
64 auto str = token.unwrap().str();
|
|
65 assert(str.size() > 2); // minimum -> 0b1
|
|
66 assert(str.at(0) == '0' && str.at(1) == 'b');
|
|
67
|
|
68 // skip all the zeros and `_` locating at the MSB
|
|
69 str.erase(str.begin(), std::find_if(
|
|
70 str.begin() + 2, // to skip prefix `0b`
|
|
71 str.end(),
|
|
72 [](const char c) { return c == '1'; })
|
|
73 );
|
|
74 assert(str.empty() || str.front() == '1');
|
|
75
|
|
76 // since toml11 uses int64_t, 64bit (unsigned) input cannot be read.
|
|
77 const auto max_length = 63 + std::count(str.begin(), str.end(), '_');
|
|
78 if(static_cast<std::string::size_type>(max_length) < str.size())
|
|
79 {
|
|
80 loc.reset(first);
|
|
81 return err(format_underline("toml::parse_binary_integer: "
|
|
82 "only signed 64bit integer is available",
|
|
83 {{source_location(loc), "too large input (> int64_t)"}}));
|
|
84 }
|
|
85
|
|
86 integer retval(0), base(1);
|
|
87 for(auto i(str.rbegin()), e(str.rend()); i!=e; ++i)
|
|
88 {
|
|
89 assert(base != 0); // means overflow, checked in the above code
|
|
90 if(*i == '1')
|
|
91 {
|
|
92 retval += base;
|
|
93 if( (std::numeric_limits<integer>::max)() / 2 < base )
|
|
94 {
|
|
95 base = 0;
|
|
96 }
|
|
97 base *= 2;
|
|
98 }
|
|
99 else if(*i == '0')
|
|
100 {
|
|
101 if( (std::numeric_limits<integer>::max)() / 2 < base )
|
|
102 {
|
|
103 base = 0;
|
|
104 }
|
|
105 base *= 2;
|
|
106 }
|
|
107 else if(*i == '_')
|
|
108 {
|
|
109 // do nothing.
|
|
110 }
|
|
111 else // should be detected by lex_bin_int. [[unlikely]]
|
|
112 {
|
|
113 throw internal_error(format_underline(
|
|
114 "toml::parse_binary_integer: internal error",
|
|
115 {{source_location(token.unwrap()), "invalid token"}}),
|
|
116 source_location(loc));
|
|
117 }
|
|
118 }
|
|
119 return ok(std::make_pair(retval, token.unwrap()));
|
|
120 }
|
|
121 loc.reset(first);
|
|
122 return err(format_underline("toml::parse_binary_integer:",
|
|
123 {{source_location(loc), "the next token is not an integer"}}));
|
|
124 }
|
|
125
|
|
126 inline result<std::pair<integer, region>, std::string>
|
|
127 parse_octal_integer(location& loc)
|
|
128 {
|
|
129 const auto first = loc.iter();
|
|
130 if(const auto token = lex_oct_int::invoke(loc))
|
|
131 {
|
|
132 auto str = token.unwrap().str();
|
|
133 str.erase(std::remove(str.begin(), str.end(), '_'), str.end());
|
|
134 str.erase(str.begin()); str.erase(str.begin()); // remove `0o` prefix
|
|
135
|
|
136 std::istringstream iss(str);
|
|
137 integer retval(0);
|
|
138 iss >> std::oct >> retval;
|
|
139 if(iss.fail())
|
|
140 {
|
|
141 // `istream` sets `failbit` if internally-called `std::num_get::get`
|
|
142 // fails.
|
|
143 // `std::num_get::get` calls `std::strtoll` if the argument type is
|
|
144 // signed.
|
|
145 // `std::strtoll` fails if
|
|
146 // - the value is out_of_range or
|
|
147 // - no conversion is possible.
|
|
148 // since we already checked that the string is valid octal integer,
|
|
149 // so the error reason is out_of_range.
|
|
150 loc.reset(first);
|
|
151 return err(format_underline("toml::parse_octal_integer:",
|
|
152 {{source_location(loc), "out of range"}}));
|
|
153 }
|
|
154 return ok(std::make_pair(retval, token.unwrap()));
|
|
155 }
|
|
156 loc.reset(first);
|
|
157 return err(format_underline("toml::parse_octal_integer:",
|
|
158 {{source_location(loc), "the next token is not an integer"}}));
|
|
159 }
|
|
160
|
|
161 inline result<std::pair<integer, region>, std::string>
|
|
162 parse_hexadecimal_integer(location& loc)
|
|
163 {
|
|
164 const auto first = loc.iter();
|
|
165 if(const auto token = lex_hex_int::invoke(loc))
|
|
166 {
|
|
167 auto str = token.unwrap().str();
|
|
168 str.erase(std::remove(str.begin(), str.end(), '_'), str.end());
|
|
169 str.erase(str.begin()); str.erase(str.begin()); // remove `0x` prefix
|
|
170
|
|
171 std::istringstream iss(str);
|
|
172 integer retval(0);
|
|
173 iss >> std::hex >> retval;
|
|
174 if(iss.fail())
|
|
175 {
|
|
176 // see parse_octal_integer for detail of this error message.
|
|
177 loc.reset(first);
|
|
178 return err(format_underline("toml::parse_hexadecimal_integer:",
|
|
179 {{source_location(loc), "out of range"}}));
|
|
180 }
|
|
181 return ok(std::make_pair(retval, token.unwrap()));
|
|
182 }
|
|
183 loc.reset(first);
|
|
184 return err(format_underline("toml::parse_hexadecimal_integer",
|
|
185 {{source_location(loc), "the next token is not an integer"}}));
|
|
186 }
|
|
187
|
|
188 inline result<std::pair<integer, region>, std::string>
|
|
189 parse_integer(location& loc)
|
|
190 {
|
|
191 const auto first = loc.iter();
|
|
192 if(first != loc.end() && *first == '0')
|
|
193 {
|
|
194 const auto second = std::next(first);
|
|
195 if(second == loc.end()) // the token is just zero.
|
|
196 {
|
|
197 loc.advance();
|
|
198 return ok(std::make_pair(0, region(loc, first, second)));
|
|
199 }
|
|
200
|
|
201 if(*second == 'b') {return parse_binary_integer (loc);} // 0b1100
|
|
202 if(*second == 'o') {return parse_octal_integer (loc);} // 0o775
|
|
203 if(*second == 'x') {return parse_hexadecimal_integer(loc);} // 0xC0FFEE
|
|
204
|
|
205 if(std::isdigit(*second))
|
|
206 {
|
|
207 return err(format_underline("toml::parse_integer: "
|
|
208 "leading zero in an Integer is not allowed.",
|
|
209 {{source_location(loc), "leading zero"}}));
|
|
210 }
|
|
211 else if(std::isalpha(*second))
|
|
212 {
|
|
213 return err(format_underline("toml::parse_integer: "
|
|
214 "unknown integer prefix appeared.",
|
|
215 {{source_location(loc), "none of 0x, 0o, 0b"}}));
|
|
216 }
|
|
217 }
|
|
218
|
|
219 if(const auto token = lex_dec_int::invoke(loc))
|
|
220 {
|
|
221 auto str = token.unwrap().str();
|
|
222 str.erase(std::remove(str.begin(), str.end(), '_'), str.end());
|
|
223
|
|
224 std::istringstream iss(str);
|
|
225 integer retval(0);
|
|
226 iss >> retval;
|
|
227 if(iss.fail())
|
|
228 {
|
|
229 // see parse_octal_integer for detail of this error message.
|
|
230 loc.reset(first);
|
|
231 return err(format_underline("toml::parse_integer:",
|
|
232 {{source_location(loc), "out of range"}}));
|
|
233 }
|
|
234 return ok(std::make_pair(retval, token.unwrap()));
|
|
235 }
|
|
236 loc.reset(first);
|
|
237 return err(format_underline("toml::parse_integer: ",
|
|
238 {{source_location(loc), "the next token is not an integer"}}));
|
|
239 }
|
|
240
|
|
241 inline result<std::pair<floating, region>, std::string>
|
|
242 parse_floating(location& loc)
|
|
243 {
|
|
244 const auto first = loc.iter();
|
|
245 if(const auto token = lex_float::invoke(loc))
|
|
246 {
|
|
247 auto str = token.unwrap().str();
|
|
248 if(str == "inf" || str == "+inf")
|
|
249 {
|
|
250 if(std::numeric_limits<floating>::has_infinity)
|
|
251 {
|
|
252 return ok(std::make_pair(
|
|
253 std::numeric_limits<floating>::infinity(), token.unwrap()));
|
|
254 }
|
|
255 else
|
|
256 {
|
|
257 throw std::domain_error("toml::parse_floating: inf value found"
|
|
258 " but the current environment does not support inf. Please"
|
|
259 " make sure that the floating-point implementation conforms"
|
|
260 " IEEE 754/ISO 60559 international standard.");
|
|
261 }
|
|
262 }
|
|
263 else if(str == "-inf")
|
|
264 {
|
|
265 if(std::numeric_limits<floating>::has_infinity)
|
|
266 {
|
|
267 return ok(std::make_pair(
|
|
268 -std::numeric_limits<floating>::infinity(), token.unwrap()));
|
|
269 }
|
|
270 else
|
|
271 {
|
|
272 throw std::domain_error("toml::parse_floating: inf value found"
|
|
273 " but the current environment does not support inf. Please"
|
|
274 " make sure that the floating-point implementation conforms"
|
|
275 " IEEE 754/ISO 60559 international standard.");
|
|
276 }
|
|
277 }
|
|
278 else if(str == "nan" || str == "+nan")
|
|
279 {
|
|
280 if(std::numeric_limits<floating>::has_quiet_NaN)
|
|
281 {
|
|
282 return ok(std::make_pair(
|
|
283 std::numeric_limits<floating>::quiet_NaN(), token.unwrap()));
|
|
284 }
|
|
285 else if(std::numeric_limits<floating>::has_signaling_NaN)
|
|
286 {
|
|
287 return ok(std::make_pair(
|
|
288 std::numeric_limits<floating>::signaling_NaN(), token.unwrap()));
|
|
289 }
|
|
290 else
|
|
291 {
|
|
292 throw std::domain_error("toml::parse_floating: NaN value found"
|
|
293 " but the current environment does not support NaN. Please"
|
|
294 " make sure that the floating-point implementation conforms"
|
|
295 " IEEE 754/ISO 60559 international standard.");
|
|
296 }
|
|
297 }
|
|
298 else if(str == "-nan")
|
|
299 {
|
|
300 if(std::numeric_limits<floating>::has_quiet_NaN)
|
|
301 {
|
|
302 return ok(std::make_pair(
|
|
303 -std::numeric_limits<floating>::quiet_NaN(), token.unwrap()));
|
|
304 }
|
|
305 else if(std::numeric_limits<floating>::has_signaling_NaN)
|
|
306 {
|
|
307 return ok(std::make_pair(
|
|
308 -std::numeric_limits<floating>::signaling_NaN(), token.unwrap()));
|
|
309 }
|
|
310 else
|
|
311 {
|
|
312 throw std::domain_error("toml::parse_floating: NaN value found"
|
|
313 " but the current environment does not support NaN. Please"
|
|
314 " make sure that the floating-point implementation conforms"
|
|
315 " IEEE 754/ISO 60559 international standard.");
|
|
316 }
|
|
317 }
|
|
318 str.erase(std::remove(str.begin(), str.end(), '_'), str.end());
|
|
319 std::istringstream iss(str);
|
|
320 floating v(0.0);
|
|
321 iss >> v;
|
|
322 if(iss.fail())
|
|
323 {
|
|
324 // see parse_octal_integer for detail of this error message.
|
|
325 loc.reset(first);
|
|
326 return err(format_underline("toml::parse_floating:",
|
|
327 {{source_location(loc), "out of range"}}));
|
|
328 }
|
|
329 return ok(std::make_pair(v, token.unwrap()));
|
|
330 }
|
|
331 loc.reset(first);
|
|
332 return err(format_underline("toml::parse_floating: ",
|
|
333 {{source_location(loc), "the next token is not a float"}}));
|
|
334 }
|
|
335
|
|
336 inline std::string read_utf8_codepoint(const region& reg, const location& loc)
|
|
337 {
|
|
338 const auto str = reg.str().substr(1);
|
|
339 std::uint_least32_t codepoint;
|
|
340 std::istringstream iss(str);
|
|
341 iss >> std::hex >> codepoint;
|
|
342
|
|
343 const auto to_char = [](const std::uint_least32_t i) noexcept -> char {
|
|
344 const auto uc = static_cast<unsigned char>(i);
|
|
345 return *reinterpret_cast<const char*>(std::addressof(uc));
|
|
346 };
|
|
347
|
|
348 std::string character;
|
|
349 if(codepoint < 0x80) // U+0000 ... U+0079 ; just an ASCII.
|
|
350 {
|
|
351 character += static_cast<char>(codepoint);
|
|
352 }
|
|
353 else if(codepoint < 0x800) //U+0080 ... U+07FF
|
|
354 {
|
|
355 // 110yyyyx 10xxxxxx; 0x3f == 0b0011'1111
|
|
356 character += to_char(0xC0| codepoint >> 6);
|
|
357 character += to_char(0x80|(codepoint & 0x3F));
|
|
358 }
|
|
359 else if(codepoint < 0x10000) // U+0800...U+FFFF
|
|
360 {
|
|
361 if(0xD800 <= codepoint && codepoint <= 0xDFFF)
|
|
362 {
|
|
363 throw syntax_error(format_underline(
|
|
364 "toml::read_utf8_codepoint: codepoints in the range "
|
|
365 "[0xD800, 0xDFFF] are not valid UTF-8.", {{
|
|
366 source_location(loc), "not a valid UTF-8 codepoint"
|
|
367 }}), source_location(loc));
|
|
368 }
|
|
369 assert(codepoint < 0xD800 || 0xDFFF < codepoint);
|
|
370 // 1110yyyy 10yxxxxx 10xxxxxx
|
|
371 character += to_char(0xE0| codepoint >> 12);
|
|
372 character += to_char(0x80|(codepoint >> 6 & 0x3F));
|
|
373 character += to_char(0x80|(codepoint & 0x3F));
|
|
374 }
|
|
375 else if(codepoint < 0x110000) // U+010000 ... U+10FFFF
|
|
376 {
|
|
377 // 11110yyy 10yyxxxx 10xxxxxx 10xxxxxx
|
|
378 character += to_char(0xF0| codepoint >> 18);
|
|
379 character += to_char(0x80|(codepoint >> 12 & 0x3F));
|
|
380 character += to_char(0x80|(codepoint >> 6 & 0x3F));
|
|
381 character += to_char(0x80|(codepoint & 0x3F));
|
|
382 }
|
|
383 else // out of UTF-8 region
|
|
384 {
|
|
385 throw syntax_error(format_underline("toml::read_utf8_codepoint:"
|
|
386 " input codepoint is too large.",
|
|
387 {{source_location(loc), "should be in [0x00..0x10FFFF]"}}),
|
|
388 source_location(loc));
|
|
389 }
|
|
390 return character;
|
|
391 }
|
|
392
|
|
393 inline result<std::string, std::string> parse_escape_sequence(location& loc)
|
|
394 {
|
|
395 const auto first = loc.iter();
|
|
396 if(first == loc.end() || *first != '\\')
|
|
397 {
|
|
398 return err(format_underline("toml::parse_escape_sequence: ", {{
|
|
399 source_location(loc), "the next token is not a backslash \"\\\""}}));
|
|
400 }
|
|
401 loc.advance();
|
|
402 switch(*loc.iter())
|
|
403 {
|
|
404 case '\\':{loc.advance(); return ok(std::string("\\"));}
|
|
405 case '"' :{loc.advance(); return ok(std::string("\""));}
|
|
406 case 'b' :{loc.advance(); return ok(std::string("\b"));}
|
|
407 case 't' :{loc.advance(); return ok(std::string("\t"));}
|
|
408 case 'n' :{loc.advance(); return ok(std::string("\n"));}
|
|
409 case 'f' :{loc.advance(); return ok(std::string("\f"));}
|
|
410 case 'r' :{loc.advance(); return ok(std::string("\r"));}
|
|
411 #ifdef TOML11_USE_UNRELEASED_TOML_FEATURES
|
|
412 case 'e' :{loc.advance(); return ok(std::string("\x1b"));} // ESC
|
|
413 #endif
|
|
414 case 'u' :
|
|
415 {
|
|
416 if(const auto token = lex_escape_unicode_short::invoke(loc))
|
|
417 {
|
|
418 return ok(read_utf8_codepoint(token.unwrap(), loc));
|
|
419 }
|
|
420 else
|
|
421 {
|
|
422 return err(format_underline("parse_escape_sequence: "
|
|
423 "invalid token found in UTF-8 codepoint uXXXX.",
|
|
424 {{source_location(loc), "here"}}));
|
|
425 }
|
|
426 }
|
|
427 case 'U':
|
|
428 {
|
|
429 if(const auto token = lex_escape_unicode_long::invoke(loc))
|
|
430 {
|
|
431 return ok(read_utf8_codepoint(token.unwrap(), loc));
|
|
432 }
|
|
433 else
|
|
434 {
|
|
435 return err(format_underline("parse_escape_sequence: "
|
|
436 "invalid token found in UTF-8 codepoint Uxxxxxxxx",
|
|
437 {{source_location(loc), "here"}}));
|
|
438 }
|
|
439 }
|
|
440 }
|
|
441
|
|
442 const auto msg = format_underline("parse_escape_sequence: "
|
|
443 "unknown escape sequence appeared.", {{source_location(loc),
|
|
444 "escape sequence is one of \\, \", b, t, n, f, r, uxxxx, Uxxxxxxxx"}},
|
|
445 /* Hints = */{"if you want to write backslash as just one backslash, "
|
|
446 "use literal string like: regex = '<\\i\\c*\\s*>'"});
|
|
447 loc.reset(first);
|
|
448 return err(msg);
|
|
449 }
|
|
450
|
|
451 inline std::ptrdiff_t check_utf8_validity(const std::string& reg)
|
|
452 {
|
|
453 location loc("tmp", reg);
|
|
454 const auto u8 = repeat<lex_utf8_code, unlimited>::invoke(loc);
|
|
455 if(!u8 || loc.iter() != loc.end())
|
|
456 {
|
|
457 const auto error_location = std::distance(loc.begin(), loc.iter());
|
|
458 assert(0 <= error_location);
|
|
459 return error_location;
|
|
460 }
|
|
461 return -1;
|
|
462 }
|
|
463
|
|
464 inline result<std::pair<toml::string, region>, std::string>
|
|
465 parse_ml_basic_string(location& loc)
|
|
466 {
|
|
467 const auto first = loc.iter();
|
|
468 if(const auto token = lex_ml_basic_string::invoke(loc))
|
|
469 {
|
|
470 auto inner_loc = loc;
|
|
471 inner_loc.reset(first);
|
|
472
|
|
473 std::string retval;
|
|
474 retval.reserve(token.unwrap().size());
|
|
475
|
|
476 auto delim = lex_ml_basic_string_open::invoke(inner_loc);
|
|
477 if(!delim)
|
|
478 {
|
|
479 throw internal_error(format_underline(
|
|
480 "parse_ml_basic_string: invalid token",
|
|
481 {{source_location(inner_loc), "should be \"\"\""}}),
|
|
482 source_location(inner_loc));
|
|
483 }
|
|
484 // immediate newline is ignored (if exists)
|
|
485 /* discard return value */ lex_newline::invoke(inner_loc);
|
|
486
|
|
487 delim = none();
|
|
488 while(!delim)
|
|
489 {
|
|
490 using lex_unescaped_seq = repeat<
|
|
491 either<lex_ml_basic_unescaped, lex_newline>, unlimited>;
|
|
492 if(auto unescaped = lex_unescaped_seq::invoke(inner_loc))
|
|
493 {
|
|
494 retval += unescaped.unwrap().str();
|
|
495 }
|
|
496 if(auto escaped = parse_escape_sequence(inner_loc))
|
|
497 {
|
|
498 retval += escaped.unwrap();
|
|
499 }
|
|
500 if(auto esc_nl = lex_ml_basic_escaped_newline::invoke(inner_loc))
|
|
501 {
|
|
502 // ignore newline after escape until next non-ws char
|
|
503 }
|
|
504 if(inner_loc.iter() == inner_loc.end())
|
|
505 {
|
|
506 throw internal_error(format_underline(
|
|
507 "parse_ml_basic_string: unexpected end of region",
|
|
508 {{source_location(inner_loc), "not sufficient token"}}),
|
|
509 source_location(inner_loc));
|
|
510 }
|
|
511 delim = lex_ml_basic_string_close::invoke(inner_loc);
|
|
512 }
|
|
513 // `lex_ml_basic_string_close` allows 3 to 5 `"`s to allow 1 or 2 `"`s
|
|
514 // at just before the delimiter. Here, we need to attach `"`s at the
|
|
515 // end of the string body, if it exists.
|
|
516 // For detail, see the definition of `lex_ml_basic_string_close`.
|
|
517 assert(std::all_of(delim.unwrap().first(), delim.unwrap().last(),
|
|
518 [](const char c) noexcept {return c == '\"';}));
|
|
519 switch(delim.unwrap().size())
|
|
520 {
|
|
521 case 3: {break;}
|
|
522 case 4: {retval += "\""; break;}
|
|
523 case 5: {retval += "\"\""; break;}
|
|
524 default:
|
|
525 {
|
|
526 throw internal_error(format_underline(
|
|
527 "parse_ml_basic_string: closing delimiter has invalid length",
|
|
528 {{source_location(inner_loc), "end of this"}}),
|
|
529 source_location(inner_loc));
|
|
530 }
|
|
531 }
|
|
532
|
|
533 const auto err_loc = check_utf8_validity(token.unwrap().str());
|
|
534 if(err_loc == -1)
|
|
535 {
|
|
536 return ok(std::make_pair(toml::string(retval), token.unwrap()));
|
|
537 }
|
|
538 else
|
|
539 {
|
|
540 inner_loc.reset(first);
|
|
541 inner_loc.advance(err_loc);
|
|
542 throw syntax_error(format_underline(
|
|
543 "parse_ml_basic_string: invalid utf8 sequence found",
|
|
544 {{source_location(inner_loc), "here"}}),
|
|
545 source_location(inner_loc));
|
|
546 }
|
|
547 }
|
|
548 else
|
|
549 {
|
|
550 loc.reset(first);
|
|
551 return err(format_underline("toml::parse_ml_basic_string: "
|
|
552 "the next token is not a valid multiline string",
|
|
553 {{source_location(loc), "here"}}));
|
|
554 }
|
|
555 }
|
|
556
|
|
557 inline result<std::pair<toml::string, region>, std::string>
|
|
558 parse_basic_string(location& loc)
|
|
559 {
|
|
560 const auto first = loc.iter();
|
|
561 if(const auto token = lex_basic_string::invoke(loc))
|
|
562 {
|
|
563 auto inner_loc = loc;
|
|
564 inner_loc.reset(first);
|
|
565
|
|
566 auto quot = lex_quotation_mark::invoke(inner_loc);
|
|
567 if(!quot)
|
|
568 {
|
|
569 throw internal_error(format_underline("parse_basic_string: "
|
|
570 "invalid token", {{source_location(inner_loc), "should be \""}}),
|
|
571 source_location(inner_loc));
|
|
572 }
|
|
573
|
|
574 std::string retval;
|
|
575 retval.reserve(token.unwrap().size());
|
|
576
|
|
577 quot = none();
|
|
578 while(!quot)
|
|
579 {
|
|
580 using lex_unescaped_seq = repeat<lex_basic_unescaped, unlimited>;
|
|
581 if(auto unescaped = lex_unescaped_seq::invoke(inner_loc))
|
|
582 {
|
|
583 retval += unescaped.unwrap().str();
|
|
584 }
|
|
585 if(auto escaped = parse_escape_sequence(inner_loc))
|
|
586 {
|
|
587 retval += escaped.unwrap();
|
|
588 }
|
|
589 if(inner_loc.iter() == inner_loc.end())
|
|
590 {
|
|
591 throw internal_error(format_underline(
|
|
592 "parse_basic_string: unexpected end of region",
|
|
593 {{source_location(inner_loc), "not sufficient token"}}),
|
|
594 source_location(inner_loc));
|
|
595 }
|
|
596 quot = lex_quotation_mark::invoke(inner_loc);
|
|
597 }
|
|
598
|
|
599 const auto err_loc = check_utf8_validity(token.unwrap().str());
|
|
600 if(err_loc == -1)
|
|
601 {
|
|
602 return ok(std::make_pair(toml::string(retval), token.unwrap()));
|
|
603 }
|
|
604 else
|
|
605 {
|
|
606 inner_loc.reset(first);
|
|
607 inner_loc.advance(err_loc);
|
|
608 throw syntax_error(format_underline(
|
|
609 "parse_basic_string: invalid utf8 sequence found",
|
|
610 {{source_location(inner_loc), "here"}}),
|
|
611 source_location(inner_loc));
|
|
612 }
|
|
613 }
|
|
614 else
|
|
615 {
|
|
616 loc.reset(first); // rollback
|
|
617 return err(format_underline("toml::parse_basic_string: "
|
|
618 "the next token is not a valid string",
|
|
619 {{source_location(loc), "here"}}));
|
|
620 }
|
|
621 }
|
|
622
|
|
623 inline result<std::pair<toml::string, region>, std::string>
|
|
624 parse_ml_literal_string(location& loc)
|
|
625 {
|
|
626 const auto first = loc.iter();
|
|
627 if(const auto token = lex_ml_literal_string::invoke(loc))
|
|
628 {
|
|
629 auto inner_loc = loc;
|
|
630 inner_loc.reset(first);
|
|
631
|
|
632 const auto open = lex_ml_literal_string_open::invoke(inner_loc);
|
|
633 if(!open)
|
|
634 {
|
|
635 throw internal_error(format_underline(
|
|
636 "parse_ml_literal_string: invalid token",
|
|
637 {{source_location(inner_loc), "should be '''"}}),
|
|
638 source_location(inner_loc));
|
|
639 }
|
|
640 // immediate newline is ignored (if exists)
|
|
641 /* discard return value */ lex_newline::invoke(inner_loc);
|
|
642
|
|
643 const auto body = lex_ml_literal_body::invoke(inner_loc);
|
|
644
|
|
645 const auto close = lex_ml_literal_string_close::invoke(inner_loc);
|
|
646 if(!close)
|
|
647 {
|
|
648 throw internal_error(format_underline(
|
|
649 "parse_ml_literal_string: invalid token",
|
|
650 {{source_location(inner_loc), "should be '''"}}),
|
|
651 source_location(inner_loc));
|
|
652 }
|
|
653 // `lex_ml_literal_string_close` allows 3 to 5 `'`s to allow 1 or 2 `'`s
|
|
654 // at just before the delimiter. Here, we need to attach `'`s at the
|
|
655 // end of the string body, if it exists.
|
|
656 // For detail, see the definition of `lex_ml_basic_string_close`.
|
|
657
|
|
658 std::string retval = body.unwrap().str();
|
|
659 assert(std::all_of(close.unwrap().first(), close.unwrap().last(),
|
|
660 [](const char c) noexcept {return c == '\'';}));
|
|
661 switch(close.unwrap().size())
|
|
662 {
|
|
663 case 3: {break;}
|
|
664 case 4: {retval += "'"; break;}
|
|
665 case 5: {retval += "''"; break;}
|
|
666 default:
|
|
667 {
|
|
668 throw internal_error(format_underline(
|
|
669 "parse_ml_literal_string: closing delimiter has invalid length",
|
|
670 {{source_location(inner_loc), "end of this"}}),
|
|
671 source_location(inner_loc));
|
|
672 }
|
|
673 }
|
|
674
|
|
675 const auto err_loc = check_utf8_validity(token.unwrap().str());
|
|
676 if(err_loc == -1)
|
|
677 {
|
|
678 return ok(std::make_pair(toml::string(retval, toml::string_t::literal),
|
|
679 token.unwrap()));
|
|
680 }
|
|
681 else
|
|
682 {
|
|
683 inner_loc.reset(first);
|
|
684 inner_loc.advance(err_loc);
|
|
685 throw syntax_error(format_underline(
|
|
686 "parse_ml_literal_string: invalid utf8 sequence found",
|
|
687 {{source_location(inner_loc), "here"}}),
|
|
688 source_location(inner_loc));
|
|
689 }
|
|
690 }
|
|
691 else
|
|
692 {
|
|
693 loc.reset(first); // rollback
|
|
694 return err(format_underline("toml::parse_ml_literal_string: "
|
|
695 "the next token is not a valid multiline literal string",
|
|
696 {{source_location(loc), "here"}}));
|
|
697 }
|
|
698 }
|
|
699
|
|
700 inline result<std::pair<toml::string, region>, std::string>
|
|
701 parse_literal_string(location& loc)
|
|
702 {
|
|
703 const auto first = loc.iter();
|
|
704 if(const auto token = lex_literal_string::invoke(loc))
|
|
705 {
|
|
706 auto inner_loc = loc;
|
|
707 inner_loc.reset(first);
|
|
708
|
|
709 const auto open = lex_apostrophe::invoke(inner_loc);
|
|
710 if(!open)
|
|
711 {
|
|
712 throw internal_error(format_underline(
|
|
713 "parse_literal_string: invalid token",
|
|
714 {{source_location(inner_loc), "should be '"}}),
|
|
715 source_location(inner_loc));
|
|
716 }
|
|
717
|
|
718 const auto body = repeat<lex_literal_char, unlimited>::invoke(inner_loc);
|
|
719
|
|
720 const auto close = lex_apostrophe::invoke(inner_loc);
|
|
721 if(!close)
|
|
722 {
|
|
723 throw internal_error(format_underline(
|
|
724 "parse_literal_string: invalid token",
|
|
725 {{source_location(inner_loc), "should be '"}}),
|
|
726 source_location(inner_loc));
|
|
727 }
|
|
728
|
|
729 const auto err_loc = check_utf8_validity(token.unwrap().str());
|
|
730 if(err_loc == -1)
|
|
731 {
|
|
732 return ok(std::make_pair(
|
|
733 toml::string(body.unwrap().str(), toml::string_t::literal),
|
|
734 token.unwrap()));
|
|
735 }
|
|
736 else
|
|
737 {
|
|
738 inner_loc.reset(first);
|
|
739 inner_loc.advance(err_loc);
|
|
740 throw syntax_error(format_underline(
|
|
741 "parse_literal_string: invalid utf8 sequence found",
|
|
742 {{source_location(inner_loc), "here"}}),
|
|
743 source_location(inner_loc));
|
|
744 }
|
|
745 }
|
|
746 else
|
|
747 {
|
|
748 loc.reset(first); // rollback
|
|
749 return err(format_underline("toml::parse_literal_string: "
|
|
750 "the next token is not a valid literal string",
|
|
751 {{source_location(loc), "here"}}));
|
|
752 }
|
|
753 }
|
|
754
|
|
755 inline result<std::pair<toml::string, region>, std::string>
|
|
756 parse_string(location& loc)
|
|
757 {
|
|
758 if(loc.iter() != loc.end() && *(loc.iter()) == '"')
|
|
759 {
|
|
760 if(loc.iter() + 1 != loc.end() && *(loc.iter() + 1) == '"' &&
|
|
761 loc.iter() + 2 != loc.end() && *(loc.iter() + 2) == '"')
|
|
762 {
|
|
763 return parse_ml_basic_string(loc);
|
|
764 }
|
|
765 else
|
|
766 {
|
|
767 return parse_basic_string(loc);
|
|
768 }
|
|
769 }
|
|
770 else if(loc.iter() != loc.end() && *(loc.iter()) == '\'')
|
|
771 {
|
|
772 if(loc.iter() + 1 != loc.end() && *(loc.iter() + 1) == '\'' &&
|
|
773 loc.iter() + 2 != loc.end() && *(loc.iter() + 2) == '\'')
|
|
774 {
|
|
775 return parse_ml_literal_string(loc);
|
|
776 }
|
|
777 else
|
|
778 {
|
|
779 return parse_literal_string(loc);
|
|
780 }
|
|
781 }
|
|
782 return err(format_underline("toml::parse_string: ",
|
|
783 {{source_location(loc), "the next token is not a string"}}));
|
|
784 }
|
|
785
|
|
786 inline result<std::pair<local_date, region>, std::string>
|
|
787 parse_local_date(location& loc)
|
|
788 {
|
|
789 const auto first = loc.iter();
|
|
790 if(const auto token = lex_local_date::invoke(loc))
|
|
791 {
|
|
792 location inner_loc(loc.name(), token.unwrap().str());
|
|
793
|
|
794 const auto y = lex_date_fullyear::invoke(inner_loc);
|
|
795 if(!y || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-')
|
|
796 {
|
|
797 throw internal_error(format_underline(
|
|
798 "toml::parse_local_date: invalid year format",
|
|
799 {{source_location(inner_loc), "should be `-`"}}),
|
|
800 source_location(inner_loc));
|
|
801 }
|
|
802 inner_loc.advance();
|
|
803 const auto m = lex_date_month::invoke(inner_loc);
|
|
804 if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != '-')
|
|
805 {
|
|
806 throw internal_error(format_underline(
|
|
807 "toml::parse_local_date: invalid month format",
|
|
808 {{source_location(inner_loc), "should be `-`"}}),
|
|
809 source_location(inner_loc));
|
|
810 }
|
|
811 inner_loc.advance();
|
|
812 const auto d = lex_date_mday::invoke(inner_loc);
|
|
813 if(!d)
|
|
814 {
|
|
815 throw internal_error(format_underline(
|
|
816 "toml::parse_local_date: invalid day format",
|
|
817 {{source_location(inner_loc), "here"}}),
|
|
818 source_location(inner_loc));
|
|
819 }
|
|
820
|
|
821 const auto year = static_cast<std::int16_t>(from_string<int>(y.unwrap().str(), 0));
|
|
822 const auto month = static_cast<std::int8_t >(from_string<int>(m.unwrap().str(), 0));
|
|
823 const auto day = static_cast<std::int8_t >(from_string<int>(d.unwrap().str(), 0));
|
|
824
|
|
825 // We briefly check whether the input date is valid or not. But here, we
|
|
826 // only check if the RFC3339 compliance.
|
|
827 // Actually there are several special date that does not exist,
|
|
828 // because of historical reasons, such as 1582/10/5-1582/10/14 (only in
|
|
829 // several countries). But here, we do not care about such a complicated
|
|
830 // rule. It makes the code complicated and there is only low probability
|
|
831 // that such a specific date is needed in practice. If someone need to
|
|
832 // validate date accurately, that means that the one need a specialized
|
|
833 // library for their purpose in a different layer.
|
|
834 {
|
|
835 const bool is_leap = (year % 4 == 0) && ((year % 100 != 0) || (year % 400 == 0));
|
|
836 const auto max_day = (month == 2) ? (is_leap ? 29 : 28) :
|
|
837 ((month == 4 || month == 6 || month == 9 || month == 11) ? 30 : 31);
|
|
838
|
|
839 if((month < 1 || 12 < month) || (day < 1 || max_day < day))
|
|
840 {
|
|
841 throw syntax_error(format_underline("toml::parse_date: "
|
|
842 "invalid date: it does not conform RFC3339.", {{
|
|
843 source_location(loc), "month should be 01-12, day should be"
|
|
844 " 01-28,29,30,31, depending on month/year."
|
|
845 }}), source_location(inner_loc));
|
|
846 }
|
|
847 }
|
|
848 return ok(std::make_pair(local_date(year, static_cast<month_t>(month - 1), day),
|
|
849 token.unwrap()));
|
|
850 }
|
|
851 else
|
|
852 {
|
|
853 loc.reset(first);
|
|
854 return err(format_underline("toml::parse_local_date: ",
|
|
855 {{source_location(loc), "the next token is not a local_date"}}));
|
|
856 }
|
|
857 }
|
|
858
|
|
859 inline result<std::pair<local_time, region>, std::string>
|
|
860 parse_local_time(location& loc)
|
|
861 {
|
|
862 const auto first = loc.iter();
|
|
863 if(const auto token = lex_local_time::invoke(loc))
|
|
864 {
|
|
865 location inner_loc(loc.name(), token.unwrap().str());
|
|
866
|
|
867 const auto h = lex_time_hour::invoke(inner_loc);
|
|
868 if(!h || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':')
|
|
869 {
|
|
870 throw internal_error(format_underline(
|
|
871 "toml::parse_local_time: invalid year format",
|
|
872 {{source_location(inner_loc), "should be `:`"}}),
|
|
873 source_location(inner_loc));
|
|
874 }
|
|
875 inner_loc.advance();
|
|
876 const auto m = lex_time_minute::invoke(inner_loc);
|
|
877 if(!m || inner_loc.iter() == inner_loc.end() || *inner_loc.iter() != ':')
|
|
878 {
|
|
879 throw internal_error(format_underline(
|
|
880 "toml::parse_local_time: invalid month format",
|
|
881 {{source_location(inner_loc), "should be `:`"}}),
|
|
882 source_location(inner_loc));
|
|
883 }
|
|
884 inner_loc.advance();
|
|
885 const auto s = lex_time_second::invoke(inner_loc);
|
|
886 if(!s)
|
|
887 {
|
|
888 throw internal_error(format_underline(
|
|
889 "toml::parse_local_time: invalid second format",
|
|
890 {{source_location(inner_loc), "here"}}),
|
|
891 source_location(inner_loc));
|
|
892 }
|
|
893
|
|
894 const int hour = from_string<int>(h.unwrap().str(), 0);
|
|
895 const int minute = from_string<int>(m.unwrap().str(), 0);
|
|
896 const int second = from_string<int>(s.unwrap().str(), 0);
|
|
897
|
|
898 if((hour < 0 || 23 < hour) || (minute < 0 || 59 < minute) ||
|
|
899 (second < 0 || 60 < second)) // it may be leap second
|
|
900 {
|
|
901 throw syntax_error(format_underline("toml::parse_local_time: "
|
|
902 "invalid time: it does not conform RFC3339.", {{
|
|
903 source_location(loc), "hour should be 00-23, minute should be"
|
|
904 " 00-59, second should be 00-60 (depending on the leap"
|
|
905 " second rules.)"}}), source_location(inner_loc));
|
|
906 }
|
|
907
|
|
908 local_time time(hour, minute, second, 0, 0);
|
|
909
|
|
910 const auto before_secfrac = inner_loc.iter();
|
|
911 if(const auto secfrac = lex_time_secfrac::invoke(inner_loc))
|
|
912 {
|
|
913 auto sf = secfrac.unwrap().str();
|
|
914 sf.erase(sf.begin()); // sf.front() == '.'
|
|
915 switch(sf.size() % 3)
|
|
916 {
|
|
917 case 2: sf += '0'; break;
|
|
918 case 1: sf += "00"; break;
|
|
919 case 0: break;
|
|
920 default: break;
|
|
921 }
|
|
922 if(sf.size() >= 9)
|
|
923 {
|
|
924 time.millisecond = from_string<std::uint16_t>(sf.substr(0, 3), 0u);
|
|
925 time.microsecond = from_string<std::uint16_t>(sf.substr(3, 3), 0u);
|
|
926 time.nanosecond = from_string<std::uint16_t>(sf.substr(6, 3), 0u);
|
|
927 }
|
|
928 else if(sf.size() >= 6)
|
|
929 {
|
|
930 time.millisecond = from_string<std::uint16_t>(sf.substr(0, 3), 0u);
|
|
931 time.microsecond = from_string<std::uint16_t>(sf.substr(3, 3), 0u);
|
|
932 }
|
|
933 else if(sf.size() >= 3)
|
|
934 {
|
|
935 time.millisecond = from_string<std::uint16_t>(sf, 0u);
|
|
936 time.microsecond = 0u;
|
|
937 }
|
|
938 }
|
|
939 else
|
|
940 {
|
|
941 if(before_secfrac != inner_loc.iter())
|
|
942 {
|
|
943 throw internal_error(format_underline(
|
|
944 "toml::parse_local_time: invalid subsecond format",
|
|
945 {{source_location(inner_loc), "here"}}),
|
|
946 source_location(inner_loc));
|
|
947 }
|
|
948 }
|
|
949 return ok(std::make_pair(time, token.unwrap()));
|
|
950 }
|
|
951 else
|
|
952 {
|
|
953 loc.reset(first);
|
|
954 return err(format_underline("toml::parse_local_time: ",
|
|
955 {{source_location(loc), "the next token is not a local_time"}}));
|
|
956 }
|
|
957 }
|
|
958
|
|
959 inline result<std::pair<local_datetime, region>, std::string>
|
|
960 parse_local_datetime(location& loc)
|
|
961 {
|
|
962 const auto first = loc.iter();
|
|
963 if(const auto token = lex_local_date_time::invoke(loc))
|
|
964 {
|
|
965 location inner_loc(loc.name(), token.unwrap().str());
|
|
966 const auto date = parse_local_date(inner_loc);
|
|
967 if(!date || inner_loc.iter() == inner_loc.end())
|
|
968 {
|
|
969 throw internal_error(format_underline(
|
|
970 "toml::parse_local_datetime: invalid datetime format",
|
|
971 {{source_location(inner_loc), "date, not datetime"}}),
|
|
972 source_location(inner_loc));
|
|
973 }
|
|
974 const char delim = *(inner_loc.iter());
|
|
975 if(delim != 'T' && delim != 't' && delim != ' ')
|
|
976 {
|
|
977 throw internal_error(format_underline(
|
|
978 "toml::parse_local_datetime: invalid datetime format",
|
|
979 {{source_location(inner_loc), "should be `T` or ` ` (space)"}}),
|
|
980 source_location(inner_loc));
|
|
981 }
|
|
982 inner_loc.advance();
|
|
983 const auto time = parse_local_time(inner_loc);
|
|
984 if(!time)
|
|
985 {
|
|
986 throw internal_error(format_underline(
|
|
987 "toml::parse_local_datetime: invalid datetime format",
|
|
988 {{source_location(inner_loc), "invalid time format"}}),
|
|
989 source_location(inner_loc));
|
|
990 }
|
|
991 return ok(std::make_pair(
|
|
992 local_datetime(date.unwrap().first, time.unwrap().first),
|
|
993 token.unwrap()));
|
|
994 }
|
|
995 else
|
|
996 {
|
|
997 loc.reset(first);
|
|
998 return err(format_underline("toml::parse_local_datetime: ",
|
|
999 {{source_location(loc), "the next token is not a local_datetime"}}));
|
|
1000 }
|
|
1001 }
|
|
1002
|
|
1003 inline result<std::pair<offset_datetime, region>, std::string>
|
|
1004 parse_offset_datetime(location& loc)
|
|
1005 {
|
|
1006 const auto first = loc.iter();
|
|
1007 if(const auto token = lex_offset_date_time::invoke(loc))
|
|
1008 {
|
|
1009 location inner_loc(loc.name(), token.unwrap().str());
|
|
1010 const auto datetime = parse_local_datetime(inner_loc);
|
|
1011 if(!datetime || inner_loc.iter() == inner_loc.end())
|
|
1012 {
|
|
1013 throw internal_error(format_underline(
|
|
1014 "toml::parse_offset_datetime: invalid datetime format",
|
|
1015 {{source_location(inner_loc), "date, not datetime"}}),
|
|
1016 source_location(inner_loc));
|
|
1017 }
|
|
1018 time_offset offset(0, 0);
|
|
1019 if(const auto ofs = lex_time_numoffset::invoke(inner_loc))
|
|
1020 {
|
|
1021 const auto str = ofs.unwrap().str();
|
|
1022
|
|
1023 const auto hour = from_string<int>(str.substr(1,2), 0);
|
|
1024 const auto minute = from_string<int>(str.substr(4,2), 0);
|
|
1025
|
|
1026 if((hour < 0 || 23 < hour) || (minute < 0 || 59 < minute))
|
|
1027 {
|
|
1028 throw syntax_error(format_underline("toml::parse_offset_datetime: "
|
|
1029 "invalid offset: it does not conform RFC3339.", {{
|
|
1030 source_location(loc), "month should be 01-12, day should be"
|
|
1031 " 01-28,29,30,31, depending on month/year."
|
|
1032 }}), source_location(inner_loc));
|
|
1033 }
|
|
1034
|
|
1035 if(str.front() == '+')
|
|
1036 {
|
|
1037 offset = time_offset(hour, minute);
|
|
1038 }
|
|
1039 else
|
|
1040 {
|
|
1041 offset = time_offset(-hour, -minute);
|
|
1042 }
|
|
1043 }
|
|
1044 else if(*inner_loc.iter() != 'Z' && *inner_loc.iter() != 'z')
|
|
1045 {
|
|
1046 throw internal_error(format_underline(
|
|
1047 "toml::parse_offset_datetime: invalid datetime format",
|
|
1048 {{source_location(inner_loc), "should be `Z` or `+HH:MM`"}}),
|
|
1049 source_location(inner_loc));
|
|
1050 }
|
|
1051 return ok(std::make_pair(offset_datetime(datetime.unwrap().first, offset),
|
|
1052 token.unwrap()));
|
|
1053 }
|
|
1054 else
|
|
1055 {
|
|
1056 loc.reset(first);
|
|
1057 return err(format_underline("toml::parse_offset_datetime: ",
|
|
1058 {{source_location(loc), "the next token is not a offset_datetime"}}));
|
|
1059 }
|
|
1060 }
|
|
1061
|
|
1062 inline result<std::pair<key, region>, std::string>
|
|
1063 parse_simple_key(location& loc)
|
|
1064 {
|
|
1065 if(const auto bstr = parse_basic_string(loc))
|
|
1066 {
|
|
1067 return ok(std::make_pair(bstr.unwrap().first.str, bstr.unwrap().second));
|
|
1068 }
|
|
1069 if(const auto lstr = parse_literal_string(loc))
|
|
1070 {
|
|
1071 return ok(std::make_pair(lstr.unwrap().first.str, lstr.unwrap().second));
|
|
1072 }
|
|
1073 if(const auto bare = lex_unquoted_key::invoke(loc))
|
|
1074 {
|
|
1075 const auto reg = bare.unwrap();
|
|
1076 return ok(std::make_pair(reg.str(), reg));
|
|
1077 }
|
|
1078 return err(format_underline("toml::parse_simple_key: ",
|
|
1079 {{source_location(loc), "the next token is not a simple key"}}));
|
|
1080 }
|
|
1081
|
|
1082 // dotted key become vector of keys
|
|
1083 inline result<std::pair<std::vector<key>, region>, std::string>
|
|
1084 parse_key(location& loc)
|
|
1085 {
|
|
1086 const auto first = loc.iter();
|
|
1087 // dotted key -> `foo.bar.baz` where several single keys are chained by
|
|
1088 // dots. Whitespaces between keys and dots are allowed.
|
|
1089 if(const auto token = lex_dotted_key::invoke(loc))
|
|
1090 {
|
|
1091 const auto reg = token.unwrap();
|
|
1092 location inner_loc(loc.name(), reg.str());
|
|
1093 std::vector<key> keys;
|
|
1094
|
|
1095 while(inner_loc.iter() != inner_loc.end())
|
|
1096 {
|
|
1097 lex_ws::invoke(inner_loc);
|
|
1098 if(const auto k = parse_simple_key(inner_loc))
|
|
1099 {
|
|
1100 keys.push_back(k.unwrap().first);
|
|
1101 }
|
|
1102 else
|
|
1103 {
|
|
1104 throw internal_error(format_underline(
|
|
1105 "toml::parse_key: dotted key contains invalid key",
|
|
1106 {{source_location(inner_loc), k.unwrap_err()}}),
|
|
1107 source_location(inner_loc));
|
|
1108 }
|
|
1109
|
|
1110 lex_ws::invoke(inner_loc);
|
|
1111 if(inner_loc.iter() == inner_loc.end())
|
|
1112 {
|
|
1113 break;
|
|
1114 }
|
|
1115 else if(*inner_loc.iter() == '.')
|
|
1116 {
|
|
1117 inner_loc.advance(); // to skip `.`
|
|
1118 }
|
|
1119 else
|
|
1120 {
|
|
1121 throw internal_error(format_underline("toml::parse_key: "
|
|
1122 "dotted key contains invalid key ",
|
|
1123 {{source_location(inner_loc), "should be `.`"}}),
|
|
1124 source_location(inner_loc));
|
|
1125 }
|
|
1126 }
|
|
1127 return ok(std::make_pair(keys, reg));
|
|
1128 }
|
|
1129 loc.reset(first);
|
|
1130
|
|
1131 // simple_key: a single (basic_string|literal_string|bare key)
|
|
1132 if(const auto smpl = parse_simple_key(loc))
|
|
1133 {
|
|
1134 return ok(std::make_pair(std::vector<key>(1, smpl.unwrap().first),
|
|
1135 smpl.unwrap().second));
|
|
1136 }
|
|
1137 return err(format_underline("toml::parse_key: an invalid key appeared.",
|
|
1138 {{source_location(loc), "is not a valid key"}}, {
|
|
1139 "bare keys : non-empty strings composed only of [A-Za-z0-9_-].",
|
|
1140 "quoted keys: same as \"basic strings\" or 'literal strings'.",
|
|
1141 "dotted keys: sequence of bare or quoted keys joined with a dot."
|
|
1142 }));
|
|
1143 }
|
|
1144
|
|
1145 // forward-decl to implement parse_array and parse_table
|
|
1146 template<typename Value>
|
|
1147 result<Value, std::string> parse_value(location&, const std::size_t n_rec);
|
|
1148
|
|
1149 template<typename Value>
|
|
1150 result<std::pair<typename Value::array_type, region>, std::string>
|
|
1151 parse_array(location& loc, const std::size_t n_rec)
|
|
1152 {
|
|
1153 using value_type = Value;
|
|
1154 using array_type = typename value_type::array_type;
|
|
1155
|
|
1156 if(n_rec > TOML11_VALUE_RECURSION_LIMIT)
|
|
1157 {
|
|
1158 // parse_array does not have any way to handle recursive error currently...
|
|
1159 throw syntax_error(std::string("toml::parse_array: recursion limit ("
|
|
1160 TOML11_STRINGIZE(TOML11_VALUE_RECURSION_LIMIT) ") exceeded"),
|
|
1161 source_location(loc));
|
|
1162 }
|
|
1163
|
|
1164 const auto first = loc.iter();
|
|
1165 if(loc.iter() == loc.end())
|
|
1166 {
|
|
1167 return err("toml::parse_array: input is empty");
|
|
1168 }
|
|
1169 if(*loc.iter() != '[')
|
|
1170 {
|
|
1171 return err("toml::parse_array: token is not an array");
|
|
1172 }
|
|
1173 loc.advance();
|
|
1174
|
|
1175 using lex_ws_comment_newline = repeat<
|
|
1176 either<lex_wschar, lex_newline, lex_comment>, unlimited>;
|
|
1177
|
|
1178 array_type retval;
|
|
1179 while(loc.iter() != loc.end())
|
|
1180 {
|
|
1181 lex_ws_comment_newline::invoke(loc); // skip
|
|
1182
|
|
1183 if(loc.iter() != loc.end() && *loc.iter() == ']')
|
|
1184 {
|
|
1185 loc.advance(); // skip ']'
|
|
1186 return ok(std::make_pair(retval,
|
|
1187 region(loc, first, loc.iter())));
|
|
1188 }
|
|
1189
|
|
1190 if(auto val = parse_value<value_type>(loc, n_rec+1))
|
|
1191 {
|
|
1192 // After TOML v1.0.0-rc.1, array becomes to be able to have values
|
|
1193 // with different types. So here we will omit this by default.
|
|
1194 //
|
|
1195 // But some of the test-suite checks if the parser accepts a hetero-
|
|
1196 // geneous arrays, so we keep this for a while.
|
|
1197 #ifdef TOML11_DISALLOW_HETEROGENEOUS_ARRAYS
|
|
1198 if(!retval.empty() && retval.front().type() != val.as_ok().type())
|
|
1199 {
|
|
1200 auto array_start_loc = loc;
|
|
1201 array_start_loc.reset(first);
|
|
1202
|
|
1203 throw syntax_error(format_underline("toml::parse_array: "
|
|
1204 "type of elements should be the same each other.", {
|
|
1205 {source_location(array_start_loc), "array starts here"},
|
|
1206 {
|
|
1207 retval.front().location(),
|
|
1208 "value has type " + stringize(retval.front().type())
|
|
1209 },
|
|
1210 {
|
|
1211 val.unwrap().location(),
|
|
1212 "value has different type, " + stringize(val.unwrap().type())
|
|
1213 }
|
|
1214 }), source_location(loc));
|
|
1215 }
|
|
1216 #endif
|
|
1217 retval.push_back(std::move(val.unwrap()));
|
|
1218 }
|
|
1219 else
|
|
1220 {
|
|
1221 auto array_start_loc = loc;
|
|
1222 array_start_loc.reset(first);
|
|
1223
|
|
1224 throw syntax_error(format_underline("toml::parse_array: "
|
|
1225 "value having invalid format appeared in an array", {
|
|
1226 {source_location(array_start_loc), "array starts here"},
|
|
1227 {source_location(loc), "it is not a valid value."}
|
|
1228 }), source_location(loc));
|
|
1229 }
|
|
1230
|
|
1231 using lex_array_separator = sequence<maybe<lex_ws_comment_newline>, character<','>>;
|
|
1232 const auto sp = lex_array_separator::invoke(loc);
|
|
1233 if(!sp)
|
|
1234 {
|
|
1235 lex_ws_comment_newline::invoke(loc);
|
|
1236 if(loc.iter() != loc.end() && *loc.iter() == ']')
|
|
1237 {
|
|
1238 loc.advance(); // skip ']'
|
|
1239 return ok(std::make_pair(retval,
|
|
1240 region(loc, first, loc.iter())));
|
|
1241 }
|
|
1242 else
|
|
1243 {
|
|
1244 auto array_start_loc = loc;
|
|
1245 array_start_loc.reset(first);
|
|
1246
|
|
1247 throw syntax_error(format_underline("toml::parse_array:"
|
|
1248 " missing array separator `,` after a value", {
|
|
1249 {source_location(array_start_loc), "array starts here"},
|
|
1250 {source_location(loc), "should be `,`"}
|
|
1251 }), source_location(loc));
|
|
1252 }
|
|
1253 }
|
|
1254 }
|
|
1255 loc.reset(first);
|
|
1256 throw syntax_error(format_underline("toml::parse_array: "
|
|
1257 "array did not closed by `]`",
|
|
1258 {{source_location(loc), "should be closed"}}),
|
|
1259 source_location(loc));
|
|
1260 }
|
|
1261
|
|
1262 template<typename Value>
|
|
1263 result<std::pair<std::pair<std::vector<key>, region>, Value>, std::string>
|
|
1264 parse_key_value_pair(location& loc, const std::size_t n_rec)
|
|
1265 {
|
|
1266 using value_type = Value;
|
|
1267
|
|
1268 const auto first = loc.iter();
|
|
1269 auto key_reg = parse_key(loc);
|
|
1270 if(!key_reg)
|
|
1271 {
|
|
1272 std::string msg = std::move(key_reg.unwrap_err());
|
|
1273 // if the next token is keyvalue-separator, it means that there are no
|
|
1274 // key. then we need to show error as "empty key is not allowed".
|
|
1275 if(const auto keyval_sep = lex_keyval_sep::invoke(loc))
|
|
1276 {
|
|
1277 loc.reset(first);
|
|
1278 msg = format_underline("toml::parse_key_value_pair: "
|
|
1279 "empty key is not allowed.",
|
|
1280 {{source_location(loc), "key expected before '='"}});
|
|
1281 }
|
|
1282 return err(std::move(msg));
|
|
1283 }
|
|
1284
|
|
1285 const auto kvsp = lex_keyval_sep::invoke(loc);
|
|
1286 if(!kvsp)
|
|
1287 {
|
|
1288 std::string msg;
|
|
1289 // if the line contains '=' after the invalid sequence, possibly the
|
|
1290 // error is in the key (like, invalid character in bare key).
|
|
1291 const auto line_end = std::find(loc.iter(), loc.end(), '\n');
|
|
1292 if(std::find(loc.iter(), line_end, '=') != line_end)
|
|
1293 {
|
|
1294 msg = format_underline("toml::parse_key_value_pair: "
|
|
1295 "invalid format for key",
|
|
1296 {{source_location(loc), "invalid character in key"}},
|
|
1297 {"Did you forget '.' to separate dotted-key?",
|
|
1298 "Allowed characters for bare key are [0-9a-zA-Z_-]."});
|
|
1299 }
|
|
1300 else // if not, the error is lack of key-value separator.
|
|
1301 {
|
|
1302 msg = format_underline("toml::parse_key_value_pair: "
|
|
1303 "missing key-value separator `=`",
|
|
1304 {{source_location(loc), "should be `=`"}});
|
|
1305 }
|
|
1306 loc.reset(first);
|
|
1307 return err(std::move(msg));
|
|
1308 }
|
|
1309
|
|
1310 const auto after_kvsp = loc.iter(); // err msg
|
|
1311 auto val = parse_value<value_type>(loc, n_rec);
|
|
1312 if(!val)
|
|
1313 {
|
|
1314 std::string msg;
|
|
1315 loc.reset(after_kvsp);
|
|
1316 // check there is something not a comment/whitespace after `=`
|
|
1317 if(sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>::invoke(loc))
|
|
1318 {
|
|
1319 loc.reset(after_kvsp);
|
|
1320 msg = format_underline("toml::parse_key_value_pair: "
|
|
1321 "missing value after key-value separator '='",
|
|
1322 {{source_location(loc), "expected value, but got nothing"}});
|
|
1323 }
|
|
1324 else // there is something not a comment/whitespace, so invalid format.
|
|
1325 {
|
|
1326 msg = std::move(val.unwrap_err());
|
|
1327 }
|
|
1328 loc.reset(first);
|
|
1329 return err(msg);
|
|
1330 }
|
|
1331 return ok(std::make_pair(std::move(key_reg.unwrap()),
|
|
1332 std::move(val.unwrap())));
|
|
1333 }
|
|
1334
|
|
1335 // for error messages.
|
|
1336 template<typename InputIterator>
|
|
1337 std::string format_dotted_keys(InputIterator first, const InputIterator last)
|
|
1338 {
|
|
1339 static_assert(std::is_same<key,
|
|
1340 typename std::iterator_traits<InputIterator>::value_type>::value,"");
|
|
1341
|
|
1342 std::string retval(*first++);
|
|
1343 for(; first != last; ++first)
|
|
1344 {
|
|
1345 retval += '.';
|
|
1346 retval += *first;
|
|
1347 }
|
|
1348 return retval;
|
|
1349 }
|
|
1350
|
|
1351 // forward decl for is_valid_forward_table_definition
|
|
1352 result<std::pair<std::vector<key>, region>, std::string>
|
|
1353 parse_table_key(location& loc);
|
|
1354 result<std::pair<std::vector<key>, region>, std::string>
|
|
1355 parse_array_table_key(location& loc);
|
|
1356 template<typename Value>
|
|
1357 result<std::pair<typename Value::table_type, region>, std::string>
|
|
1358 parse_inline_table(location& loc, const std::size_t n_rec);
|
|
1359
|
|
1360 // The following toml file is allowed.
|
|
1361 // ```toml
|
|
1362 // [a.b.c] # here, table `a` has element `b`.
|
|
1363 // foo = "bar"
|
|
1364 // [a] # merge a = {baz = "qux"} to a = {b = {...}}
|
|
1365 // baz = "qux"
|
|
1366 // ```
|
|
1367 // But the following is not allowed.
|
|
1368 // ```toml
|
|
1369 // [a]
|
|
1370 // b.c.foo = "bar"
|
|
1371 // [a] # error! the same table [a] defined!
|
|
1372 // baz = "qux"
|
|
1373 // ```
|
|
1374 // The following is neither allowed.
|
|
1375 // ```toml
|
|
1376 // a = { b.c.foo = "bar"}
|
|
1377 // [a] # error! the same table [a] defined!
|
|
1378 // baz = "qux"
|
|
1379 // ```
|
|
1380 // Here, it parses region of `tab->at(k)` as a table key and check the depth
|
|
1381 // of the key. If the key region points deeper node, it would be allowed.
|
|
1382 // Otherwise, the key points the same node. It would be rejected.
|
|
1383 template<typename Value, typename Iterator>
|
|
1384 bool is_valid_forward_table_definition(const Value& fwd, const Value& inserting,
|
|
1385 Iterator key_first, Iterator key_curr, Iterator key_last)
|
|
1386 {
|
|
1387 // ------------------------------------------------------------------------
|
|
1388 // check type of the value to be inserted/merged
|
|
1389
|
|
1390 std::string inserting_reg = "";
|
|
1391 if(const auto ptr = detail::get_region(inserting))
|
|
1392 {
|
|
1393 inserting_reg = ptr->str();
|
|
1394 }
|
|
1395 location inserting_def("internal", std::move(inserting_reg));
|
|
1396 if(const auto inlinetable = parse_inline_table<Value>(inserting_def, 0))
|
|
1397 {
|
|
1398 // check if we are overwriting existing table.
|
|
1399 // ```toml
|
|
1400 // # NG
|
|
1401 // a.b = 42
|
|
1402 // a = {d = 3.14}
|
|
1403 // ```
|
|
1404 // Inserting an inline table to a existing super-table is not allowed in
|
|
1405 // any case. If we found it, we can reject it without further checking.
|
|
1406 return false;
|
|
1407 }
|
|
1408
|
|
1409 // Valid and invalid cases when inserting to the [a.b] table:
|
|
1410 //
|
|
1411 // ## Invalid
|
|
1412 //
|
|
1413 // ```toml
|
|
1414 // # invalid
|
|
1415 // [a]
|
|
1416 // b.c.d = "foo"
|
|
1417 // [a.b] # a.b is already defined and closed
|
|
1418 // d = "bar"
|
|
1419 // ```
|
|
1420 // ```toml
|
|
1421 // # invalid
|
|
1422 // a = {b.c.d = "foo"}
|
|
1423 // [a.b] # a is already defined and inline table is closed
|
|
1424 // d = "bar"
|
|
1425 // ```
|
|
1426 // ```toml
|
|
1427 // # invalid
|
|
1428 // a.b.c.d = "foo"
|
|
1429 // [a.b] # a.b is already defined and dotted-key table is closed
|
|
1430 // d = "bar"
|
|
1431 // ```
|
|
1432 //
|
|
1433 // ## Valid
|
|
1434 //
|
|
1435 // ```toml
|
|
1436 // # OK. a.b is defined, but is *overwritable*
|
|
1437 // [a.b.c]
|
|
1438 // d = "foo"
|
|
1439 // [a.b]
|
|
1440 // d = "bar"
|
|
1441 // ```
|
|
1442 // ```toml
|
|
1443 // # OK. a.b is defined, but is *overwritable*
|
|
1444 // [a]
|
|
1445 // b.c.d = "foo"
|
|
1446 // b.e = "bar"
|
|
1447 // ```
|
|
1448
|
|
1449 // ------------------------------------------------------------------------
|
|
1450 // check table defined before
|
|
1451
|
|
1452 std::string internal = "";
|
|
1453 if(const auto ptr = detail::get_region(fwd))
|
|
1454 {
|
|
1455 internal = ptr->str();
|
|
1456 }
|
|
1457 location def("internal", std::move(internal));
|
|
1458 if(const auto tabkeys = parse_table_key(def)) // [table.key]
|
|
1459 {
|
|
1460 // table keys always contains all the nodes from the root.
|
|
1461 const auto& tks = tabkeys.unwrap().first;
|
|
1462 if(std::size_t(std::distance(key_first, key_last)) == tks.size() &&
|
|
1463 std::equal(tks.begin(), tks.end(), key_first))
|
|
1464 {
|
|
1465 // the keys are equivalent. it is not allowed.
|
|
1466 return false;
|
|
1467 }
|
|
1468 // the keys are not equivalent. it is allowed.
|
|
1469 return true;
|
|
1470 }
|
|
1471 // nested array-of-table definition implicitly defines tables.
|
|
1472 // those tables can be reopened.
|
|
1473 if(const auto atabkeys = parse_array_table_key(def))
|
|
1474 {
|
|
1475 // table keys always contains all the nodes from the root.
|
|
1476 const auto& tks = atabkeys.unwrap().first;
|
|
1477 if(std::size_t(std::distance(key_first, key_last)) == tks.size() &&
|
|
1478 std::equal(tks.begin(), tks.end(), key_first))
|
|
1479 {
|
|
1480 // the keys are equivalent. it is not allowed.
|
|
1481 return false;
|
|
1482 }
|
|
1483 // the keys are not equivalent. it is allowed.
|
|
1484 return true;
|
|
1485 }
|
|
1486 if(const auto dotkeys = parse_key(def)) // a.b.c = "foo"
|
|
1487 {
|
|
1488 // consider the following case.
|
|
1489 // [a]
|
|
1490 // b.c = {d = 42}
|
|
1491 // [a.b.c]
|
|
1492 // e = 2.71
|
|
1493 // this defines the table [a.b.c] twice. no?
|
|
1494 if(const auto reopening_dotkey_by_table = parse_table_key(inserting_def))
|
|
1495 {
|
|
1496 // re-opening a dotkey-defined table by a table is invalid.
|
|
1497 // only dotkey can append a key-val. Like:
|
|
1498 // ```toml
|
|
1499 // a.b.c = "foo"
|
|
1500 // a.b.d = "bar" # OK. reopen `a.b` by dotkey
|
|
1501 // [a.b]
|
|
1502 // e = "bar" # Invalid. re-opening `a.b` by [a.b] is not allowed.
|
|
1503 // ```
|
|
1504 return false;
|
|
1505 }
|
|
1506
|
|
1507 // a dotted key starts from the node representing a table in which the
|
|
1508 // dotted key belongs to.
|
|
1509 const auto& dks = dotkeys.unwrap().first;
|
|
1510 if(std::size_t(std::distance(key_curr, key_last)) == dks.size() &&
|
|
1511 std::equal(dks.begin(), dks.end(), key_curr))
|
|
1512 {
|
|
1513 // the keys are equivalent. it is not allowed.
|
|
1514 return false;
|
|
1515 }
|
|
1516 // the keys are not equivalent. it is allowed.
|
|
1517 return true;
|
|
1518 }
|
|
1519 return false;
|
|
1520 }
|
|
1521
|
|
1522 template<typename Value, typename InputIterator>
|
|
1523 result<bool, std::string>
|
|
1524 insert_nested_key(typename Value::table_type& root, const Value& v,
|
|
1525 InputIterator iter, const InputIterator last,
|
|
1526 region key_reg,
|
|
1527 const bool is_array_of_table = false)
|
|
1528 {
|
|
1529 static_assert(std::is_same<key,
|
|
1530 typename std::iterator_traits<InputIterator>::value_type>::value,"");
|
|
1531
|
|
1532 using value_type = Value;
|
|
1533 using table_type = typename value_type::table_type;
|
|
1534 using array_type = typename value_type::array_type;
|
|
1535
|
|
1536 const auto first = iter;
|
|
1537 assert(iter != last);
|
|
1538
|
|
1539 table_type* tab = std::addressof(root);
|
|
1540 for(; iter != last; ++iter) // search recursively
|
|
1541 {
|
|
1542 const key& k = *iter;
|
|
1543 if(std::next(iter) == last) // k is the last key
|
|
1544 {
|
|
1545 // XXX if the value is array-of-tables, there can be several
|
|
1546 // tables that are in the same array. in that case, we need to
|
|
1547 // find the last element and insert it to there.
|
|
1548 if(is_array_of_table)
|
|
1549 {
|
|
1550 if(tab->count(k) == 1) // there is already an array of table
|
|
1551 {
|
|
1552 if(tab->at(k).is_table())
|
|
1553 {
|
|
1554 // show special err msg for conflicting table
|
|
1555 throw syntax_error(format_underline(concat_to_string(
|
|
1556 "toml::insert_value: array of table (\"",
|
|
1557 format_dotted_keys(first, last),
|
|
1558 "\") cannot be defined"), {
|
|
1559 {tab->at(k).location(), "table already defined"},
|
|
1560 {v.location(), "this conflicts with the previous table"}
|
|
1561 }), v.location());
|
|
1562 }
|
|
1563 else if(!(tab->at(k).is_array()))
|
|
1564 {
|
|
1565 throw syntax_error(format_underline(concat_to_string(
|
|
1566 "toml::insert_value: array of table (\"",
|
|
1567 format_dotted_keys(first, last), "\") collides with"
|
|
1568 " existing value"), {
|
|
1569 {tab->at(k).location(),
|
|
1570 concat_to_string("this ", tab->at(k).type(),
|
|
1571 " value already exists")},
|
|
1572 {v.location(),
|
|
1573 "while inserting this array-of-tables"}
|
|
1574 }), v.location());
|
|
1575 }
|
|
1576 // the above if-else-if checks tab->at(k) is an array
|
|
1577 auto& a = tab->at(k).as_array();
|
|
1578 // If table element is defined as [[array_of_tables]], it
|
|
1579 // cannot be an empty array. If an array of tables is
|
|
1580 // defined as `aot = []`, it cannot be appended.
|
|
1581 if(a.empty() || !(a.front().is_table()))
|
|
1582 {
|
|
1583 throw syntax_error(format_underline(concat_to_string(
|
|
1584 "toml::insert_value: array of table (\"",
|
|
1585 format_dotted_keys(first, last), "\") collides with"
|
|
1586 " existing value"), {
|
|
1587 {tab->at(k).location(),
|
|
1588 concat_to_string("this ", tab->at(k).type(),
|
|
1589 " value already exists")},
|
|
1590 {v.location(),
|
|
1591 "while inserting this array-of-tables"}
|
|
1592 }), v.location());
|
|
1593 }
|
|
1594 // avoid conflicting array of table like the following.
|
|
1595 // ```toml
|
|
1596 // a = [{b = 42}] # define a as an array of *inline* tables
|
|
1597 // [[a]] # a is an array of *multi-line* tables
|
|
1598 // b = 54
|
|
1599 // ```
|
|
1600 // Here, from the type information, these cannot be detected
|
|
1601 // because inline table is also a table.
|
|
1602 // But toml v0.5.0 explicitly says it is invalid. The above
|
|
1603 // array-of-tables has a static size and appending to the
|
|
1604 // array is invalid.
|
|
1605 // In this library, multi-line table value has a region
|
|
1606 // that points to the key of the table (e.g. [[a]]). By
|
|
1607 // comparing the first two letters in key, we can detect
|
|
1608 // the array-of-table is inline or multiline.
|
|
1609 if(const auto ptr = detail::get_region(a.front()))
|
|
1610 {
|
|
1611 if(ptr->str().substr(0,2) != "[[")
|
|
1612 {
|
|
1613 throw syntax_error(format_underline(concat_to_string(
|
|
1614 "toml::insert_value: array of table (\"",
|
|
1615 format_dotted_keys(first, last), "\") collides "
|
|
1616 "with existing array-of-tables"), {
|
|
1617 {tab->at(k).location(),
|
|
1618 concat_to_string("this ", tab->at(k).type(),
|
|
1619 " value has static size")},
|
|
1620 {v.location(),
|
|
1621 "appending it to the statically sized array"}
|
|
1622 }), v.location());
|
|
1623 }
|
|
1624 }
|
|
1625 a.push_back(v);
|
|
1626 return ok(true);
|
|
1627 }
|
|
1628 else // if not, we need to create the array of table
|
|
1629 {
|
|
1630 // XXX: Consider the following array of tables.
|
|
1631 // ```toml
|
|
1632 // # This is a comment.
|
|
1633 // [[aot]]
|
|
1634 // foo = "bar"
|
|
1635 // ```
|
|
1636 // Here, the comment is for `aot`. But here, actually two
|
|
1637 // values are defined. An array that contains tables, named
|
|
1638 // `aot`, and the 0th element of the `aot`, `{foo = "bar"}`.
|
|
1639 // Those two are different from each other. But both of them
|
|
1640 // points to the same portion of the TOML file, `[[aot]]`,
|
|
1641 // so `key_reg.comments()` returns `# This is a comment`.
|
|
1642 // If it is assigned as a comment of `aot` defined here, the
|
|
1643 // comment will be duplicated. Both the `aot` itself and
|
|
1644 // the 0-th element will have the same comment. This causes
|
|
1645 // "duplication of the same comments" bug when the data is
|
|
1646 // serialized.
|
|
1647 // Next, consider the following.
|
|
1648 // ```toml
|
|
1649 // # comment 1
|
|
1650 // aot = [
|
|
1651 // # comment 2
|
|
1652 // {foo = "bar"},
|
|
1653 // ]
|
|
1654 // ```
|
|
1655 // In this case, we can distinguish those two comments. So
|
|
1656 // here we need to add "comment 1" to the `aot` and
|
|
1657 // "comment 2" to the 0th element of that.
|
|
1658 // To distinguish those two, we check the key region.
|
|
1659 std::vector<std::string> comments{/* empty by default */};
|
|
1660 if(key_reg.str().substr(0, 2) != "[[")
|
|
1661 {
|
|
1662 comments = key_reg.comments();
|
|
1663 }
|
|
1664 value_type aot(array_type(1, v), key_reg, std::move(comments));
|
|
1665 tab->insert(std::make_pair(k, aot));
|
|
1666 return ok(true);
|
|
1667 }
|
|
1668 } // end if(array of table)
|
|
1669
|
|
1670 if(tab->count(k) == 1)
|
|
1671 {
|
|
1672 if(tab->at(k).is_table() && v.is_table())
|
|
1673 {
|
|
1674 if(!is_valid_forward_table_definition(
|
|
1675 tab->at(k), v, first, iter, last))
|
|
1676 {
|
|
1677 throw syntax_error(format_underline(concat_to_string(
|
|
1678 "toml::insert_value: table (\"",
|
|
1679 format_dotted_keys(first, last),
|
|
1680 "\") already exists."), {
|
|
1681 {tab->at(k).location(), "table already exists here"},
|
|
1682 {v.location(), "table defined twice"}
|
|
1683 }), v.location());
|
|
1684 }
|
|
1685 // to allow the following toml file.
|
|
1686 // [a.b.c]
|
|
1687 // d = 42
|
|
1688 // [a]
|
|
1689 // e = 2.71
|
|
1690 auto& t = tab->at(k).as_table();
|
|
1691 for(const auto& kv : v.as_table())
|
|
1692 {
|
|
1693 if(tab->at(k).contains(kv.first))
|
|
1694 {
|
|
1695 throw syntax_error(format_underline(concat_to_string(
|
|
1696 "toml::insert_value: value (\"",
|
|
1697 format_dotted_keys(first, last),
|
|
1698 "\") already exists."), {
|
|
1699 {t.at(kv.first).location(), "already exists here"},
|
|
1700 {v.location(), "this defined twice"}
|
|
1701 }), v.location());
|
|
1702 }
|
|
1703 t[kv.first] = kv.second;
|
|
1704 }
|
|
1705 detail::change_region(tab->at(k), key_reg);
|
|
1706 return ok(true);
|
|
1707 }
|
|
1708 else if(v.is_table() &&
|
|
1709 tab->at(k).is_array() &&
|
|
1710 tab->at(k).as_array().size() > 0 &&
|
|
1711 tab->at(k).as_array().front().is_table())
|
|
1712 {
|
|
1713 throw syntax_error(format_underline(concat_to_string(
|
|
1714 "toml::insert_value: array of tables (\"",
|
|
1715 format_dotted_keys(first, last), "\") already exists."), {
|
|
1716 {tab->at(k).location(), "array of tables defined here"},
|
|
1717 {v.location(), "table conflicts with the previous array of table"}
|
|
1718 }), v.location());
|
|
1719 }
|
|
1720 else
|
|
1721 {
|
|
1722 throw syntax_error(format_underline(concat_to_string(
|
|
1723 "toml::insert_value: value (\"",
|
|
1724 format_dotted_keys(first, last), "\") already exists."), {
|
|
1725 {tab->at(k).location(), "value already exists here"},
|
|
1726 {v.location(), "value defined twice"}
|
|
1727 }), v.location());
|
|
1728 }
|
|
1729 }
|
|
1730 tab->insert(std::make_pair(k, v));
|
|
1731 return ok(true);
|
|
1732 }
|
|
1733 else // k is not the last one, we should insert recursively
|
|
1734 {
|
|
1735 // if there is no corresponding value, insert it first.
|
|
1736 // related: you don't need to write
|
|
1737 // # [x]
|
|
1738 // # [x.y]
|
|
1739 // to write
|
|
1740 // [x.y.z]
|
|
1741 if(tab->count(k) == 0)
|
|
1742 {
|
|
1743 // a table that is defined implicitly doesn't have any comments.
|
|
1744 (*tab)[k] = value_type(table_type{}, key_reg, {/*no comment*/});
|
|
1745 }
|
|
1746
|
|
1747 // type checking...
|
|
1748 if(tab->at(k).is_table())
|
|
1749 {
|
|
1750 // According to toml-lang/toml:36d3091b3 "Clarify that inline
|
|
1751 // tables are immutable", check if it adds key-value pair to an
|
|
1752 // inline table.
|
|
1753 if(const auto* ptr = get_region(tab->at(k)))
|
|
1754 {
|
|
1755 // here, if the value is a (multi-line) table, the region
|
|
1756 // should be something like `[table-name]`.
|
|
1757 if(ptr->front() == '{')
|
|
1758 {
|
|
1759 throw syntax_error(format_underline(concat_to_string(
|
|
1760 "toml::insert_value: inserting to an inline table (",
|
|
1761 format_dotted_keys(first, std::next(iter)),
|
|
1762 ") but inline tables are immutable"), {
|
|
1763 {tab->at(k).location(), "inline tables are immutable"},
|
|
1764 {v.location(), "inserting this"}
|
|
1765 }), v.location());
|
|
1766 }
|
|
1767 }
|
|
1768 tab = std::addressof((*tab)[k].as_table());
|
|
1769 }
|
|
1770 else if(tab->at(k).is_array()) // inserting to array-of-tables?
|
|
1771 {
|
|
1772 auto& a = (*tab)[k].as_array();
|
|
1773 if(!a.back().is_table())
|
|
1774 {
|
|
1775 throw syntax_error(format_underline(concat_to_string(
|
|
1776 "toml::insert_value: target (",
|
|
1777 format_dotted_keys(first, std::next(iter)),
|
|
1778 ") is neither table nor an array of tables"), {
|
|
1779 {a.back().location(), concat_to_string(
|
|
1780 "actual type is ", a.back().type())},
|
|
1781 {v.location(), "inserting this"}
|
|
1782 }), v.location());
|
|
1783 }
|
|
1784 if(a.empty())
|
|
1785 {
|
|
1786 throw syntax_error(format_underline(concat_to_string(
|
|
1787 "toml::insert_value: table (\"",
|
|
1788 format_dotted_keys(first, last), "\") conflicts with"
|
|
1789 " existing value"), {
|
|
1790 {tab->at(k).location(), std::string("this array is not insertable")},
|
|
1791 {v.location(), std::string("appending it to the statically sized array")}
|
|
1792 }), v.location());
|
|
1793 }
|
|
1794 if(const auto ptr = detail::get_region(a.at(0)))
|
|
1795 {
|
|
1796 if(ptr->str().substr(0,2) != "[[")
|
|
1797 {
|
|
1798 throw syntax_error(format_underline(concat_to_string(
|
|
1799 "toml::insert_value: a table (\"",
|
|
1800 format_dotted_keys(first, last), "\") cannot be "
|
|
1801 "inserted to an existing inline array-of-tables"), {
|
|
1802 {tab->at(k).location(), std::string("this array of table has a static size")},
|
|
1803 {v.location(), std::string("appending it to the statically sized array")}
|
|
1804 }), v.location());
|
|
1805 }
|
|
1806 }
|
|
1807 tab = std::addressof(a.back().as_table());
|
|
1808 }
|
|
1809 else
|
|
1810 {
|
|
1811 throw syntax_error(format_underline(concat_to_string(
|
|
1812 "toml::insert_value: target (",
|
|
1813 format_dotted_keys(first, std::next(iter)),
|
|
1814 ") is neither table nor an array of tables"), {
|
|
1815 {tab->at(k).location(), concat_to_string(
|
|
1816 "actual type is ", tab->at(k).type())},
|
|
1817 {v.location(), "inserting this"}
|
|
1818 }), v.location());
|
|
1819 }
|
|
1820 }
|
|
1821 }
|
|
1822 return err(std::string("toml::detail::insert_nested_key: never reach here"));
|
|
1823 }
|
|
1824
|
|
1825 template<typename Value>
|
|
1826 result<std::pair<typename Value::table_type, region>, std::string>
|
|
1827 parse_inline_table(location& loc, const std::size_t n_rec)
|
|
1828 {
|
|
1829 using value_type = Value;
|
|
1830 using table_type = typename value_type::table_type;
|
|
1831
|
|
1832 if(n_rec > TOML11_VALUE_RECURSION_LIMIT)
|
|
1833 {
|
|
1834 throw syntax_error(std::string("toml::parse_inline_table: recursion limit ("
|
|
1835 TOML11_STRINGIZE(TOML11_VALUE_RECURSION_LIMIT) ") exceeded"),
|
|
1836 source_location(loc));
|
|
1837 }
|
|
1838
|
|
1839 const auto first = loc.iter();
|
|
1840 table_type retval;
|
|
1841 if(!(loc.iter() != loc.end() && *loc.iter() == '{'))
|
|
1842 {
|
|
1843 return err(format_underline("toml::parse_inline_table: ",
|
|
1844 {{source_location(loc), "the next token is not an inline table"}}));
|
|
1845 }
|
|
1846 loc.advance();
|
|
1847
|
|
1848 // check if the inline table is an empty table = { }
|
|
1849 maybe<lex_ws>::invoke(loc);
|
|
1850 if(loc.iter() != loc.end() && *loc.iter() == '}')
|
|
1851 {
|
|
1852 loc.advance(); // skip `}`
|
|
1853 return ok(std::make_pair(retval, region(loc, first, loc.iter())));
|
|
1854 }
|
|
1855
|
|
1856 // it starts from "{". it should be formatted as inline-table
|
|
1857 while(loc.iter() != loc.end())
|
|
1858 {
|
|
1859 const auto kv_r = parse_key_value_pair<value_type>(loc, n_rec+1);
|
|
1860 if(!kv_r)
|
|
1861 {
|
|
1862 return err(kv_r.unwrap_err());
|
|
1863 }
|
|
1864
|
|
1865 const auto& kvpair = kv_r.unwrap();
|
|
1866 const std::vector<key>& keys = kvpair.first.first;
|
|
1867 const auto& key_reg = kvpair.first.second;
|
|
1868 const value_type& val = kvpair.second;
|
|
1869
|
|
1870 const auto inserted =
|
|
1871 insert_nested_key(retval, val, keys.begin(), keys.end(), key_reg);
|
|
1872 if(!inserted)
|
|
1873 {
|
|
1874 throw internal_error("toml::parse_inline_table: "
|
|
1875 "failed to insert value into table: " + inserted.unwrap_err(),
|
|
1876 source_location(loc));
|
|
1877 }
|
|
1878
|
|
1879 using lex_table_separator = sequence<maybe<lex_ws>, character<','>>;
|
|
1880 const auto sp = lex_table_separator::invoke(loc);
|
|
1881
|
|
1882 if(!sp)
|
|
1883 {
|
|
1884 maybe<lex_ws>::invoke(loc);
|
|
1885
|
|
1886 if(loc.iter() == loc.end())
|
|
1887 {
|
|
1888 throw syntax_error(format_underline(
|
|
1889 "toml::parse_inline_table: missing table separator `}` ",
|
|
1890 {{source_location(loc), "should be `}`"}}),
|
|
1891 source_location(loc));
|
|
1892 }
|
|
1893 else if(*loc.iter() == '}')
|
|
1894 {
|
|
1895 loc.advance(); // skip `}`
|
|
1896 return ok(std::make_pair(
|
|
1897 retval, region(loc, first, loc.iter())));
|
|
1898 }
|
|
1899 else if(*loc.iter() == '#' || *loc.iter() == '\r' || *loc.iter() == '\n')
|
|
1900 {
|
|
1901 throw syntax_error(format_underline(
|
|
1902 "toml::parse_inline_table: missing curly brace `}`",
|
|
1903 {{source_location(loc), "should be `}`"}}),
|
|
1904 source_location(loc));
|
|
1905 }
|
|
1906 else
|
|
1907 {
|
|
1908 throw syntax_error(format_underline(
|
|
1909 "toml::parse_inline_table: missing table separator `,` ",
|
|
1910 {{source_location(loc), "should be `,`"}}),
|
|
1911 source_location(loc));
|
|
1912 }
|
|
1913 }
|
|
1914 else // `,` is found
|
|
1915 {
|
|
1916 maybe<lex_ws>::invoke(loc);
|
|
1917 if(loc.iter() != loc.end() && *loc.iter() == '}')
|
|
1918 {
|
|
1919 throw syntax_error(format_underline(
|
|
1920 "toml::parse_inline_table: trailing comma is not allowed in"
|
|
1921 " an inline table",
|
|
1922 {{source_location(loc), "should be `}`"}}),
|
|
1923 source_location(loc));
|
|
1924 }
|
|
1925 }
|
|
1926 }
|
|
1927 loc.reset(first);
|
|
1928 throw syntax_error(format_underline("toml::parse_inline_table: "
|
|
1929 "inline table did not closed by `}`",
|
|
1930 {{source_location(loc), "should be closed"}}),
|
|
1931 source_location(loc));
|
|
1932 }
|
|
1933
|
|
1934 inline result<value_t, std::string> guess_number_type(const location& l)
|
|
1935 {
|
|
1936 // This function tries to find some (common) mistakes by checking characters
|
|
1937 // that follows the last character of a value. But it is often difficult
|
|
1938 // because some non-newline characters can appear after a value. E.g.
|
|
1939 // spaces, tabs, commas (in an array or inline table), closing brackets
|
|
1940 // (of an array or inline table), comment-sign (#). Since this function
|
|
1941 // does not parse further, those characters are always allowed to be there.
|
|
1942 location loc = l;
|
|
1943
|
|
1944 if(lex_offset_date_time::invoke(loc)) {return ok(value_t::offset_datetime);}
|
|
1945 loc.reset(l.iter());
|
|
1946
|
|
1947 if(lex_local_date_time::invoke(loc))
|
|
1948 {
|
|
1949 // bad offset may appear after this.
|
|
1950 if(loc.iter() != loc.end() && (*loc.iter() == '+' || *loc.iter() == '-'
|
|
1951 || *loc.iter() == 'Z' || *loc.iter() == 'z'))
|
|
1952 {
|
|
1953 return err(format_underline("bad offset: should be [+-]HH:MM or Z",
|
|
1954 {{source_location(loc), "[+-]HH:MM or Z"}},
|
|
1955 {"pass: +09:00, -05:30", "fail: +9:00, -5:30"}));
|
|
1956 }
|
|
1957 return ok(value_t::local_datetime);
|
|
1958 }
|
|
1959 loc.reset(l.iter());
|
|
1960
|
|
1961 if(lex_local_date::invoke(loc))
|
|
1962 {
|
|
1963 // bad time may appear after this.
|
|
1964 // A space is allowed as a delimiter between local time. But there are
|
|
1965 // both cases in which a space becomes valid or invalid.
|
|
1966 // - invalid: 2019-06-16 7:00:00
|
|
1967 // - valid : 2019-06-16 07:00:00
|
|
1968 if(loc.iter() != loc.end())
|
|
1969 {
|
|
1970 const auto c = *loc.iter();
|
|
1971 if(c == 'T' || c == 't')
|
|
1972 {
|
|
1973 return err(format_underline("bad time: should be HH:MM:SS.subsec",
|
|
1974 {{source_location(loc), "HH:MM:SS.subsec"}},
|
|
1975 {"pass: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999",
|
|
1976 "fail: 1979-05-27T7:32:00, 1979-05-27 17:32"}));
|
|
1977 }
|
|
1978 if('0' <= c && c <= '9')
|
|
1979 {
|
|
1980 return err(format_underline("bad time: missing T",
|
|
1981 {{source_location(loc), "T or space required here"}},
|
|
1982 {"pass: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999",
|
|
1983 "fail: 1979-05-27T7:32:00, 1979-05-27 7:32"}));
|
|
1984 }
|
|
1985 if(c == ' ' && std::next(loc.iter()) != loc.end() &&
|
|
1986 ('0' <= *std::next(loc.iter()) && *std::next(loc.iter())<= '9'))
|
|
1987 {
|
|
1988 loc.advance();
|
|
1989 return err(format_underline("bad time: should be HH:MM:SS.subsec",
|
|
1990 {{source_location(loc), "HH:MM:SS.subsec"}},
|
|
1991 {"pass: 1979-05-27T07:32:00, 1979-05-27 07:32:00.999999",
|
|
1992 "fail: 1979-05-27T7:32:00, 1979-05-27 7:32"}));
|
|
1993 }
|
|
1994 }
|
|
1995 return ok(value_t::local_date);
|
|
1996 }
|
|
1997 loc.reset(l.iter());
|
|
1998
|
|
1999 if(lex_local_time::invoke(loc)) {return ok(value_t::local_time);}
|
|
2000 loc.reset(l.iter());
|
|
2001
|
|
2002 if(lex_float::invoke(loc))
|
|
2003 {
|
|
2004 if(loc.iter() != loc.end() && *loc.iter() == '_')
|
|
2005 {
|
|
2006 return err(format_underline("bad float: `_` should be surrounded by digits",
|
|
2007 {{source_location(loc), "here"}},
|
|
2008 {"pass: +1.0, -2e-2, 3.141_592_653_589, inf, nan",
|
|
2009 "fail: .0, 1., _1.0, 1.0_, 1_.0, 1.0__0"}));
|
|
2010 }
|
|
2011 return ok(value_t::floating);
|
|
2012 }
|
|
2013 loc.reset(l.iter());
|
|
2014
|
|
2015 if(lex_integer::invoke(loc))
|
|
2016 {
|
|
2017 if(loc.iter() != loc.end())
|
|
2018 {
|
|
2019 const auto c = *loc.iter();
|
|
2020 if(c == '_')
|
|
2021 {
|
|
2022 return err(format_underline("bad integer: `_` should be surrounded by digits",
|
|
2023 {{source_location(loc), "here"}},
|
|
2024 {"pass: -42, 1_000, 1_2_3_4_5, 0xC0FFEE, 0b0010, 0o755",
|
|
2025 "fail: 1__000, 0123"}));
|
|
2026 }
|
|
2027 if('0' <= c && c <= '9')
|
|
2028 {
|
|
2029 // leading zero. point '0'
|
|
2030 loc.retrace();
|
|
2031 return err(format_underline("bad integer: leading zero",
|
|
2032 {{source_location(loc), "here"}},
|
|
2033 {"pass: -42, 1_000, 1_2_3_4_5, 0xC0FFEE, 0b0010, 0o755",
|
|
2034 "fail: 1__000, 0123"}));
|
|
2035 }
|
|
2036 if(c == ':' || c == '-')
|
|
2037 {
|
|
2038 return err(format_underline("bad datetime: invalid format",
|
|
2039 {{source_location(loc), "here"}},
|
|
2040 {"pass: 1979-05-27T07:32:00-07:00, 1979-05-27 07:32:00.999999Z",
|
|
2041 "fail: 1979-05-27T7:32:00-7:00, 1979-05-27 7:32-00:30"}));
|
|
2042 }
|
|
2043 if(c == '.' || c == 'e' || c == 'E')
|
|
2044 {
|
|
2045 return err(format_underline("bad float: invalid format",
|
|
2046 {{source_location(loc), "here"}},
|
|
2047 {"pass: +1.0, -2e-2, 3.141_592_653_589, inf, nan",
|
|
2048 "fail: .0, 1., _1.0, 1.0_, 1_.0, 1.0__0"}));
|
|
2049 }
|
|
2050 }
|
|
2051 return ok(value_t::integer);
|
|
2052 }
|
|
2053 if(loc.iter() != loc.end() && *loc.iter() == '.')
|
|
2054 {
|
|
2055 return err(format_underline("bad float: invalid format",
|
|
2056 {{source_location(loc), "integer part required before this"}},
|
|
2057 {"pass: +1.0, -2e-2, 3.141_592_653_589, inf, nan",
|
|
2058 "fail: .0, 1., _1.0, 1.0_, 1_.0, 1.0__0"}));
|
|
2059 }
|
|
2060 if(loc.iter() != loc.end() && *loc.iter() == '_')
|
|
2061 {
|
|
2062 return err(format_underline("bad number: `_` should be surrounded by digits",
|
|
2063 {{source_location(loc), "`_` is not surrounded by digits"}},
|
|
2064 {"pass: -42, 1_000, 1_2_3_4_5, 0xC0FFEE, 0b0010, 0o755",
|
|
2065 "fail: 1__000, 0123"}));
|
|
2066 }
|
|
2067 return err(format_underline("bad format: unknown value appeared",
|
|
2068 {{source_location(loc), "here"}}));
|
|
2069 }
|
|
2070
|
|
2071 inline result<value_t, std::string> guess_value_type(const location& loc)
|
|
2072 {
|
|
2073 switch(*loc.iter())
|
|
2074 {
|
|
2075 case '"' : {return ok(value_t::string); }
|
|
2076 case '\'': {return ok(value_t::string); }
|
|
2077 case 't' : {return ok(value_t::boolean); }
|
|
2078 case 'f' : {return ok(value_t::boolean); }
|
|
2079 case '[' : {return ok(value_t::array); }
|
|
2080 case '{' : {return ok(value_t::table); }
|
|
2081 case 'i' : {return ok(value_t::floating);} // inf.
|
|
2082 case 'n' : {return ok(value_t::floating);} // nan.
|
|
2083 default : {return guess_number_type(loc);}
|
|
2084 }
|
|
2085 }
|
|
2086
|
|
2087 template<typename Value, typename T>
|
|
2088 result<Value, std::string>
|
|
2089 parse_value_helper(result<std::pair<T, region>, std::string> rslt)
|
|
2090 {
|
|
2091 if(rslt.is_ok())
|
|
2092 {
|
|
2093 auto comments = rslt.as_ok().second.comments();
|
|
2094 return ok(Value(std::move(rslt.as_ok()), std::move(comments)));
|
|
2095 }
|
|
2096 else
|
|
2097 {
|
|
2098 return err(std::move(rslt.as_err()));
|
|
2099 }
|
|
2100 }
|
|
2101
|
|
2102 template<typename Value>
|
|
2103 result<Value, std::string> parse_value(location& loc, const std::size_t n_rec)
|
|
2104 {
|
|
2105 const auto first = loc.iter();
|
|
2106 if(first == loc.end())
|
|
2107 {
|
|
2108 return err(format_underline("toml::parse_value: input is empty",
|
|
2109 {{source_location(loc), ""}}));
|
|
2110 }
|
|
2111
|
|
2112 const auto type = guess_value_type(loc);
|
|
2113 if(!type)
|
|
2114 {
|
|
2115 return err(type.unwrap_err());
|
|
2116 }
|
|
2117
|
|
2118 switch(type.unwrap())
|
|
2119 {
|
|
2120 case value_t::boolean : {return parse_value_helper<Value>(parse_boolean(loc) );}
|
|
2121 case value_t::integer : {return parse_value_helper<Value>(parse_integer(loc) );}
|
|
2122 case value_t::floating : {return parse_value_helper<Value>(parse_floating(loc) );}
|
|
2123 case value_t::string : {return parse_value_helper<Value>(parse_string(loc) );}
|
|
2124 case value_t::offset_datetime: {return parse_value_helper<Value>(parse_offset_datetime(loc) );}
|
|
2125 case value_t::local_datetime : {return parse_value_helper<Value>(parse_local_datetime(loc) );}
|
|
2126 case value_t::local_date : {return parse_value_helper<Value>(parse_local_date(loc) );}
|
|
2127 case value_t::local_time : {return parse_value_helper<Value>(parse_local_time(loc) );}
|
|
2128 case value_t::array : {return parse_value_helper<Value>(parse_array<Value>(loc, n_rec));}
|
|
2129 case value_t::table : {return parse_value_helper<Value>(parse_inline_table<Value>(loc, n_rec));}
|
|
2130 default:
|
|
2131 {
|
|
2132 const auto msg = format_underline("toml::parse_value: "
|
|
2133 "unknown token appeared", {{source_location(loc), "unknown"}});
|
|
2134 loc.reset(first);
|
|
2135 return err(msg);
|
|
2136 }
|
|
2137 }
|
|
2138 }
|
|
2139
|
|
2140 inline result<std::pair<std::vector<key>, region>, std::string>
|
|
2141 parse_table_key(location& loc)
|
|
2142 {
|
|
2143 if(auto token = lex_std_table::invoke(loc))
|
|
2144 {
|
|
2145 location inner_loc(loc.name(), token.unwrap().str());
|
|
2146
|
|
2147 const auto open = lex_std_table_open::invoke(inner_loc);
|
|
2148 if(!open || inner_loc.iter() == inner_loc.end())
|
|
2149 {
|
|
2150 throw internal_error(format_underline(
|
|
2151 "toml::parse_table_key: no `[`",
|
|
2152 {{source_location(inner_loc), "should be `[`"}}),
|
|
2153 source_location(inner_loc));
|
|
2154 }
|
|
2155 // to skip [ a . b . c ]
|
|
2156 // ^----------- this whitespace
|
|
2157 lex_ws::invoke(inner_loc);
|
|
2158 const auto keys = parse_key(inner_loc);
|
|
2159 if(!keys)
|
|
2160 {
|
|
2161 throw internal_error(format_underline(
|
|
2162 "toml::parse_table_key: invalid key",
|
|
2163 {{source_location(inner_loc), "not key"}}),
|
|
2164 source_location(inner_loc));
|
|
2165 }
|
|
2166 // to skip [ a . b . c ]
|
|
2167 // ^-- this whitespace
|
|
2168 lex_ws::invoke(inner_loc);
|
|
2169 const auto close = lex_std_table_close::invoke(inner_loc);
|
|
2170 if(!close)
|
|
2171 {
|
|
2172 throw internal_error(format_underline(
|
|
2173 "toml::parse_table_key: no `]`",
|
|
2174 {{source_location(inner_loc), "should be `]`"}}),
|
|
2175 source_location(inner_loc));
|
|
2176 }
|
|
2177
|
|
2178 // after [table.key], newline or EOF(empty table) required.
|
|
2179 if(loc.iter() != loc.end())
|
|
2180 {
|
|
2181 using lex_newline_after_table_key =
|
|
2182 sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>;
|
|
2183 const auto nl = lex_newline_after_table_key::invoke(loc);
|
|
2184 if(!nl)
|
|
2185 {
|
|
2186 throw syntax_error(format_underline(
|
|
2187 "toml::parse_table_key: newline required after [table.key]",
|
|
2188 {{source_location(loc), "expected newline"}}),
|
|
2189 source_location(loc));
|
|
2190 }
|
|
2191 }
|
|
2192 return ok(std::make_pair(keys.unwrap().first, token.unwrap()));
|
|
2193 }
|
|
2194 else
|
|
2195 {
|
|
2196 return err(format_underline("toml::parse_table_key: "
|
|
2197 "not a valid table key", {{source_location(loc), "here"}}));
|
|
2198 }
|
|
2199 }
|
|
2200
|
|
2201 inline result<std::pair<std::vector<key>, region>, std::string>
|
|
2202 parse_array_table_key(location& loc)
|
|
2203 {
|
|
2204 if(auto token = lex_array_table::invoke(loc))
|
|
2205 {
|
|
2206 location inner_loc(loc.name(), token.unwrap().str());
|
|
2207
|
|
2208 const auto open = lex_array_table_open::invoke(inner_loc);
|
|
2209 if(!open || inner_loc.iter() == inner_loc.end())
|
|
2210 {
|
|
2211 throw internal_error(format_underline(
|
|
2212 "toml::parse_array_table_key: no `[[`",
|
|
2213 {{source_location(inner_loc), "should be `[[`"}}),
|
|
2214 source_location(inner_loc));
|
|
2215 }
|
|
2216 lex_ws::invoke(inner_loc);
|
|
2217 const auto keys = parse_key(inner_loc);
|
|
2218 if(!keys)
|
|
2219 {
|
|
2220 throw internal_error(format_underline(
|
|
2221 "toml::parse_array_table_key: invalid key",
|
|
2222 {{source_location(inner_loc), "not a key"}}),
|
|
2223 source_location(inner_loc));
|
|
2224 }
|
|
2225 lex_ws::invoke(inner_loc);
|
|
2226 const auto close = lex_array_table_close::invoke(inner_loc);
|
|
2227 if(!close)
|
|
2228 {
|
|
2229 throw internal_error(format_underline(
|
|
2230 "toml::parse_array_table_key: no `]]`",
|
|
2231 {{source_location(inner_loc), "should be `]]`"}}),
|
|
2232 source_location(inner_loc));
|
|
2233 }
|
|
2234
|
|
2235 // after [[table.key]], newline or EOF(empty table) required.
|
|
2236 if(loc.iter() != loc.end())
|
|
2237 {
|
|
2238 using lex_newline_after_table_key =
|
|
2239 sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>;
|
|
2240 const auto nl = lex_newline_after_table_key::invoke(loc);
|
|
2241 if(!nl)
|
|
2242 {
|
|
2243 throw syntax_error(format_underline("toml::"
|
|
2244 "parse_array_table_key: newline required after [[table.key]]",
|
|
2245 {{source_location(loc), "expected newline"}}),
|
|
2246 source_location(loc));
|
|
2247 }
|
|
2248 }
|
|
2249 return ok(std::make_pair(keys.unwrap().first, token.unwrap()));
|
|
2250 }
|
|
2251 else
|
|
2252 {
|
|
2253 return err(format_underline("toml::parse_array_table_key: "
|
|
2254 "not a valid table key", {{source_location(loc), "here"}}));
|
|
2255 }
|
|
2256 }
|
|
2257
|
|
2258 // parse table body (key-value pairs until the iter hits the next [tablekey])
|
|
2259 template<typename Value>
|
|
2260 result<typename Value::table_type, std::string>
|
|
2261 parse_ml_table(location& loc)
|
|
2262 {
|
|
2263 using value_type = Value;
|
|
2264 using table_type = typename value_type::table_type;
|
|
2265
|
|
2266 const auto first = loc.iter();
|
|
2267 if(first == loc.end())
|
|
2268 {
|
|
2269 return ok(table_type{});
|
|
2270 }
|
|
2271
|
|
2272 // XXX at lest one newline is needed.
|
|
2273 using skip_line = repeat<
|
|
2274 sequence<maybe<lex_ws>, maybe<lex_comment>, lex_newline>, at_least<1>>;
|
|
2275 skip_line::invoke(loc);
|
|
2276 lex_ws::invoke(loc);
|
|
2277
|
|
2278 table_type tab;
|
|
2279 while(loc.iter() != loc.end())
|
|
2280 {
|
|
2281 lex_ws::invoke(loc);
|
|
2282 const auto before = loc.iter();
|
|
2283 if(const auto tmp = parse_array_table_key(loc)) // next table found
|
|
2284 {
|
|
2285 loc.reset(before);
|
|
2286 return ok(tab);
|
|
2287 }
|
|
2288 if(const auto tmp = parse_table_key(loc)) // next table found
|
|
2289 {
|
|
2290 loc.reset(before);
|
|
2291 return ok(tab);
|
|
2292 }
|
|
2293
|
|
2294 if(const auto kv = parse_key_value_pair<value_type>(loc, 0))
|
|
2295 {
|
|
2296 const auto& kvpair = kv.unwrap();
|
|
2297 const std::vector<key>& keys = kvpair.first.first;
|
|
2298 const auto& key_reg = kvpair.first.second;
|
|
2299 const value_type& val = kvpair.second;
|
|
2300 const auto inserted =
|
|
2301 insert_nested_key(tab, val, keys.begin(), keys.end(), key_reg);
|
|
2302 if(!inserted)
|
|
2303 {
|
|
2304 return err(inserted.unwrap_err());
|
|
2305 }
|
|
2306 }
|
|
2307 else
|
|
2308 {
|
|
2309 return err(kv.unwrap_err());
|
|
2310 }
|
|
2311
|
|
2312 // comment lines are skipped by the above function call.
|
|
2313 // However, since the `skip_line` requires at least 1 newline, it fails
|
|
2314 // if the file ends with ws and/or comment without newline.
|
|
2315 // `skip_line` matches `ws? + comment? + newline`, not `ws` or `comment`
|
|
2316 // itself. To skip the last ws and/or comment, call lexers.
|
|
2317 // It does not matter if these fails, so the return value is discarded.
|
|
2318 lex_ws::invoke(loc);
|
|
2319 lex_comment::invoke(loc);
|
|
2320
|
|
2321 // skip_line is (whitespace? comment? newline)_{1,}. multiple empty lines
|
|
2322 // and comments after the last key-value pairs are allowed.
|
|
2323 const auto newline = skip_line::invoke(loc);
|
|
2324 if(!newline && loc.iter() != loc.end())
|
|
2325 {
|
|
2326 const auto before2 = loc.iter();
|
|
2327 lex_ws::invoke(loc); // skip whitespace
|
|
2328 const auto msg = format_underline("toml::parse_table: "
|
|
2329 "invalid line format", {{source_location(loc), concat_to_string(
|
|
2330 "expected newline, but got '", show_char(*loc.iter()), "'.")}});
|
|
2331 loc.reset(before2);
|
|
2332 return err(msg);
|
|
2333 }
|
|
2334
|
|
2335 // the skip_lines only matches with lines that includes newline.
|
|
2336 // to skip the last line that includes comment and/or whitespace
|
|
2337 // but no newline, call them one more time.
|
|
2338 lex_ws::invoke(loc);
|
|
2339 lex_comment::invoke(loc);
|
|
2340 }
|
|
2341 return ok(tab);
|
|
2342 }
|
|
2343
|
|
2344 template<typename Value>
|
|
2345 result<Value, std::string> parse_toml_file(location& loc)
|
|
2346 {
|
|
2347 using value_type = Value;
|
|
2348 using table_type = typename value_type::table_type;
|
|
2349
|
|
2350 const auto first = loc.iter();
|
|
2351 if(first == loc.end())
|
|
2352 {
|
|
2353 // For empty files, return an empty table with an empty region (zero-length).
|
|
2354 // Without the region, error messages would miss the filename.
|
|
2355 return ok(value_type(table_type{}, region(loc, first, first), {}));
|
|
2356 }
|
|
2357
|
|
2358 // put the first line as a region of a file
|
|
2359 // Here first != loc.end(), so taking std::next is okay
|
|
2360 const region file(loc, first, std::next(loc.iter()));
|
|
2361
|
|
2362 // The first successive comments that are separated from the first value
|
|
2363 // by an empty line are for a file itself.
|
|
2364 // ```toml
|
|
2365 // # this is a comment for a file.
|
|
2366 //
|
|
2367 // key = "the first value"
|
|
2368 // ```
|
|
2369 // ```toml
|
|
2370 // # this is a comment for "the first value".
|
|
2371 // key = "the first value"
|
|
2372 // ```
|
|
2373 std::vector<std::string> comments;
|
|
2374 using lex_first_comments = sequence<
|
|
2375 repeat<sequence<maybe<lex_ws>, lex_comment, lex_newline>, at_least<1>>,
|
|
2376 sequence<maybe<lex_ws>, lex_newline>
|
|
2377 >;
|
|
2378 if(const auto token = lex_first_comments::invoke(loc))
|
|
2379 {
|
|
2380 location inner_loc(loc.name(), token.unwrap().str());
|
|
2381 while(inner_loc.iter() != inner_loc.end())
|
|
2382 {
|
|
2383 maybe<lex_ws>::invoke(inner_loc); // remove ws if exists
|
|
2384 if(lex_newline::invoke(inner_loc))
|
|
2385 {
|
|
2386 assert(inner_loc.iter() == inner_loc.end());
|
|
2387 break; // empty line found.
|
|
2388 }
|
|
2389 auto com = lex_comment::invoke(inner_loc).unwrap().str();
|
|
2390 com.erase(com.begin()); // remove # sign
|
|
2391 comments.push_back(std::move(com));
|
|
2392 lex_newline::invoke(inner_loc);
|
|
2393 }
|
|
2394 }
|
|
2395
|
|
2396 table_type data;
|
|
2397 // root object is also a table, but without [tablename]
|
|
2398 if(const auto tab = parse_ml_table<value_type>(loc))
|
|
2399 {
|
|
2400 data = std::move(tab.unwrap());
|
|
2401 }
|
|
2402 else // failed (empty table is regarded as success in parse_ml_table)
|
|
2403 {
|
|
2404 return err(tab.unwrap_err());
|
|
2405 }
|
|
2406 while(loc.iter() != loc.end())
|
|
2407 {
|
|
2408 // here, the region of [table] is regarded as the table-key because
|
|
2409 // the table body is normally too big and it is not so informative
|
|
2410 // if the first key-value pair of the table is shown in the error
|
|
2411 // message.
|
|
2412 if(const auto tabkey = parse_array_table_key(loc))
|
|
2413 {
|
|
2414 const auto tab = parse_ml_table<value_type>(loc);
|
|
2415 if(!tab){return err(tab.unwrap_err());}
|
|
2416
|
|
2417 const auto& tk = tabkey.unwrap();
|
|
2418 const auto& keys = tk.first;
|
|
2419 const auto& reg = tk.second;
|
|
2420
|
|
2421 const auto inserted = insert_nested_key(data,
|
|
2422 value_type(tab.unwrap(), reg, reg.comments()),
|
|
2423 keys.begin(), keys.end(), reg,
|
|
2424 /*is_array_of_table=*/ true);
|
|
2425 if(!inserted) {return err(inserted.unwrap_err());}
|
|
2426
|
|
2427 continue;
|
|
2428 }
|
|
2429 if(const auto tabkey = parse_table_key(loc))
|
|
2430 {
|
|
2431 const auto tab = parse_ml_table<value_type>(loc);
|
|
2432 if(!tab){return err(tab.unwrap_err());}
|
|
2433
|
|
2434 const auto& tk = tabkey.unwrap();
|
|
2435 const auto& keys = tk.first;
|
|
2436 const auto& reg = tk.second;
|
|
2437
|
|
2438 const auto inserted = insert_nested_key(data,
|
|
2439 value_type(tab.unwrap(), reg, reg.comments()),
|
|
2440 keys.begin(), keys.end(), reg);
|
|
2441 if(!inserted) {return err(inserted.unwrap_err());}
|
|
2442
|
|
2443 continue;
|
|
2444 }
|
|
2445 return err(format_underline("toml::parse_toml_file: "
|
|
2446 "unknown line appeared", {{source_location(loc), "unknown format"}}));
|
|
2447 }
|
|
2448
|
|
2449 return ok(Value(std::move(data), file, comments));
|
|
2450 }
|
|
2451
|
|
2452 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY,
|
|
2453 template<typename ...> class Table = std::unordered_map,
|
|
2454 template<typename ...> class Array = std::vector>
|
|
2455 basic_value<Comment, Table, Array>
|
|
2456 parse(std::vector<char>& letters, const std::string& fname)
|
|
2457 {
|
|
2458 using value_type = basic_value<Comment, Table, Array>;
|
|
2459
|
|
2460 // append LF.
|
|
2461 // Although TOML does not require LF at the EOF, to make parsing logic
|
|
2462 // simpler, we "normalize" the content by adding LF if it does not exist.
|
|
2463 // It also checks if the last char is CR, to avoid changing the meaning.
|
|
2464 // This is not the *best* way to deal with the last character, but is a
|
|
2465 // simple and quick fix.
|
|
2466 if(!letters.empty() && letters.back() != '\n' && letters.back() != '\r')
|
|
2467 {
|
|
2468 letters.push_back('\n');
|
|
2469 }
|
|
2470
|
|
2471 detail::location loc(std::move(fname), std::move(letters));
|
|
2472
|
|
2473 // skip BOM if exists.
|
|
2474 // XXX component of BOM (like 0xEF) exceeds the representable range of
|
|
2475 // signed char, so on some (actually, most) of the environment, these cannot
|
|
2476 // be compared to char. However, since we are always out of luck, we need to
|
|
2477 // check our chars are equivalent to BOM. To do this, first we need to
|
|
2478 // convert char to unsigned char to guarantee the comparability.
|
|
2479 if(loc.source()->size() >= 3)
|
|
2480 {
|
|
2481 std::array<unsigned char, 3> BOM;
|
|
2482 std::memcpy(BOM.data(), loc.source()->data(), 3);
|
|
2483 if(BOM[0] == 0xEF && BOM[1] == 0xBB && BOM[2] == 0xBF)
|
|
2484 {
|
|
2485 loc.advance(3); // BOM found. skip.
|
|
2486 }
|
|
2487 }
|
|
2488
|
|
2489 if (auto data = detail::parse_toml_file<value_type>(loc))
|
|
2490 {
|
|
2491 return std::move(data).unwrap();
|
|
2492 }
|
|
2493 else
|
|
2494 {
|
|
2495 throw syntax_error(std::move(data).unwrap_err(), source_location(loc));
|
|
2496 }
|
|
2497 }
|
|
2498
|
|
2499 } // detail
|
|
2500
|
|
2501 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY,
|
|
2502 template<typename ...> class Table = std::unordered_map,
|
|
2503 template<typename ...> class Array = std::vector>
|
|
2504 basic_value<Comment, Table, Array>
|
|
2505 parse(FILE * file, const std::string& fname)
|
|
2506 {
|
|
2507 const long beg = std::ftell(file);
|
|
2508 if (beg == -1l)
|
|
2509 {
|
|
2510 throw file_io_error(errno, "Failed to access", fname);
|
|
2511 }
|
|
2512
|
|
2513 const int res_seekend = std::fseek(file, 0, SEEK_END);
|
|
2514 if (res_seekend != 0)
|
|
2515 {
|
|
2516 throw file_io_error(errno, "Failed to seek", fname);
|
|
2517 }
|
|
2518
|
|
2519 const long end = std::ftell(file);
|
|
2520 if (end == -1l)
|
|
2521 {
|
|
2522 throw file_io_error(errno, "Failed to access", fname);
|
|
2523 }
|
|
2524
|
|
2525 const auto fsize = end - beg;
|
|
2526
|
|
2527 const auto res_seekbeg = std::fseek(file, beg, SEEK_SET);
|
|
2528 if (res_seekbeg != 0)
|
|
2529 {
|
|
2530 throw file_io_error(errno, "Failed to seek", fname);
|
|
2531 }
|
|
2532
|
|
2533 // read whole file as a sequence of char
|
|
2534 assert(fsize >= 0);
|
|
2535 std::vector<char> letters(static_cast<std::size_t>(fsize));
|
|
2536 std::fread(letters.data(), sizeof(char), static_cast<std::size_t>(fsize), file);
|
|
2537
|
|
2538 return detail::parse<Comment, Table, Array>(letters, fname);
|
|
2539 }
|
|
2540
|
|
2541 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY,
|
|
2542 template<typename ...> class Table = std::unordered_map,
|
|
2543 template<typename ...> class Array = std::vector>
|
|
2544 basic_value<Comment, Table, Array>
|
|
2545 parse(std::istream& is, std::string fname = "unknown file")
|
|
2546 {
|
|
2547 const auto beg = is.tellg();
|
|
2548 is.seekg(0, std::ios::end);
|
|
2549 const auto end = is.tellg();
|
|
2550 const auto fsize = end - beg;
|
|
2551 is.seekg(beg);
|
|
2552
|
|
2553 // read whole file as a sequence of char
|
|
2554 assert(fsize >= 0);
|
|
2555 std::vector<char> letters(static_cast<std::size_t>(fsize));
|
|
2556 is.read(letters.data(), fsize);
|
|
2557
|
|
2558 return detail::parse<Comment, Table, Array>(letters, fname);
|
|
2559 }
|
|
2560
|
|
2561 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY,
|
|
2562 template<typename ...> class Table = std::unordered_map,
|
|
2563 template<typename ...> class Array = std::vector>
|
|
2564 basic_value<Comment, Table, Array> parse(std::string fname)
|
|
2565 {
|
|
2566 std::ifstream ifs(fname, std::ios_base::binary);
|
|
2567 if(!ifs.good())
|
|
2568 {
|
|
2569 throw std::ios_base::failure(
|
|
2570 "toml::parse: Error opening file \"" + fname + "\"");
|
|
2571 }
|
|
2572 ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
|
|
2573 return parse<Comment, Table, Array>(ifs, std::move(fname));
|
|
2574 }
|
|
2575
|
|
2576 #ifdef TOML11_HAS_STD_FILESYSTEM
|
|
// This function just forwards `parse("filename.toml")` to the std::string
// version to avoid ambiguity in overload resolution.
//
// Both std::string and std::filesystem::path are convertible from const char*.
// Without this overload, both parse(std::string) and
// parse(std::filesystem::path) match parse("filename.toml"), which breaks
// existing code.
//
// This function exactly matches an invocation with a C string, so it is
// preferred over the others and the ambiguity disappears.
|
|
2586 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY,
|
|
2587 template<typename ...> class Table = std::unordered_map,
|
|
2588 template<typename ...> class Array = std::vector>
|
|
2589 basic_value<Comment, Table, Array> parse(const char* fname)
|
|
2590 {
|
|
2591 return parse<Comment, Table, Array>(std::string(fname));
|
|
2592 }
|
|
2593
|
|
2594 template<typename Comment = TOML11_DEFAULT_COMMENT_STRATEGY,
|
|
2595 template<typename ...> class Table = std::unordered_map,
|
|
2596 template<typename ...> class Array = std::vector>
|
|
2597 basic_value<Comment, Table, Array> parse(const std::filesystem::path& fpath)
|
|
2598 {
|
|
2599 std::ifstream ifs(fpath, std::ios_base::binary);
|
|
2600 if(!ifs.good())
|
|
2601 {
|
|
2602 throw std::ios_base::failure(
|
|
2603 "toml::parse: Error opening file \"" + fpath.string() + "\"");
|
|
2604 }
|
|
2605 ifs.exceptions(std::ifstream::failbit | std::ifstream::badbit);
|
|
2606 return parse<Comment, Table, Array>(ifs, fpath.string());
|
|
2607 }
|
|
2608 #endif // TOML11_HAS_STD_FILESYSTEM
|
|
2609
|
|
2610 } // toml
|
|
2611 #endif// TOML11_PARSER_HPP
|