318
|
1 // Copyright Toru Niina 2017.
|
|
2 // Distributed under the MIT License.
|
|
3 #ifndef TOML11_COMBINATOR_HPP
|
|
4 #define TOML11_COMBINATOR_HPP
|
|
5 #include <cassert>
|
|
6 #include <cctype>
|
|
7 #include <cstdio>
|
|
8
|
|
9 #include <array>
|
|
10 #include <iomanip>
|
|
11 #include <iterator>
|
|
12 #include <limits>
|
|
13 #include <type_traits>
|
|
14
|
|
15 #include "region.hpp"
|
|
16 #include "result.hpp"
|
|
17 #include "traits.hpp"
|
|
18 #include "utility.hpp"
|
|
19
|
|
// These combinators scan characters and return a region when the input
// matches their condition. When they fail, they do not change the location.
// They are used in lexer.hpp.
|
|
23
|
|
24 namespace toml
|
|
25 {
|
|
26 namespace detail
|
|
27 {
|
|
28
|
|
// Renders a single character for use in an error message.
//
// Returns a one-character string when std::isgraph() accepts the character,
// and otherwise a four-character hexadecimal escape of its byte value in the
// form "0xNN" (lower-case digits), e.g. "0x0a" for a line feed.
inline std::string show_char(const char c)
{
    // The <cctype> functions require an argument whose value is representable
    // as unsigned char (or EOF). When `char` is signed, bytes of perfectly
    // valid multi-byte UTF-8 sequences are negative, which trips a range check
    // in the Debug runtime of MSVC++. Converting the value to unsigned char
    // first is the standard way to stay inside the permitted range.
    const unsigned char uc = static_cast<unsigned char>(c);
    if(std::isgraph(uc) != 0)
    {
        return std::string(1, c);
    }

    // "0x" + two hex digits + terminating NUL == 5 bytes.
    std::array<char, 5> buf;
    buf.fill('\0');
    // `%x` expects an unsigned int argument.
    const auto r = std::snprintf(
            buf.data(), buf.size(), "0x%02x", static_cast<unsigned int>(uc));
    (void) r; // Unused variable warning (when assert is compiled out)
    assert(r == static_cast<int>(buf.size()) - 1);
    return std::string(buf.data());
}
|
|
55
|
|
56 template<char C>
|
|
57 struct character
|
|
58 {
|
|
59 static constexpr char target = C;
|
|
60
|
|
61 static result<region, none_t>
|
|
62 invoke(location& loc)
|
|
63 {
|
|
64 if(loc.iter() == loc.end()) {return none();}
|
|
65 const auto first = loc.iter();
|
|
66
|
|
67 const char c = *(loc.iter());
|
|
68 if(c != target)
|
|
69 {
|
|
70 return none();
|
|
71 }
|
|
72 loc.advance(); // update location
|
|
73
|
|
74 return ok(region(loc, first, loc.iter()));
|
|
75 }
|
|
76 };
|
|
77 template<char C>
|
|
78 constexpr char character<C>::target;
|
|
79
|
|
80 // closed interval [Low, Up]. both Low and Up are included.
|
|
81 template<char Low, char Up>
|
|
82 struct in_range
|
|
83 {
|
|
84 // assuming ascii part of UTF-8...
|
|
85 static_assert(Low <= Up, "lower bound should be less than upper bound.");
|
|
86
|
|
87 static constexpr char upper = Up;
|
|
88 static constexpr char lower = Low;
|
|
89
|
|
90 static result<region, none_t>
|
|
91 invoke(location& loc)
|
|
92 {
|
|
93 if(loc.iter() == loc.end()) {return none();}
|
|
94 const auto first = loc.iter();
|
|
95
|
|
96 const char c = *(loc.iter());
|
|
97 if(c < lower || upper < c)
|
|
98 {
|
|
99 return none();
|
|
100 }
|
|
101
|
|
102 loc.advance();
|
|
103 return ok(region(loc, first, loc.iter()));
|
|
104 }
|
|
105 };
|
|
106 template<char L, char U> constexpr char in_range<L, U>::upper;
|
|
107 template<char L, char U> constexpr char in_range<L, U>::lower;
|
|
108
|
|
109 // keep iterator if `Combinator` matches. otherwise, increment `iter` by 1 char.
|
|
110 // for detecting invalid characters, like control sequences in toml string.
|
|
111 template<typename Combinator>
|
|
112 struct exclude
|
|
113 {
|
|
114 static result<region, none_t>
|
|
115 invoke(location& loc)
|
|
116 {
|
|
117 if(loc.iter() == loc.end()) {return none();}
|
|
118 auto first = loc.iter();
|
|
119
|
|
120 auto rslt = Combinator::invoke(loc);
|
|
121 if(rslt.is_ok())
|
|
122 {
|
|
123 loc.reset(first);
|
|
124 return none();
|
|
125 }
|
|
126 loc.reset(std::next(first)); // XXX maybe loc.advance() is okay but...
|
|
127 return ok(region(loc, first, loc.iter()));
|
|
128 }
|
|
129 };
|
|
130
|
|
131 // increment `iter`, if matches. otherwise, just return empty string.
|
|
132 template<typename Combinator>
|
|
133 struct maybe
|
|
134 {
|
|
135 static result<region, none_t>
|
|
136 invoke(location& loc)
|
|
137 {
|
|
138 const auto rslt = Combinator::invoke(loc);
|
|
139 if(rslt.is_ok())
|
|
140 {
|
|
141 return rslt;
|
|
142 }
|
|
143 return ok(region(loc));
|
|
144 }
|
|
145 };
|
|
146
|
|
147 template<typename ... Ts>
|
|
148 struct sequence;
|
|
149
|
|
150 template<typename Head, typename ... Tail>
|
|
151 struct sequence<Head, Tail...>
|
|
152 {
|
|
153 static result<region, none_t>
|
|
154 invoke(location& loc)
|
|
155 {
|
|
156 const auto first = loc.iter();
|
|
157 auto rslt = Head::invoke(loc);
|
|
158 if(rslt.is_err())
|
|
159 {
|
|
160 loc.reset(first);
|
|
161 return none();
|
|
162 }
|
|
163 return sequence<Tail...>::invoke(loc, std::move(rslt.unwrap()), first);
|
|
164 }
|
|
165
|
|
166 // called from the above function only, recursively.
|
|
167 template<typename Iterator>
|
|
168 static result<region, none_t>
|
|
169 invoke(location& loc, region reg, Iterator first)
|
|
170 {
|
|
171 const auto rslt = Head::invoke(loc);
|
|
172 if(rslt.is_err())
|
|
173 {
|
|
174 loc.reset(first);
|
|
175 return none();
|
|
176 }
|
|
177 reg += rslt.unwrap(); // concat regions
|
|
178 return sequence<Tail...>::invoke(loc, std::move(reg), first);
|
|
179 }
|
|
180 };
|
|
181
|
|
182 template<typename Head>
|
|
183 struct sequence<Head>
|
|
184 {
|
|
185 // would be called from sequence<T ...>::invoke only.
|
|
186 template<typename Iterator>
|
|
187 static result<region, none_t>
|
|
188 invoke(location& loc, region reg, Iterator first)
|
|
189 {
|
|
190 const auto rslt = Head::invoke(loc);
|
|
191 if(rslt.is_err())
|
|
192 {
|
|
193 loc.reset(first);
|
|
194 return none();
|
|
195 }
|
|
196 reg += rslt.unwrap(); // concat regions
|
|
197 return ok(reg);
|
|
198 }
|
|
199 };
|
|
200
|
|
201 template<typename ... Ts>
|
|
202 struct either;
|
|
203
|
|
204 template<typename Head, typename ... Tail>
|
|
205 struct either<Head, Tail...>
|
|
206 {
|
|
207 static result<region, none_t>
|
|
208 invoke(location& loc)
|
|
209 {
|
|
210 const auto rslt = Head::invoke(loc);
|
|
211 if(rslt.is_ok()) {return rslt;}
|
|
212 return either<Tail...>::invoke(loc);
|
|
213 }
|
|
214 };
|
|
215 template<typename Head>
|
|
216 struct either<Head>
|
|
217 {
|
|
218 static result<region, none_t>
|
|
219 invoke(location& loc)
|
|
220 {
|
|
221 return Head::invoke(loc);
|
|
222 }
|
|
223 };
|
|
224
|
|
225 template<typename T, typename N>
|
|
226 struct repeat;
|
|
227
|
|
228 template<std::size_t N> struct exactly{};
|
|
229 template<std::size_t N> struct at_least{};
|
|
230 struct unlimited{};
|
|
231
|
|
232 template<typename T, std::size_t N>
|
|
233 struct repeat<T, exactly<N>>
|
|
234 {
|
|
235 static result<region, none_t>
|
|
236 invoke(location& loc)
|
|
237 {
|
|
238 region retval(loc);
|
|
239 const auto first = loc.iter();
|
|
240 for(std::size_t i=0; i<N; ++i)
|
|
241 {
|
|
242 auto rslt = T::invoke(loc);
|
|
243 if(rslt.is_err())
|
|
244 {
|
|
245 loc.reset(first);
|
|
246 return none();
|
|
247 }
|
|
248 retval += rslt.unwrap();
|
|
249 }
|
|
250 return ok(std::move(retval));
|
|
251 }
|
|
252 };
|
|
253
|
|
254 template<typename T, std::size_t N>
|
|
255 struct repeat<T, at_least<N>>
|
|
256 {
|
|
257 static result<region, none_t>
|
|
258 invoke(location& loc)
|
|
259 {
|
|
260 region retval(loc);
|
|
261
|
|
262 const auto first = loc.iter();
|
|
263 for(std::size_t i=0; i<N; ++i)
|
|
264 {
|
|
265 auto rslt = T::invoke(loc);
|
|
266 if(rslt.is_err())
|
|
267 {
|
|
268 loc.reset(first);
|
|
269 return none();
|
|
270 }
|
|
271 retval += rslt.unwrap();
|
|
272 }
|
|
273 while(true)
|
|
274 {
|
|
275 auto rslt = T::invoke(loc);
|
|
276 if(rslt.is_err())
|
|
277 {
|
|
278 return ok(std::move(retval));
|
|
279 }
|
|
280 retval += rslt.unwrap();
|
|
281 }
|
|
282 }
|
|
283 };
|
|
284
|
|
285 template<typename T>
|
|
286 struct repeat<T, unlimited>
|
|
287 {
|
|
288 static result<region, none_t>
|
|
289 invoke(location& loc)
|
|
290 {
|
|
291 region retval(loc);
|
|
292 while(true)
|
|
293 {
|
|
294 auto rslt = T::invoke(loc);
|
|
295 if(rslt.is_err())
|
|
296 {
|
|
297 return ok(std::move(retval));
|
|
298 }
|
|
299 retval += rslt.unwrap();
|
|
300 }
|
|
301 }
|
|
302 };
|
|
303
|
|
304 } // detail
|
|
305 } // toml
|
|
306 #endif// TOML11_COMBINATOR_HPP
|