comparison dep/fmt/test/scan.h @ 343:1faa72660932

*: transfer back to cmake from autotools autotools just made lots of things more complicated than they should have and many things broke (i.e. translations)
author Paper <paper@paper.us.eu.org>
date Thu, 20 Jun 2024 05:56:06 -0400
parents
children
comparison
equal deleted inserted replaced
342:adb79bdde329 343:1faa72660932
1 // Formatting library for C++ - scanning API proof of concept
2 //
3 // Copyright (c) 2019 - present, Victor Zverovich
4 // All rights reserved.
5 //
6 // For the license information refer to format.h.
7
8 #include <array>
9 #include <cassert>
10 #include <climits>
11
12 #include "fmt/format.h"
13
14 FMT_BEGIN_NAMESPACE
15 namespace detail {
16
// Returns true iff c is a space or a newline character.
inline auto is_whitespace(char c) -> bool {
  switch (c) {
  case ' ':
  case '\n':
    return true;
  default:
    return false;
  }
}
18
// Maps a hexadecimal digit character (0-9, a-f, A-F) to its numeric value in
// the range [0, 15]. Returns -1 for any other character.
inline auto to_hex_digit(char c) -> int {
  const bool is_decimal = '0' <= c && c <= '9';
  if (is_decimal) return c - '0';
  const bool is_lower = 'a' <= c && c <= 'f';
  if (is_lower) return 10 + (c - 'a');
  const bool is_upper = 'A' <= c && c <= 'F';
  return is_upper ? 10 + (c - 'A') : -1;
}
26
// A pair of pointers delimiting a character range. A null `begin` means that
// no range is available.
struct maybe_contiguous_range {
  const char* begin;
  const char* end;

  // True when a range is present, i.e. when `begin` is not null.
  explicit operator bool() const { return !(begin == nullptr); }
};
33
34 class scan_buffer {
35 private:
36 const char* ptr_;
37 const char* end_;
38 bool contiguous_;
39
40 protected:
41 scan_buffer(const char* ptr, const char* end, bool contiguous)
42 : ptr_(ptr), end_(end), contiguous_(contiguous) {}
43 ~scan_buffer() = default;
44
45 void set(string_view buf) {
46 ptr_ = buf.begin();
47 end_ = buf.end();
48 }
49
50 auto ptr() const -> const char* { return ptr_; }
51
52 public:
53 scan_buffer(const scan_buffer&) = delete;
54 void operator=(const scan_buffer&) = delete;
55
56 // Fills the buffer with more input if available.
57 virtual void consume() = 0;
58
59 class sentinel {};
60
61 class iterator {
62 private:
63 const char** ptr_;
64 scan_buffer* buf_; // This could be merged with ptr_.
65 char value_;
66
67 static auto get_sentinel() -> const char** {
68 static const char* ptr = nullptr;
69 return &ptr;
70 }
71
72 friend class scan_buffer;
73
74 friend auto operator==(iterator lhs, sentinel) -> bool {
75 return *lhs.ptr_ == nullptr;
76 }
77 friend auto operator!=(iterator lhs, sentinel) -> bool {
78 return *lhs.ptr_ != nullptr;
79 }
80
81 iterator(scan_buffer* buf) : buf_(buf) {
82 if (buf->ptr_ == buf->end_) {
83 ptr_ = get_sentinel();
84 return;
85 }
86 ptr_ = &buf->ptr_;
87 value_ = *buf->ptr_;
88 }
89
90 friend scan_buffer& get_buffer(iterator it) { return *it.buf_; }
91
92 public:
93 iterator() : ptr_(get_sentinel()), buf_(nullptr) {}
94
95 auto operator++() -> iterator& {
96 if (!buf_->try_consume()) ptr_ = get_sentinel();
97 value_ = *buf_->ptr_;
98 return *this;
99 }
100 auto operator++(int) -> iterator {
101 iterator copy = *this;
102 ++*this;
103 return copy;
104 }
105 auto operator*() const -> char { return value_; }
106
107 auto base() const -> const char* { return buf_->ptr_; }
108
109 friend auto to_contiguous(iterator it) -> maybe_contiguous_range;
110 friend auto advance(iterator it, size_t n) -> iterator;
111 };
112
113 friend auto to_contiguous(iterator it) -> maybe_contiguous_range {
114 if (it.buf_->is_contiguous()) return {it.buf_->ptr_, it.buf_->end_};
115 return {nullptr, nullptr};
116 }
117 friend auto advance(iterator it, size_t n) -> iterator {
118 FMT_ASSERT(it.buf_->is_contiguous(), "");
119 const char*& ptr = it.buf_->ptr_;
120 ptr += n;
121 it.value_ = *ptr;
122 if (ptr == it.buf_->end_) it.ptr_ = iterator::get_sentinel();
123 return it;
124 }
125
126 auto begin() -> iterator { return this; }
127 auto end() -> sentinel { return {}; }
128
129 auto is_contiguous() const -> bool { return contiguous_; }
130
131 // Tries consuming a single code unit. Returns true iff there is more input.
132 auto try_consume() -> bool {
133 FMT_ASSERT(ptr_ != end_, "");
134 ++ptr_;
135 if (ptr_ != end_) return true;
136 consume();
137 return ptr_ != end_;
138 }
139 };
140
141 using scan_iterator = scan_buffer::iterator;
142 using scan_sentinel = scan_buffer::sentinel;
143
144 class string_scan_buffer : public scan_buffer {
145 private:
146 void consume() override {}
147
148 public:
149 explicit string_scan_buffer(string_view s)
150 : scan_buffer(s.begin(), s.end(), true) {}
151 };
152
#ifdef _WIN32
// Windows replacements for the POSIX stdio locking and unlocked-read
// functions, forwarding to the corresponding CRT functions. They are inline
// because this header may be included in more than one translation unit.
inline void flockfile(FILE* f) { _lock_file(f); }
inline void funlockfile(FILE* f) { _unlock_file(f); }
inline int getc_unlocked(FILE* f) { return _fgetc_nolock(f); }
#endif
158
159 // A FILE wrapper. F is FILE defined as a template parameter to make
160 // system-specific API detection work.
161 template <typename F> class file_base {
162 protected:
163 F* file_;
164
165 public:
166 file_base(F* file) : file_(file) {}
167 operator F*() const { return file_; }
168
169 // Reads a code unit from the stream.
170 auto get() -> int {
171 int result = getc_unlocked(file_);
172 if (result == EOF && ferror(file_) != 0)
173 FMT_THROW(system_error(errno, FMT_STRING("getc failed")));
174 return result;
175 }
176
177 // Puts the code unit back into the stream buffer.
178 void unget(char c) {
179 if (ungetc(c, file_) == EOF)
180 FMT_THROW(system_error(errno, FMT_STRING("ungetc failed")));
181 }
182 };
183
184 // A FILE wrapper for glibc.
185 template <typename F> class glibc_file : public file_base<F> {
186 public:
187 using file_base<F>::file_base;
188
189 // Returns the file's read buffer as a string_view.
190 auto buffer() const -> string_view {
191 return {this->file_->_IO_read_ptr,
192 to_unsigned(this->file_->_IO_read_end - this->file_->_IO_read_ptr)};
193 }
194 };
195
196 // A FILE wrapper for Apple's libc.
197 template <typename F> class apple_file : public file_base<F> {
198 public:
199 using file_base<F>::file_base;
200
201 auto buffer() const -> string_view {
202 return {reinterpret_cast<char*>(this->file_->_p),
203 to_unsigned(this->file_->_r)};
204 }
205 };
206
207 // A fallback FILE wrapper.
208 template <typename F> class fallback_file : public file_base<F> {
209 private:
210 char next_; // The next unconsumed character in the buffer.
211 bool has_next_ = false;
212
213 public:
214 using file_base<F>::file_base;
215
216 auto buffer() const -> string_view { return {&next_, has_next_ ? 1u : 0u}; }
217
218 auto get() -> int {
219 has_next_ = false;
220 return file_base<F>::get();
221 }
222
223 void unget(char c) {
224 file_base<F>::unget(c);
225 next_ = c;
226 has_next_ = true;
227 }
228 };
229
230 class file_scan_buffer : public scan_buffer {
231 private:
232 template <typename F, FMT_ENABLE_IF(sizeof(F::_IO_read_ptr) != 0)>
233 static auto get_file(F* f, int) -> glibc_file<F> {
234 return f;
235 }
236 template <typename F, FMT_ENABLE_IF(sizeof(F::_p) != 0)>
237 static auto get_file(F* f, int) -> apple_file<F> {
238 return f;
239 }
240 static auto get_file(FILE* f, ...) -> fallback_file<FILE> { return f; }
241
242 decltype(get_file(static_cast<FILE*>(nullptr), 0)) file_;
243
244 // Fills the buffer if it is empty.
245 void fill() {
246 string_view buf = file_.buffer();
247 if (buf.size() == 0) {
248 int c = file_.get();
249 // Put the character back since we are only filling the buffer.
250 if (c != EOF) file_.unget(static_cast<char>(c));
251 buf = file_.buffer();
252 }
253 set(buf);
254 }
255
256 void consume() override {
257 // Consume the current buffer content.
258 size_t n = to_unsigned(ptr() - file_.buffer().begin());
259 for (size_t i = 0; i != n; ++i) file_.get();
260 fill();
261 }
262
263 public:
264 explicit file_scan_buffer(FILE* f)
265 : scan_buffer(nullptr, nullptr, false), file_(f) {
266 flockfile(f);
267 fill();
268 }
269 ~file_scan_buffer() { funlockfile(file_); }
270 };
271 } // namespace detail
272
// Primary scanner template. It is disabled, which is signalled by the deleted
// default constructor.
template <typename T, typename Char = char>
struct scanner {
  scanner() = delete;
};
277
278 class scan_parse_context {
279 private:
280 string_view format_;
281
282 public:
283 using iterator = string_view::iterator;
284
285 explicit FMT_CONSTEXPR scan_parse_context(string_view format)
286 : format_(format) {}
287
288 FMT_CONSTEXPR auto begin() const -> iterator { return format_.begin(); }
289 FMT_CONSTEXPR auto end() const -> iterator { return format_.end(); }
290
291 void advance_to(iterator it) {
292 format_.remove_prefix(detail::to_unsigned(it - begin()));
293 }
294 };
295
296 namespace detail {
// Identifies which kind of destination a scan argument refers to.
enum class scan_type {
  none_type,         // no argument
  int_type,          // int
  uint_type,         // unsigned
  long_long_type,    // long long
  ulong_long_type,   // unsigned long long
  string_type,       // std::string
  string_view_type,  // string_view
  custom_type        // any other type, scanned through scanner<T>
};
307
308 template <typename Context> struct custom_scan_arg {
309 void* value;
310 void (*scan)(void* arg, scan_parse_context& parse_ctx, Context& ctx);
311 };
312 } // namespace detail
313
314 // A scan argument. Context is a template parameter for the compiled API where
315 // output can be unbuffered.
316 template <typename Context> class basic_scan_arg {
317 private:
318 using scan_type = detail::scan_type;
319 scan_type type_;
320 union {
321 int* int_value_;
322 unsigned* uint_value_;
323 long long* long_long_value_;
324 unsigned long long* ulong_long_value_;
325 std::string* string_;
326 string_view* string_view_;
327 detail::custom_scan_arg<Context> custom_;
328 // TODO: more types
329 };
330
331 template <typename T>
332 static void scan_custom_arg(void* arg, scan_parse_context& parse_ctx,
333 Context& ctx) {
334 auto s = scanner<T>();
335 parse_ctx.advance_to(s.parse(parse_ctx));
336 ctx.advance_to(s.scan(*static_cast<T*>(arg), ctx));
337 }
338
339 public:
340 FMT_CONSTEXPR basic_scan_arg()
341 : type_(scan_type::none_type), int_value_(nullptr) {}
342 FMT_CONSTEXPR basic_scan_arg(int& value)
343 : type_(scan_type::int_type), int_value_(&value) {}
344 FMT_CONSTEXPR basic_scan_arg(unsigned& value)
345 : type_(scan_type::uint_type), uint_value_(&value) {}
346 FMT_CONSTEXPR basic_scan_arg(long long& value)
347 : type_(scan_type::long_long_type), long_long_value_(&value) {}
348 FMT_CONSTEXPR basic_scan_arg(unsigned long long& value)
349 : type_(scan_type::ulong_long_type), ulong_long_value_(&value) {}
350 FMT_CONSTEXPR basic_scan_arg(std::string& value)
351 : type_(scan_type::string_type), string_(&value) {}
352 FMT_CONSTEXPR basic_scan_arg(string_view& value)
353 : type_(scan_type::string_view_type), string_view_(&value) {}
354 template <typename T>
355 FMT_CONSTEXPR basic_scan_arg(T& value) : type_(scan_type::custom_type) {
356 custom_.value = &value;
357 custom_.scan = scan_custom_arg<T>;
358 }
359
360 constexpr explicit operator bool() const noexcept {
361 return type_ != scan_type::none_type;
362 }
363
364 auto type() const -> detail::scan_type { return type_; }
365
366 template <typename Visitor>
367 auto visit(Visitor&& vis) -> decltype(vis(monostate())) {
368 switch (type_) {
369 case scan_type::none_type:
370 break;
371 case scan_type::int_type:
372 return vis(*int_value_);
373 case scan_type::uint_type:
374 return vis(*uint_value_);
375 case scan_type::long_long_type:
376 return vis(*long_long_value_);
377 case scan_type::ulong_long_type:
378 return vis(*ulong_long_value_);
379 case scan_type::string_type:
380 return vis(*string_);
381 case scan_type::string_view_type:
382 return vis(*string_view_);
383 case scan_type::custom_type:
384 break;
385 }
386 return vis(monostate());
387 }
388
389 auto scan_custom(const char* parse_begin, scan_parse_context& parse_ctx,
390 Context& ctx) const -> bool {
391 if (type_ != scan_type::custom_type) return false;
392 parse_ctx.advance_to(parse_begin);
393 custom_.scan(custom_.value, parse_ctx, ctx);
394 return true;
395 }
396 };
397
398 class scan_context;
399 using scan_arg = basic_scan_arg<scan_context>;
400
401 struct scan_args {
402 int size;
403 const scan_arg* data;
404
405 template <size_t N>
406 FMT_CONSTEXPR scan_args(const std::array<scan_arg, N>& store)
407 : size(N), data(store.data()) {
408 static_assert(N < INT_MAX, "too many arguments");
409 }
410 };
411
412 class scan_context {
413 private:
414 detail::scan_buffer& buf_;
415 scan_args args_;
416
417 public:
418 using iterator = detail::scan_iterator;
419 using sentinel = detail::scan_sentinel;
420
421 explicit FMT_CONSTEXPR scan_context(detail::scan_buffer& buf, scan_args args)
422 : buf_(buf), args_(args) {}
423
424 FMT_CONSTEXPR auto arg(int id) const -> scan_arg {
425 return id < args_.size ? args_.data[id] : scan_arg();
426 }
427
428 auto begin() const -> iterator { return buf_.begin(); }
429 auto end() const -> sentinel { return {}; }
430
431 void advance_to(iterator) { buf_.consume(); }
432 };
433
434 namespace detail {
435
436 const char* parse_scan_specs(const char* begin, const char* end,
437 format_specs<>& specs, scan_type) {
438 while (begin != end) {
439 switch (to_ascii(*begin)) {
440 // TODO: parse more scan format specifiers
441 case 'x':
442 specs.type = presentation_type::hex_lower;
443 ++begin;
444 break;
445 case '}':
446 return begin;
447 }
448 }
449 return begin;
450 }
451
452 template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
453 auto read(scan_iterator it, T& value)
454 -> scan_iterator {
455 if (it == scan_sentinel()) return it;
456 char c = *it;
457 if (c < '0' || c > '9') throw_format_error("invalid input");
458
459 int num_digits = 0;
460 T n = 0, prev = 0;
461 char prev_digit = c;
462 do {
463 prev = n;
464 n = n * 10 + static_cast<unsigned>(c - '0');
465 prev_digit = c;
466 c = *++it;
467 ++num_digits;
468 if (c < '0' || c > '9') break;
469 } while (it != scan_sentinel());
470
471 // Check overflow.
472 if (num_digits <= std::numeric_limits<int>::digits10) {
473 value = n;
474 return it;
475 }
476 unsigned max = to_unsigned((std::numeric_limits<int>::max)());
477 if (num_digits == std::numeric_limits<int>::digits10 + 1 &&
478 prev * 10ull + unsigned(prev_digit - '0') <= max) {
479 value = n;
480 } else {
481 throw_format_error("number is too big");
482 }
483 return it;
484 }
485
486 template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
487 auto read_hex(scan_iterator it, T& value)
488 -> scan_iterator {
489 if (it == scan_sentinel()) return it;
490 int digit = to_hex_digit(*it);
491 if (digit < 0) throw_format_error("invalid input");
492
493 int num_digits = 0;
494 T n = 0;
495 do {
496 n = (n << 4) + static_cast<unsigned>(digit);
497 ++num_digits;
498 digit = to_hex_digit(*++it);
499 if (digit < 0) break;
500 } while (it != scan_sentinel());
501
502 // Check overflow.
503 if (num_digits <= (std::numeric_limits<T>::digits >> 2))
504 value = n;
505 else
506 throw_format_error("number is too big");
507 return it;
508 }
509
510 template <typename T, FMT_ENABLE_IF(std::is_unsigned<T>::value)>
511 auto read(scan_iterator it, T& value, const format_specs<>& specs)
512 -> scan_iterator {
513 if (specs.type == presentation_type::hex_lower)
514 return read_hex(it, value);
515 return read(it, value);
516 }
517
518 template <typename T, FMT_ENABLE_IF(std::is_signed<T>::value)>
519 auto read(scan_iterator it, T& value, const format_specs<>& = {})
520 -> scan_iterator {
521 bool negative = it != scan_sentinel() && *it == '-';
522 if (negative) {
523 ++it;
524 if (it == scan_sentinel()) throw_format_error("invalid input");
525 }
526 using unsigned_type = typename std::make_unsigned<T>::type;
527 unsigned_type abs_value = 0;
528 it = read(it, abs_value);
529 auto n = static_cast<T>(abs_value);
530 value = negative ? -n : n;
531 return it;
532 }
533
534 auto read(scan_iterator it, std::string& value, const format_specs<>& = {})
535 -> scan_iterator {
536 while (it != scan_sentinel() && *it != ' ') value.push_back(*it++);
537 return it;
538 }
539
540 auto read(scan_iterator it, string_view& value, const format_specs<>& = {})
541 -> scan_iterator {
542 auto range = to_contiguous(it);
543 // This could also be checked at compile time in scan.
544 if (!range) throw_format_error("string_view requires contiguous input");
545 auto p = range.begin;
546 while (p != range.end && *p != ' ') ++p;
547 size_t size = to_unsigned(p - range.begin);
548 value = {range.begin, size};
549 return advance(it, size);
550 }
551
552 auto read(scan_iterator it, monostate, const format_specs<>& = {})
553 -> scan_iterator {
554 return it;
555 }
556
557 // An argument scanner that uses the default format, e.g. decimal for integers.
558 struct default_arg_scanner {
559 scan_iterator it;
560
561 template <typename T> FMT_INLINE auto operator()(T&& value) -> scan_iterator {
562 return read(it, value);
563 }
564 };
565
566 // An argument scanner with format specifiers.
567 struct arg_scanner {
568 scan_iterator it;
569 const format_specs<>& specs;
570
571 template <typename T> auto operator()(T&& value) -> scan_iterator {
572 return read(it, value, specs);
573 }
574 };
575
576 struct scan_handler : error_handler {
577 private:
578 scan_parse_context parse_ctx_;
579 scan_context scan_ctx_;
580 int next_arg_id_;
581
582 using sentinel = scan_buffer::sentinel;
583
584 public:
585 FMT_CONSTEXPR scan_handler(string_view format, scan_buffer& buf,
586 scan_args args)
587 : parse_ctx_(format), scan_ctx_(buf, args), next_arg_id_(0) {}
588
589 auto pos() const -> scan_buffer::iterator { return scan_ctx_.begin(); }
590
591 void on_text(const char* begin, const char* end) {
592 if (begin == end) return;
593 auto it = scan_ctx_.begin();
594 for (; begin != end; ++begin, ++it) {
595 if (it == sentinel() || *begin != *it) on_error("invalid input");
596 }
597 scan_ctx_.advance_to(it);
598 }
599
600 FMT_CONSTEXPR auto on_arg_id() -> int { return on_arg_id(next_arg_id_++); }
601 FMT_CONSTEXPR auto on_arg_id(int id) -> int {
602 if (!scan_ctx_.arg(id)) on_error("argument index out of range");
603 return id;
604 }
605 FMT_CONSTEXPR auto on_arg_id(string_view id) -> int {
606 if (id.data()) on_error("invalid format");
607 return 0;
608 }
609
610 void on_replacement_field(int arg_id, const char*) {
611 scan_arg arg = scan_ctx_.arg(arg_id);
612 auto it = scan_ctx_.begin();
613 while (it != sentinel() && is_whitespace(*it)) ++it;
614 scan_ctx_.advance_to(arg.visit(default_arg_scanner{it}));
615 }
616
617 auto on_format_specs(int arg_id, const char* begin, const char* end) -> const
618 char* {
619 scan_arg arg = scan_ctx_.arg(arg_id);
620 if (arg.scan_custom(begin, parse_ctx_, scan_ctx_))
621 return parse_ctx_.begin();
622 auto specs = format_specs<>();
623 begin = parse_scan_specs(begin, end, specs, arg.type());
624 if (begin == end || *begin != '}') on_error("missing '}' in format string");
625 scan_ctx_.advance_to(arg.visit(arg_scanner{scan_ctx_.begin(), specs}));
626 return begin;
627 }
628
629 void on_error(const char* message) { error_handler::on_error(message); }
630 };
631 } // namespace detail
632
633 template <typename... T>
634 auto make_scan_args(T&... args) -> std::array<scan_arg, sizeof...(T)> {
635 return {{args...}};
636 }
637
638 void vscan(detail::scan_buffer& buf, string_view fmt, scan_args args) {
639 auto h = detail::scan_handler(fmt, buf, args);
640 detail::parse_format_string<false>(fmt, h);
641 }
642
643 template <typename... T>
644 auto scan(string_view input, string_view fmt, T&... args)
645 -> string_view::iterator {
646 auto&& buf = detail::string_scan_buffer(input);
647 vscan(buf, fmt, make_scan_args(args...));
648 return input.begin() + (buf.begin().base() - input.data());
649 }
650
651 template <typename InputRange, typename... T,
652 FMT_ENABLE_IF(!std::is_convertible<InputRange, string_view>::value)>
653 auto scan(InputRange&& input, string_view fmt, T&... args)
654 -> decltype(std::begin(input)) {
655 auto it = std::begin(input);
656 vscan(get_buffer(it), fmt, make_scan_args(args...));
657 return it;
658 }
659
660 template <typename... T> bool scan(std::FILE* f, string_view fmt, T&... args) {
661 auto&& buf = detail::file_scan_buffer(f);
662 vscan(buf, fmt, make_scan_args(args...));
663 return buf.begin() != buf.end();
664 }
665
666 FMT_END_NAMESPACE