comparison dep/pugixml/src/pugixml.cpp @ 367:8d45d892be88 default tip

*: instead of pugixml, use Qt XML features; this means we have one extra Qt dependency though...
author Paper <paper@tflc.us>
date Sun, 17 Nov 2024 22:55:47 -0500
parents 886f66775f31
children
comparison
equal deleted inserted replaced
366:886f66775f31 367:8d45d892be88
1 /**
2 * pugixml parser - version 1.14
3 * --------------------------------------------------------
4 * Copyright (C) 2006-2023, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5 * Report bugs and download new versions at https://pugixml.org/
6 *
7 * This library is distributed under the MIT License. See notice at the end
8 * of this file.
9 *
10 * This work is based on the pugxml parser, which is:
11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12 */
13
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
16
17 #include "pugixml.hpp"
18
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <limits.h>
24
25 #ifdef PUGIXML_WCHAR_MODE
26 # include <wchar.h>
27 #endif
28
29 #ifndef PUGIXML_NO_XPATH
30 # include <math.h>
31 # include <float.h>
32 #endif
33
34 #ifndef PUGIXML_NO_STL
35 # include <istream>
36 # include <ostream>
37 # include <string>
38 #endif
39
40 // For placement new
41 #include <new>
42
43 // For load_file
44 #if defined(__linux__) || defined(__APPLE__)
45 #include <sys/stat.h>
46 #endif
47
48 #ifdef _MSC_VER
49 # pragma warning(push)
50 # pragma warning(disable: 4127) // conditional expression is constant
51 # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
52 # pragma warning(disable: 4702) // unreachable code
53 # pragma warning(disable: 4996) // this function or variable may be unsafe
54 #endif
55
56 #if defined(_MSC_VER) && defined(__c2__)
57 # pragma clang diagnostic push
58 # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
59 #endif
60
61 #ifdef __INTEL_COMPILER
62 # pragma warning(disable: 177) // function was declared but never referenced
63 # pragma warning(disable: 279) // controlling expression is constant
64 # pragma warning(disable: 1478 1786) // function was declared "deprecated"
65 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
66 #endif
67
68 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
69 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
70 #endif
71
72 #ifdef __BORLANDC__
73 # pragma option push
74 # pragma warn -8008 // condition is always false
75 # pragma warn -8066 // unreachable code
76 #endif
77
78 #ifdef __SNC__
79 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
80 # pragma diag_suppress=178 // function was declared but never referenced
81 # pragma diag_suppress=237 // controlling expression is constant
82 #endif
83
84 #ifdef __TI_COMPILER_VERSION__
85 # pragma diag_suppress 179 // function was declared but never referenced
86 #endif
87
88 // Inlining controls
89 #if defined(_MSC_VER) && _MSC_VER >= 1300
90 # define PUGI_IMPL_NO_INLINE __declspec(noinline)
91 #elif defined(__GNUC__)
92 # define PUGI_IMPL_NO_INLINE __attribute__((noinline))
93 #else
94 # define PUGI_IMPL_NO_INLINE
95 #endif
96
97 // Branch weight controls
98 #if defined(__GNUC__) && !defined(__c2__)
99 # define PUGI_IMPL_UNLIKELY(cond) __builtin_expect(cond, 0)
100 #else
101 # define PUGI_IMPL_UNLIKELY(cond) (cond)
102 #endif
103
104 // Simple static assertion
105 #define PUGI_IMPL_STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; }
106
107 // Digital Mars C++ bug workaround for passing char loaded from memory via stack
108 #ifdef __DMC__
109 # define PUGI_IMPL_DMC_VOLATILE volatile
110 #else
111 # define PUGI_IMPL_DMC_VOLATILE
112 #endif
113
114 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings
115 #if defined(__clang__) && defined(__has_attribute)
116 # if __has_attribute(no_sanitize)
117 # define PUGI_IMPL_UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
118 # else
119 # define PUGI_IMPL_UNSIGNED_OVERFLOW
120 # endif
121 #else
122 # define PUGI_IMPL_UNSIGNED_OVERFLOW
123 #endif
124
125 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all)
126 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST)
127 using std::memcpy;
128 using std::memmove;
129 using std::memset;
130 #endif
131
132 // Old versions of GCC do not define ::malloc and ::free depending on header include order
133 #if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
134 using std::malloc;
135 using std::free;
136 #endif
137
138 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations
139 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
140 # define LLONG_MIN (-LLONG_MAX - 1LL)
141 # define LLONG_MAX __LONG_LONG_MAX__
142 # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
143 #endif
144
145 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features
146 #if defined(_MSC_VER) && !defined(__S3E__) && !defined(_WIN32_WCE)
147 # define PUGI_IMPL_MSVC_CRT_VERSION _MSC_VER
148 #elif defined(_WIN32_WCE)
149 # define PUGI_IMPL_MSVC_CRT_VERSION 1310 // MSVC7.1
150 #endif
151
152 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size.
153 #if __cplusplus >= 201103
154 # define PUGI_IMPL_SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
155 #elif defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400
156 # define PUGI_IMPL_SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
157 #elif defined(__APPLE__) && __clang_major__ >= 14 // Xcode 14 marks sprintf as deprecated while still using C++98 by default
158 # define PUGI_IMPL_SNPRINTF(buf, fmt, arg1, arg2) snprintf(buf, sizeof(buf), fmt, arg1, arg2)
159 #else
160 # define PUGI_IMPL_SNPRINTF sprintf
161 #endif
162
163 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat.
164 #ifdef PUGIXML_HEADER_ONLY
165 # define PUGI_IMPL_NS_BEGIN namespace pugi { namespace impl {
166 # define PUGI_IMPL_NS_END } }
167 # define PUGI_IMPL_FN inline
168 # define PUGI_IMPL_FN_NO_INLINE inline
169 #else
170 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces
171 # define PUGI_IMPL_NS_BEGIN namespace pugi { namespace impl {
172 # define PUGI_IMPL_NS_END } }
173 # else
174 # define PUGI_IMPL_NS_BEGIN namespace pugi { namespace impl { namespace {
175 # define PUGI_IMPL_NS_END } } }
176 # endif
177 # define PUGI_IMPL_FN
178 # define PUGI_IMPL_FN_NO_INLINE PUGI_IMPL_NO_INLINE
179 #endif
180
181 // uintptr_t
182 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561)
183 namespace pugi
184 {
185 # ifndef _UINTPTR_T_DEFINED
186 typedef size_t uintptr_t;
187 # endif
188
189 typedef unsigned __int8 uint8_t;
190 typedef unsigned __int16 uint16_t;
191 typedef unsigned __int32 uint32_t;
192 }
193 #else
194 # include <stdint.h>
195 #endif
196
197 // Memory allocation
198 PUGI_IMPL_NS_BEGIN
199 PUGI_IMPL_FN void* default_allocate(size_t size)
200 {
201 return malloc(size);
202 }
203
204 PUGI_IMPL_FN void default_deallocate(void* ptr)
205 {
206 free(ptr);
207 }
208
// Holder for the two global memory hooks (allocate / deallocate).
// The template parameter T is not used by any member; the type is only ever named through the
// xml_memory typedef below, which instantiates it with int.
209 template <typename T>
210 struct xml_memory_management_function_storage
211 {
212 static allocation_function allocate; // current allocation hook; initialised to default_allocate below
213 static deallocation_function deallocate; // current deallocation hook; initialised to default_deallocate below
214 };
215
216 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
217 // Without a template<> we'll get multiple definitions of the same static
218 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
219 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
220
// Single instantiation used by the rest of the implementation (xml_memory::allocate / xml_memory::deallocate)
221 typedef xml_memory_management_function_storage<int> xml_memory;
222 PUGI_IMPL_NS_END
223
224 // String utilities
225 PUGI_IMPL_NS_BEGIN
226 // Get string length
227 PUGI_IMPL_FN size_t strlength(const char_t* s)
228 {
229 assert(s);
230
231 #ifdef PUGIXML_WCHAR_MODE
232 return wcslen(s);
233 #else
234 return strlen(s);
235 #endif
236 }
237
238 // Compare two strings
239 PUGI_IMPL_FN bool strequal(const char_t* src, const char_t* dst)
240 {
241 assert(src && dst);
242
243 #ifdef PUGIXML_WCHAR_MODE
244 return wcscmp(src, dst) == 0;
245 #else
246 return strcmp(src, dst) == 0;
247 #endif
248 }
249
250 // Compare lhs with [rhs_begin, rhs_end)
251 PUGI_IMPL_FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
252 {
253 for (size_t i = 0; i < count; ++i)
254 if (lhs[i] != rhs[i])
255 return false;
256
257 return lhs[count] == 0;
258 }
259
260 // Get length of wide string, even if CRT lacks wide character support
261 PUGI_IMPL_FN size_t strlength_wide(const wchar_t* s)
262 {
263 assert(s);
264
265 #ifdef PUGIXML_WCHAR_MODE
266 return wcslen(s);
267 #else
268 const wchar_t* end = s;
269 while (*end) end++;
270 return static_cast<size_t>(end - s);
271 #endif
272 }
273 PUGI_IMPL_NS_END
274
275 // auto_ptr-like object for exception recovery
276 PUGI_IMPL_NS_BEGIN
// Scope guard that pairs a raw pointer with the function used to dispose of it.
// - The destructor calls deleter(data) unless data is null.
// - release() hands the pointer back to the caller and disarms the guard.
// Copying is forbidden: two guards holding the same pointer would both invoke the deleter on it.
template <typename T> struct auto_deleter
{
	typedef void (*D)(T*);

	T* data;   // guarded pointer; null once released
	D deleter; // disposal function, invoked at most once by the destructor

	// data_ may be null (the guard is then a no-op); deleter_ must be callable whenever data_ is non-null
	auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
	{
	}

	~auto_deleter()
	{
		if (data) deleter(data);
	}

	// Returns the guarded pointer and clears it so the destructor no longer disposes of it
	T* release()
	{
		T* result = data;
		data = 0;
		return result;
	}

private:
	// declared but intentionally not defined: the C++98-compatible way to make the guard non-copyable
	auto_deleter(const auto_deleter&);
	auto_deleter& operator=(const auto_deleter&);
};
300 PUGI_IMPL_NS_END
301
302 #ifdef PUGIXML_COMPACT
303 PUGI_IMPL_NS_BEGIN
304 class compact_hash_table
305 {
306 public:
307 compact_hash_table(): _items(0), _capacity(0), _count(0)
308 {
309 }
310
311 void clear()
312 {
313 if (_items)
314 {
315 xml_memory::deallocate(_items);
316 _items = 0;
317 _capacity = 0;
318 _count = 0;
319 }
320 }
321
322 void* find(const void* key)
323 {
324 if (_capacity == 0) return 0;
325
326 item_t* item = get_item(key);
327 assert(item);
328 assert(item->key == key || (item->key == 0 && item->value == 0));
329
330 return item->value;
331 }
332
333 void insert(const void* key, void* value)
334 {
335 assert(_capacity != 0 && _count < _capacity - _capacity / 4);
336
337 item_t* item = get_item(key);
338 assert(item);
339
340 if (item->key == 0)
341 {
342 _count++;
343 item->key = key;
344 }
345
346 item->value = value;
347 }
348
349 bool reserve(size_t extra = 16)
350 {
351 if (_count + extra >= _capacity - _capacity / 4)
352 return rehash(_count + extra);
353
354 return true;
355 }
356
357 private:
358 struct item_t
359 {
360 const void* key;
361 void* value;
362 };
363
364 item_t* _items;
365 size_t _capacity;
366
367 size_t _count;
368
369 bool rehash(size_t count);
370
371 item_t* get_item(const void* key)
372 {
373 assert(key);
374 assert(_capacity > 0);
375
376 size_t hashmod = _capacity - 1;
377 size_t bucket = hash(key) & hashmod;
378
379 for (size_t probe = 0; probe <= hashmod; ++probe)
380 {
381 item_t& probe_item = _items[bucket];
382
383 if (probe_item.key == key || probe_item.key == 0)
384 return &probe_item;
385
386 // hash collision, quadratic probing
387 bucket = (bucket + probe + 1) & hashmod;
388 }
389
390 assert(false && "Hash table is full"); // unreachable
391 return 0;
392 }
393
394 static PUGI_IMPL_UNSIGNED_OVERFLOW unsigned int hash(const void* key)
395 {
396 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff);
397
398 // MurmurHash3 32-bit finalizer
399 h ^= h >> 16;
400 h *= 0x85ebca6bu;
401 h ^= h >> 13;
402 h *= 0xc2b2ae35u;
403 h ^= h >> 16;
404
405 return h;
406 }
407 };
408
409 PUGI_IMPL_FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
410 {
411 size_t capacity = 32;
412 while (count >= capacity - capacity / 4)
413 capacity *= 2;
414
415 compact_hash_table rt;
416 rt._capacity = capacity;
417 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
418
419 if (!rt._items)
420 return false;
421
422 memset(rt._items, 0, sizeof(item_t) * capacity);
423
424 for (size_t i = 0; i < _capacity; ++i)
425 if (_items[i].key)
426 rt.insert(_items[i].key, _items[i].value);
427
428 if (_items)
429 xml_memory::deallocate(_items);
430
431 _capacity = capacity;
432 _items = rt._items;
433
434 assert(_count == rt._count);
435
436 return true;
437 }
438
439 PUGI_IMPL_NS_END
440 #endif
441
442 PUGI_IMPL_NS_BEGIN
// Granularity of string allocations: allocate_string rounds sizes up to a multiple of this value and
// stores page offsets and sizes divided by it
443 #ifdef PUGIXML_COMPACT
444 static const uintptr_t xml_memory_block_alignment = 4;
445 #else
446 static const uintptr_t xml_memory_block_alignment = sizeof(void*);
447 #endif
448
449 // extra metadata bits
// these are bit values within the low byte of a node/attribute header; the low four bits (type mask)
// hold the xml_node_type, as read by PUGI_IMPL_NODETYPE
// NOTE(review): the exact meaning of the shared/allocated bits is defined by code outside this section
450 static const uintptr_t xml_memory_page_contents_shared_mask = 64;
451 static const uintptr_t xml_memory_page_name_allocated_mask = 32;
452 static const uintptr_t xml_memory_page_value_allocated_mask = 16;
453 static const uintptr_t xml_memory_page_type_mask = 15;
454
455 // combined masks for string uniqueness
456 static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask;
457 static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask;
458
// Header encoding helpers.
// Non-compact mode: the header word is (byte distance from the owning page to the object) << 8, OR-ed with
// the flag bits; the page is recovered by subtracting (header >> 8) from the address of the header field.
// Compact mode: the header is a compact_header object that locates its page itself via get_page().
459 #ifdef PUGIXML_COMPACT
460 #define PUGI_IMPL_GETHEADER_IMPL(object, page, flags) // unused
461 #define PUGI_IMPL_GETPAGE_IMPL(header) (header).get_page()
462 #else
463 #define PUGI_IMPL_GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
464 // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
465 #define PUGI_IMPL_GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8))))
466 #endif
467
// Convenience forms taking a pointer to a node or attribute structure: owning page, and node type (low 4 header bits)
468 #define PUGI_IMPL_GETPAGE(n) PUGI_IMPL_GETPAGE_IMPL((n)->header)
469 #define PUGI_IMPL_NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
470
struct xml_allocator;

// Bookkeeping header placed at the start of every allocator page; the page's payload follows it in memory
struct xml_memory_page
{
	// Initialises a page header in the given raw memory and returns it; all fields start out zeroed
	static xml_memory_page* construct(void* memory)
	{
		xml_memory_page* page = static_cast<xml_memory_page*>(memory);

		// no owner and no neighbours yet
		page->allocator = 0;
		page->prev = page->next = 0;

		// nothing handed out, nothing returned
		page->busy_size = page->freed_size = 0;

	#ifdef PUGIXML_COMPACT
		page->compact_string_base = 0;
		page->compact_shared_parent = 0;
		page->compact_page_marker = 0;
	#endif

		return page;
	}

	xml_allocator* allocator; // allocator that owns this page

	xml_memory_page* prev; // neighbouring pages in the allocator's page list
	xml_memory_page* next;

	size_t busy_size; // payload bytes handed out from this page
	size_t freed_size; // payload bytes returned so far; the page is reclaimable when it equals busy_size

#ifdef PUGIXML_COMPACT
	char_t* compact_string_base; // base address for compact string offsets within this page
	void* compact_shared_parent; // parent pointer shared by compact_pointer_parent instances of this page
	uint32_t* compact_page_marker; // most recent page marker used by compact headers
#endif
};
508
// Payload capacity of a regular page: the configured total page size (PUGIXML_MEMORY_PAGE_SIZE if defined,
// otherwise 32768 bytes) minus the xml_memory_page header that precedes the payload
509 static const size_t xml_memory_page_size =
510 #ifdef PUGIXML_MEMORY_PAGE_SIZE
511 (PUGIXML_MEMORY_PAGE_SIZE)
512 #else
513 32768
514 #endif
515 - sizeof(xml_memory_page);
516
// Prefix stored immediately before every string handed out by the page allocator.
// Both fields are expressed in allocation-block units rather than bytes.
struct xml_memory_string_header
{
	// distance from the start of the page payload to this header
	uint16_t page_offset;

	// total footprint of header plus string; 0 means the string occupies the whole page
	uint16_t full_size;
};
522
// Page-based allocator for nodes, attributes and strings.
// Memory is carved sequentially from the current page (_root); freed bytes are only counted per page,
// and a page is reclaimed as a whole once every byte handed out from it has been returned.
// _busy_size caches the current page's busy size; it is written back to the page lazily.
523 struct xml_allocator
524 {
// Starts allocating from the given page, continuing after whatever it already has in use
525 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
526 {
527 #ifdef PUGIXML_COMPACT
528 _hash = 0;
529 #endif
530 }
531
// Allocates and initialises a page able to hold data_size payload bytes; returns null on allocation failure.
// The page is not linked into the page list here.
532 xml_memory_page* allocate_page(size_t data_size)
533 {
534 size_t size = sizeof(xml_memory_page) + data_size;
535
536 // allocate the page header and its payload as a single block
537 void* memory = xml_memory::allocate(size);
538 if (!memory) return 0;
539
540 // prepare page structure
541 xml_memory_page* page = xml_memory_page::construct(memory);
542 assert(page);
543
544 assert(this == _root->allocator);
545 page->allocator = this;
546
547 return page;
548 }
549
// Returns a page obtained from allocate_page to the global deallocation hook
550 static void deallocate_page(xml_memory_page* page)
551 {
552 xml_memory::deallocate(page);
553 }
554
// Slow path of allocate_memory, taken when the request does not fit into the current page
555 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
556
// Hands out 'size' bytes and reports the page they came from through out_page.
// Returns null only if the slow path fails to obtain a new page.
557 void* allocate_memory(size_t size, xml_memory_page*& out_page)
558 {
559 if (PUGI_IMPL_UNLIKELY(_busy_size + size > xml_memory_page_size))
560 return allocate_memory_oob(size, out_page);
561
// fast path: the next free byte sits right after the page header plus the bytes already in use
562 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
563
564 _busy_size += size;
565
566 out_page = _root;
567
568 return buf;
569 }
570
571 #ifdef PUGIXML_COMPACT
// Compact mode: allocates an object and makes sure a page marker lies within 256 alignment units before it,
// inserting a fresh marker in front of the object when the previous one is too far away
572 void* allocate_object(size_t size, xml_memory_page*& out_page)
573 {
// reserve room for a possible marker up front
574 void* result = allocate_memory(size + sizeof(uint32_t), out_page);
575 if (!result) return 0;
576
577 // adjust for marker
578 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
579
580 if (PUGI_IMPL_UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
581 {
582 // insert new marker
583 uint32_t* marker = static_cast<uint32_t*>(result);
584
// the marker stores its own byte offset from the start of the page
585 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
586 out_page->compact_page_marker = marker;
587
588 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
589 // this will make sure deallocate_memory correctly tracks the size
590 out_page->freed_size += sizeof(uint32_t);
591
592 return marker + 1;
593 }
594 else
595 {
596 // roll back uint32_t part
597 _busy_size -= sizeof(uint32_t);
598
599 return result;
600 }
601 }
602 #else
// Non-compact mode: objects need no marker, so this is a plain allocation
603 void* allocate_object(size_t size, xml_memory_page*& out_page)
604 {
605 return allocate_memory(size, out_page);
606 }
607 #endif
608
// Records that 'size' bytes at ptr, belonging to 'page', are no longer in use.
// When everything handed out from the page has been returned, the page is either reset
// (if it is the last page in the list) or unlinked and deallocated.
609 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
610 {
// flush the cached busy size so the checks below see the current page's real usage
611 if (page == _root) page->busy_size = _busy_size;
612
613 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
// ptr is otherwise only referenced by the assertion above
614 (void)!ptr;
615
616 page->freed_size += size;
617 assert(page->freed_size <= page->busy_size);
618
619 if (page->freed_size == page->busy_size)
620 {
621 if (page->next == 0)
622 {
623 assert(_root == page);
624
625 // top page freed, just reset sizes
626 page->busy_size = 0;
627 page->freed_size = 0;
628
629 #ifdef PUGIXML_COMPACT
630 // reset compact state to maximize efficiency
631 page->compact_string_base = 0;
632 page->compact_shared_parent = 0;
633 page->compact_page_marker = 0;
634 #endif
635
636 _busy_size = 0;
637 }
638 else
639 {
640 assert(_root != page);
641 assert(page->prev);
642
643 // remove from the list
644 page->prev->next = page->next;
645 page->next->prev = page->prev;
646
647 // deallocate
648 deallocate_page(page);
649 }
650 }
651 }
652
// Allocates storage for 'length' char_t units (the caller includes any terminator in length), preceded by an
// xml_memory_string_header that lets deallocate_string find the owning page. Returns null on failure.
653 char_t* allocate_string(size_t length)
654 {
// largest byte offset representable by a 16-bit field counted in block-alignment units
655 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
656
657 PUGI_IMPL_STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
658
659 // allocate memory for string and header block
660 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
661
662 // round size up to block alignment boundary
663 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
664
665 xml_memory_page* page;
666 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
667
668 if (!header) return 0;
669
670 // setup header
671 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
672
673 assert(page_offset % xml_memory_block_alignment == 0);
674 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
675 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
676
677 // full_size == 0 for large strings that occupy the whole page
678 assert(full_size % xml_memory_block_alignment == 0);
679 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
680 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
681
682 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
683 // header is guaranteed a pointer-sized alignment, which should be enough for char_t
684 return static_cast<char_t*>(static_cast<void*>(header + 1));
685 }
686
// Releases a string previously returned by allocate_string
687 void deallocate_string(char_t* string)
688 {
689 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
690 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
691
692 // get header
693 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
694 assert(header);
695
696 // deallocate
// the header records its offset within the page payload, which leads back to the page start
697 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
698 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
699
700 // if full_size == 0 then this string occupies the whole page
701 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
702
703 deallocate_memory(header, full_size, page);
704 }
705
// Compact mode: makes room in the pointer hash table and reports whether that succeeded.
// Non-compact mode: nothing to reserve, always true.
706 bool reserve()
707 {
708 #ifdef PUGIXML_COMPACT
709 return _hash->reserve();
710 #else
711 return true;
712 #endif
713 }
714
715 xml_memory_page* _root; // page currently being allocated from (last page of the list)
716 size_t _busy_size; // bytes in use in _root; written back to _root->busy_size lazily
717
718 #ifdef PUGIXML_COMPACT
719 compact_hash_table* _hash; // table for pointers that do not fit the compact encodings; null after construction
720 #endif
721 };
722
723 PUGI_IMPL_FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
724 {
725 const size_t large_allocation_threshold = xml_memory_page_size / 4;
726
727 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
728 out_page = page;
729
730 if (!page) return 0;
731
732 if (size <= large_allocation_threshold)
733 {
734 _root->busy_size = _busy_size;
735
736 // insert page at the end of linked list
737 page->prev = _root;
738 _root->next = page;
739 _root = page;
740
741 _busy_size = size;
742 }
743 else
744 {
745 // insert page before the end of linked list, so that it is deleted as soon as possible
746 // the last page is not deleted even if it's empty (see deallocate_memory)
747 assert(_root->prev);
748
749 page->prev = _root->prev;
750 page->next = _root;
751
752 _root->prev->next = page;
753 _root->prev = page;
754
755 page->busy_size = size;
756 }
757
758 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
759 }
760 PUGI_IMPL_NS_END
761
762 #ifdef PUGIXML_COMPACT
763 PUGI_IMPL_NS_BEGIN
764 static const uintptr_t compact_alignment_log2 = 2;
765 static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
766
767 class compact_header
768 {
769 public:
770 compact_header(xml_memory_page* page, unsigned int flags)
771 {
772 PUGI_IMPL_STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
773
774 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
775 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
776
777 _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
778 _flags = static_cast<unsigned char>(flags);
779 }
780
781 void operator&=(uintptr_t mod)
782 {
783 _flags &= static_cast<unsigned char>(mod);
784 }
785
786 void operator|=(uintptr_t mod)
787 {
788 _flags |= static_cast<unsigned char>(mod);
789 }
790
791 uintptr_t operator&(uintptr_t mod) const
792 {
793 return _flags & mod;
794 }
795
796 xml_memory_page* get_page() const
797 {
798 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
799 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
800 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
801
802 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
803 }
804
805 private:
806 unsigned char _page;
807 unsigned char _flags;
808 };
809
810 PUGI_IMPL_FN xml_memory_page* compact_get_page(const void* object, int header_offset)
811 {
812 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
813
814 return header->get_page();
815 }
816
817 template <int header_offset, typename T> PUGI_IMPL_FN_NO_INLINE T* compact_get_value(const void* object)
818 {
819 return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
820 }
821
822 template <int header_offset, typename T> PUGI_IMPL_FN_NO_INLINE void compact_set_value(const void* object, T* value)
823 {
824 compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
825 }
826
827 template <typename T, int header_offset, int start = -126> class compact_pointer
828 {
829 public:
830 compact_pointer(): _data(0)
831 {
832 }
833
834 void operator=(const compact_pointer& rhs)
835 {
836 *this = rhs + 0;
837 }
838
839 void operator=(T* value)
840 {
841 if (value)
842 {
843 // value is guaranteed to be compact-aligned; 'this' is not
844 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
845 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
846 // compensate for arithmetic shift rounding for negative values
847 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
848 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
849
850 if (static_cast<uintptr_t>(offset) <= 253)
851 _data = static_cast<unsigned char>(offset + 1);
852 else
853 {
854 compact_set_value<header_offset>(this, value);
855
856 _data = 255;
857 }
858 }
859 else
860 _data = 0;
861 }
862
863 operator T*() const
864 {
865 if (_data)
866 {
867 if (_data < 255)
868 {
869 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
870
871 return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
872 }
873 else
874 return compact_get_value<header_offset, T>(this);
875 }
876 else
877 return 0;
878 }
879
880 T* operator->() const
881 {
882 return *this;
883 }
884
885 private:
886 unsigned char _data;
887 };
888
889 template <typename T, int header_offset> class compact_pointer_parent
890 {
891 public:
892 compact_pointer_parent(): _data(0)
893 {
894 }
895
896 void operator=(const compact_pointer_parent& rhs)
897 {
898 *this = rhs + 0;
899 }
900
901 void operator=(T* value)
902 {
903 if (value)
904 {
905 // value is guaranteed to be compact-aligned; 'this' is not
906 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
907 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
908 // compensate for arithmetic shift behavior for negative values
909 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
910 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
911
912 if (static_cast<uintptr_t>(offset) <= 65533)
913 {
914 _data = static_cast<unsigned short>(offset + 1);
915 }
916 else
917 {
918 xml_memory_page* page = compact_get_page(this, header_offset);
919
920 if (PUGI_IMPL_UNLIKELY(page->compact_shared_parent == 0))
921 page->compact_shared_parent = value;
922
923 if (page->compact_shared_parent == value)
924 {
925 _data = 65534;
926 }
927 else
928 {
929 compact_set_value<header_offset>(this, value);
930
931 _data = 65535;
932 }
933 }
934 }
935 else
936 {
937 _data = 0;
938 }
939 }
940
941 operator T*() const
942 {
943 if (_data)
944 {
945 if (_data < 65534)
946 {
947 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
948
949 return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
950 }
951 else if (_data == 65534)
952 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
953 else
954 return compact_get_value<header_offset, T>(this);
955 }
956 else
957 return 0;
958 }
959
960 T* operator->() const
961 {
962 return *this;
963 }
964
965 private:
966 uint16_t _data;
967 };
968
969 template <int header_offset, int base_offset> class compact_string
970 {
971 public:
972 compact_string(): _data(0)
973 {
974 }
975
976 void operator=(const compact_string& rhs)
977 {
978 *this = rhs + 0;
979 }
980
981 void operator=(char_t* value)
982 {
983 if (value)
984 {
985 xml_memory_page* page = compact_get_page(this, header_offset);
986
987 if (PUGI_IMPL_UNLIKELY(page->compact_string_base == 0))
988 page->compact_string_base = value;
989
990 ptrdiff_t offset = value - page->compact_string_base;
991
992 if (static_cast<uintptr_t>(offset) < (65535 << 7))
993 {
994 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
995 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
996
997 if (*base == 0)
998 {
999 *base = static_cast<uint16_t>((offset >> 7) + 1);
1000 _data = static_cast<unsigned char>((offset & 127) + 1);
1001 }
1002 else
1003 {
1004 ptrdiff_t remainder = offset - ((*base - 1) << 7);
1005
1006 if (static_cast<uintptr_t>(remainder) <= 253)
1007 {
1008 _data = static_cast<unsigned char>(remainder + 1);
1009 }
1010 else
1011 {
1012 compact_set_value<header_offset>(this, value);
1013
1014 _data = 255;
1015 }
1016 }
1017 }
1018 else
1019 {
1020 compact_set_value<header_offset>(this, value);
1021
1022 _data = 255;
1023 }
1024 }
1025 else
1026 {
1027 _data = 0;
1028 }
1029 }
1030
1031 operator char_t*() const
1032 {
1033 if (_data)
1034 {
1035 if (_data < 255)
1036 {
1037 xml_memory_page* page = compact_get_page(this, header_offset);
1038
1039 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1040 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
1041 assert(*base);
1042
1043 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1044
1045 return page->compact_string_base + offset;
1046 }
1047 else
1048 {
1049 return compact_get_value<header_offset, char_t>(this);
1050 }
1051 }
1052 else
1053 return 0;
1054 }
1055
1056 private:
1057 unsigned char _data;
1058 };
1059 PUGI_IMPL_NS_END
1060 #endif
1061
1062 #ifdef PUGIXML_COMPACT
1063 namespace pugi
1064 {
// Compact-mode attribute record, packed into 8 bytes (checked by the static assertion in the constructor).
// The first template argument of every compact_* member is that member's byte offset from 'header';
// the second argument of compact_string is its byte offset from 'namevalue_base'.
1065 struct xml_attribute_struct
1066 {
// page must be the memory page this object was allocated from; flags start at 0
1067 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
1068 {
1069 PUGI_IMPL_STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
1070 }
1071
1072 impl::compact_header header; // bytes 0-1: page locator + flags
1073
1074 uint16_t namevalue_base; // bytes 2-3: base shared by the name and value string encodings; 0 = not set yet
1075
1076 impl::compact_string<4, 2> name; // byte 4
1077 impl::compact_string<5, 3> value; // byte 5
1078
1079 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; // byte 6; default start (-126) allows targets on either side
1080 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; // byte 7; start 0: only targets at or after this field are encoded inline
1081 };
1082
// Compact-mode node record, packed into 12 bytes (checked by the static assertion in the constructor).
// The first template argument of every compact_* member is that member's byte offset from 'header';
// the second argument of compact_string is its byte offset from 'namevalue_base'.
1083 struct xml_node_struct
1084 {
// page must be the memory page this object was allocated from; the node type is stored in the header flags
1085 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
1086 {
1087 PUGI_IMPL_STATIC_ASSERT(sizeof(xml_node_struct) == 12);
1088 }
1089
1090 impl::compact_header header; // bytes 0-1: page locator + flags (including node type)
1091
1092 uint16_t namevalue_base; // bytes 2-3: base shared by the name and value string encodings; 0 = not set yet
1093
1094 impl::compact_string<4, 2> name; // byte 4
1095 impl::compact_string<5, 3> value; // byte 5
1096
1097 impl::compact_pointer_parent<xml_node_struct, 6> parent; // bytes 6-7
1098
1099 impl::compact_pointer<xml_node_struct, 8, 0> first_child; // byte 8; start 0: only targets at or after this field are encoded inline
1100
1101 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; // byte 9; default start (-126) allows targets on either side
1102 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; // byte 10
1103
1104 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; // byte 11
1105 };
1106 }
1107 #else
1108 namespace pugi
1109 {
1110 struct xml_attribute_struct
1111 {
1112 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
1113 {
1114 header = PUGI_IMPL_GETHEADER_IMPL(this, page, 0);
1115 }
1116
1117 uintptr_t header;
1118
1119 char_t* name;
1120 char_t* value;
1121
1122 xml_attribute_struct* prev_attribute_c;
1123 xml_attribute_struct* next_attribute;
1124 };
1125
1126 struct xml_node_struct
1127 {
1128 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
1129 {
1130 header = PUGI_IMPL_GETHEADER_IMPL(this, page, type);
1131 }
1132
1133 uintptr_t header;
1134
1135 char_t* name;
1136 char_t* value;
1137
1138 xml_node_struct* parent;
1139
1140 xml_node_struct* first_child;
1141
1142 xml_node_struct* prev_sibling_c;
1143 xml_node_struct* next_sibling;
1144
1145 xml_attribute_struct* first_attribute;
1146 };
1147 }
1148 #endif
1149
1150 PUGI_IMPL_NS_BEGIN
1151 struct xml_extra_buffer
1152 {
1153 char_t* buffer;
1154 xml_extra_buffer* next;
1155 };
1156
1157 struct xml_document_struct: public xml_node_struct, public xml_allocator
1158 {
1159 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
1160 {
1161 }
1162
1163 const char_t* buffer;
1164
1165 xml_extra_buffer* extra_buffers;
1166
1167 #ifdef PUGIXML_COMPACT
1168 compact_hash_table hash;
1169 #endif
1170 };
1171
1172 template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1173 {
1174 assert(object);
1175
1176 return *PUGI_IMPL_GETPAGE(object)->allocator;
1177 }
1178
1179 template <typename Object> inline xml_document_struct& get_document(const Object* object)
1180 {
1181 assert(object);
1182
1183 return *static_cast<xml_document_struct*>(PUGI_IMPL_GETPAGE(object)->allocator);
1184 }
1185 PUGI_IMPL_NS_END
1186
1187 // Low-level DOM operations
1188 PUGI_IMPL_NS_BEGIN
1189 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1190 {
1191 xml_memory_page* page;
1192 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1193 if (!memory) return 0;
1194
1195 return new (memory) xml_attribute_struct(page);
1196 }
1197
1198 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1199 {
1200 xml_memory_page* page;
1201 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1202 if (!memory) return 0;
1203
1204 return new (memory) xml_node_struct(page, type);
1205 }
1206
1207 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1208 {
1209 if (a->header & impl::xml_memory_page_name_allocated_mask)
1210 alloc.deallocate_string(a->name);
1211
1212 if (a->header & impl::xml_memory_page_value_allocated_mask)
1213 alloc.deallocate_string(a->value);
1214
1215 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI_IMPL_GETPAGE(a));
1216 }
1217
1218 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1219 {
1220 if (n->header & impl::xml_memory_page_name_allocated_mask)
1221 alloc.deallocate_string(n->name);
1222
1223 if (n->header & impl::xml_memory_page_value_allocated_mask)
1224 alloc.deallocate_string(n->value);
1225
1226 for (xml_attribute_struct* attr = n->first_attribute; attr; )
1227 {
1228 xml_attribute_struct* next = attr->next_attribute;
1229
1230 destroy_attribute(attr, alloc);
1231
1232 attr = next;
1233 }
1234
1235 for (xml_node_struct* child = n->first_child; child; )
1236 {
1237 xml_node_struct* next = child->next_sibling;
1238
1239 destroy_node(child, alloc);
1240
1241 child = next;
1242 }
1243
1244 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI_IMPL_GETPAGE(n));
1245 }
1246
1247 inline void append_node(xml_node_struct* child, xml_node_struct* node)
1248 {
1249 child->parent = node;
1250
1251 xml_node_struct* head = node->first_child;
1252
1253 if (head)
1254 {
1255 xml_node_struct* tail = head->prev_sibling_c;
1256
1257 tail->next_sibling = child;
1258 child->prev_sibling_c = tail;
1259 head->prev_sibling_c = child;
1260 }
1261 else
1262 {
1263 node->first_child = child;
1264 child->prev_sibling_c = child;
1265 }
1266 }
1267
1268 inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1269 {
1270 child->parent = node;
1271
1272 xml_node_struct* head = node->first_child;
1273
1274 if (head)
1275 {
1276 child->prev_sibling_c = head->prev_sibling_c;
1277 head->prev_sibling_c = child;
1278 }
1279 else
1280 child->prev_sibling_c = child;
1281
1282 child->next_sibling = head;
1283 node->first_child = child;
1284 }
1285
1286 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1287 {
1288 xml_node_struct* parent = node->parent;
1289
1290 child->parent = parent;
1291
1292 xml_node_struct* next = node->next_sibling;
1293
1294 if (next)
1295 next->prev_sibling_c = child;
1296 else
1297 parent->first_child->prev_sibling_c = child;
1298
1299 child->next_sibling = next;
1300 child->prev_sibling_c = node;
1301
1302 node->next_sibling = child;
1303 }
1304
1305 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1306 {
1307 xml_node_struct* parent = node->parent;
1308
1309 child->parent = parent;
1310
1311 xml_node_struct* prev = node->prev_sibling_c;
1312
1313 if (prev->next_sibling)
1314 prev->next_sibling = child;
1315 else
1316 parent->first_child = child;
1317
1318 child->prev_sibling_c = prev;
1319 child->next_sibling = node;
1320
1321 node->prev_sibling_c = child;
1322 }
1323
1324 inline void remove_node(xml_node_struct* node)
1325 {
1326 xml_node_struct* parent = node->parent;
1327
1328 xml_node_struct* next = node->next_sibling;
1329 xml_node_struct* prev = node->prev_sibling_c;
1330
1331 if (next)
1332 next->prev_sibling_c = prev;
1333 else
1334 parent->first_child->prev_sibling_c = prev;
1335
1336 if (prev->next_sibling)
1337 prev->next_sibling = next;
1338 else
1339 parent->first_child = next;
1340
1341 node->parent = 0;
1342 node->prev_sibling_c = 0;
1343 node->next_sibling = 0;
1344 }
1345
1346 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1347 {
1348 xml_attribute_struct* head = node->first_attribute;
1349
1350 if (head)
1351 {
1352 xml_attribute_struct* tail = head->prev_attribute_c;
1353
1354 tail->next_attribute = attr;
1355 attr->prev_attribute_c = tail;
1356 head->prev_attribute_c = attr;
1357 }
1358 else
1359 {
1360 node->first_attribute = attr;
1361 attr->prev_attribute_c = attr;
1362 }
1363 }
1364
1365 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1366 {
1367 xml_attribute_struct* head = node->first_attribute;
1368
1369 if (head)
1370 {
1371 attr->prev_attribute_c = head->prev_attribute_c;
1372 head->prev_attribute_c = attr;
1373 }
1374 else
1375 attr->prev_attribute_c = attr;
1376
1377 attr->next_attribute = head;
1378 node->first_attribute = attr;
1379 }
1380
1381 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1382 {
1383 xml_attribute_struct* next = place->next_attribute;
1384
1385 if (next)
1386 next->prev_attribute_c = attr;
1387 else
1388 node->first_attribute->prev_attribute_c = attr;
1389
1390 attr->next_attribute = next;
1391 attr->prev_attribute_c = place;
1392 place->next_attribute = attr;
1393 }
1394
1395 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1396 {
1397 xml_attribute_struct* prev = place->prev_attribute_c;
1398
1399 if (prev->next_attribute)
1400 prev->next_attribute = attr;
1401 else
1402 node->first_attribute = attr;
1403
1404 attr->prev_attribute_c = prev;
1405 attr->next_attribute = place;
1406 place->prev_attribute_c = attr;
1407 }
1408
1409 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1410 {
1411 xml_attribute_struct* next = attr->next_attribute;
1412 xml_attribute_struct* prev = attr->prev_attribute_c;
1413
1414 if (next)
1415 next->prev_attribute_c = prev;
1416 else
1417 node->first_attribute->prev_attribute_c = prev;
1418
1419 if (prev->next_attribute)
1420 prev->next_attribute = next;
1421 else
1422 node->first_attribute = next;
1423
1424 attr->prev_attribute_c = 0;
1425 attr->next_attribute = 0;
1426 }
1427
1428 PUGI_IMPL_FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1429 {
1430 if (!alloc.reserve()) return 0;
1431
1432 xml_node_struct* child = allocate_node(alloc, type);
1433 if (!child) return 0;
1434
1435 append_node(child, node);
1436
1437 return child;
1438 }
1439
1440 PUGI_IMPL_FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1441 {
1442 if (!alloc.reserve()) return 0;
1443
1444 xml_attribute_struct* attr = allocate_attribute(alloc);
1445 if (!attr) return 0;
1446
1447 append_attribute(attr, node);
1448
1449 return attr;
1450 }
1451 PUGI_IMPL_NS_END
1452
1453 // Helper classes for code generation
1454 PUGI_IMPL_NS_BEGIN
// Compile-time 'false' tag: value is 0.
struct opt_false
{
	enum
	{
		value = 0
	};
};
1459
// Compile-time 'true' tag: value is 1.
struct opt_true
{
	enum
	{
		value = 1
	};
};
1464 PUGI_IMPL_NS_END
1465
1466 // Unicode utilities
1467 PUGI_IMPL_NS_BEGIN
// Returns the 16-bit value with its two bytes exchanged.
inline uint16_t endian_swap(uint16_t value)
{
	const unsigned int low_byte = value & 0xff;
	const unsigned int high_byte = value >> 8;

	return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
1472
// Returns the 32-bit value with its four bytes in reverse order.
inline uint32_t endian_swap(uint32_t value)
{
	const uint32_t byte0 = value & 0xff;
	const uint32_t byte1 = value & 0xff00;
	const uint32_t byte2 = value & 0xff0000;
	const uint32_t byte3 = value >> 24;

	return (byte0 << 24) | (byte1 << 8) | (byte2 >> 8) | byte3;
}
1477
// Output traits that only count how many UTF-8 code units a code point sequence needs.
struct utf8_counter
{
	typedef size_t value_type;

	// Code points below U+10000: one unit up to U+007F, two up to U+07FF, three otherwise.
	static value_type low(value_type result, uint32_t ch)
	{
		const value_type units = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;

		return result + units;
	}

	// U+10000..U+10FFFF always take four units.
	static value_type high(value_type result, uint32_t)
	{
		return result + 4;
	}
};
1498
// Output traits that encode code points as UTF-8 and return the advanced output pointer.
// No bounds checking is performed.
struct utf8_writer
{
	typedef uint8_t* value_type;

	// Encodes a code point below U+10000 using one, two or three bytes.
	static value_type low(value_type result, uint32_t ch)
	{
		// U+0800..U+FFFF
		if (ch >= 0x800)
		{
			*result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
			*result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
			*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
			return result;
		}

		// U+0080..U+07FF
		if (ch >= 0x80)
		{
			*result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
			*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
			return result;
		}

		// U+0000..U+007F
		*result++ = static_cast<uint8_t>(ch);
		return result;
	}

	// Encodes U+10000..U+10FFFF using four bytes.
	static value_type high(value_type result, uint32_t ch)
	{
		*result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
		*result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
		*result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
		*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
		return result;
	}

	// Picks low() or high() depending on the code point.
	static value_type any(value_type result, uint32_t ch)
	{
		if (ch < 0x10000)
			return low(result, ch);

		return high(result, ch);
	}
};
1543
// Output traits that only count UTF-16 code units.
struct utf16_counter
{
	typedef size_t value_type;

	// a code point below U+10000 takes a single unit
	static value_type low(value_type result, uint32_t)
	{
		return result + 1;
	}

	// a code point at or above U+10000 takes a surrogate pair
	static value_type high(value_type result, uint32_t)
	{
		return result + 2;
	}
};
1558
// Output traits that encode code points as UTF-16 and return the advanced output pointer.
// No bounds checking is performed.
struct utf16_writer
{
	typedef uint16_t* value_type;

	// Stores a code point below U+10000 as a single unit.
	static value_type low(value_type result, uint32_t ch)
	{
		result[0] = static_cast<uint16_t>(ch);

		return result + 1;
	}

	// Stores a code point at or above U+10000 as a high/low surrogate pair.
	static value_type high(value_type result, uint32_t ch)
	{
		const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);

		result[0] = static_cast<uint16_t>(0xD800 + (offset >> 10));
		result[1] = static_cast<uint16_t>(0xDC00 + (offset & 0x3ff));

		return result + 2;
	}

	// Picks low() or high() depending on the code point.
	static value_type any(value_type result, uint32_t ch)
	{
		if (ch < 0x10000)
			return low(result, ch);

		return high(result, ch);
	}
};
1586
// Output traits that only count UTF-32 code units: every code point takes exactly one.
struct utf32_counter
{
	typedef size_t value_type;

	static value_type low(value_type result, uint32_t)
	{
		return result + 1;
	}

	static value_type high(value_type result, uint32_t)
	{
		return result + 1;
	}
};
1601
// Output traits that store code points as UTF-32 units and return the advanced output pointer.
// All three entry points store the code point unchanged; no bounds checking is performed.
struct utf32_writer
{
	typedef uint32_t* value_type;

	static value_type low(value_type result, uint32_t ch)
	{
		result[0] = ch;

		return result + 1;
	}

	static value_type high(value_type result, uint32_t ch)
	{
		return low(result, ch);
	}

	static value_type any(value_type result, uint32_t ch)
	{
		return low(result, ch);
	}
};
1627
// Output traits that store code points as single Latin-1 bytes and return the advanced output pointer.
// Code points that do not fit into one byte are replaced with '?'. No bounds checking is performed.
struct latin1_writer
{
	typedef uint8_t* value_type;

	static value_type low(value_type result, uint32_t ch)
	{
		if (ch > 255)
			*result = static_cast<uint8_t>('?');
		else
			*result = static_cast<uint8_t>(ch);

		return result + 1;
	}

	// code points at or above U+10000 can never be represented
	static value_type high(value_type result, uint32_t ch)
	{
		(void)ch;

		result[0] = '?';

		return result + 1;
	}
};
1648
// Decodes UTF-8 input and feeds every decoded code point to the given output traits.
struct utf8_decoder
{
	typedef uint8_t type;

	// true for bytes of the form 10xxxxxx
	static inline bool is_continuation(uint8_t byte)
	{
		return (byte & 0xc0) == 0x80;
	}

	// Walks size bytes starting at data. Code points below U+10000 go to Traits::low, the rest to Traits::high;
	// the running result is threaded through the calls and returned at the end.
	// A byte that does not start a complete, well-formed sequence is skipped on its own.
	template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
	{
		const uint8_t payload_mask = 0x3f;

		while (size)
		{
			const uint8_t lead = *data;

			// 0xxxxxxx -> U+0000..U+007F
			if (lead < 0x80)
			{
				result = Traits::low(result, lead);
				++data;
				--size;

				// the block path below is only taken from a 4-byte aligned address
				if ((reinterpret_cast<uintptr_t>(data) & 3) != 0)
					continue;

				// consume four bytes at a time while none of them has the high bit set;
				// round-trip through void* to silence 'cast increases required alignment of target type' warnings
				while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
				{
					for (int i = 0; i < 4; ++i)
						result = Traits::low(result, data[i]);

					data += 4;
					size -= 4;
				}

				continue;
			}

			// 110xxxxx 10xxxxxx -> U+0080..U+07FF
			if (lead >= 0xC0 && lead < 0xE0 && size >= 2 && is_continuation(data[1]))
			{
				result = Traits::low(result, ((lead & 0x1f) << 6) | (data[1] & payload_mask));
				data += 2;
				size -= 2;
				continue;
			}

			// 1110xxxx 10xxxxxx 10xxxxxx -> U+0800..U+FFFF
			if (lead >= 0xE0 && lead < 0xF0 && size >= 3 && is_continuation(data[1]) && is_continuation(data[2]))
			{
				result = Traits::low(result, ((lead & 0x0f) << 12) | ((data[1] & payload_mask) << 6) | (data[2] & payload_mask));
				data += 3;
				size -= 3;
				continue;
			}

			// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -> U+10000..U+10FFFF
			if (lead >= 0xF0 && lead < 0xF8 && size >= 4 && is_continuation(data[1]) && is_continuation(data[2]) && is_continuation(data[3]))
			{
				result = Traits::high(result, ((lead & 0x07) << 18) | ((data[1] & payload_mask) << 12) | ((data[2] & payload_mask) << 6) | (data[3] & payload_mask));
				data += 4;
				size -= 4;
				continue;
			}

			// 10xxxxxx, 11111xxx or a truncated/malformed sequence -> skip one byte
			++data;
			--size;
		}

		return result;
	}
};
1715
1716 template <typename opt_swap> struct utf16_decoder
1717 {
1718 typedef uint16_t type;
1719
1720 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1721 {
1722 while (size)
1723 {
1724 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1725
1726 // U+0000..U+D7FF
1727 if (lead < 0xD800)
1728 {
1729 result = Traits::low(result, lead);
1730 data += 1;
1731 size -= 1;
1732 }
1733 // U+E000..U+FFFF
1734 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1735 {
1736 result = Traits::low(result, lead);
1737 data += 1;
1738 size -= 1;
1739 }
1740 // surrogate pair lead
1741 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1742 {
1743 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1744
1745 if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1746 {
1747 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1748 data += 2;
1749 size -= 2;
1750 }
1751 else
1752 {
1753 data += 1;
1754 size -= 1;
1755 }
1756 }
1757 else
1758 {
1759 data += 1;
1760 size -= 1;
1761 }
1762 }
1763
1764 return result;
1765 }
1766 };
1767
1768 template <typename opt_swap> struct utf32_decoder
1769 {
1770 typedef uint32_t type;
1771
1772 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1773 {
1774 while (size)
1775 {
1776 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1777
1778 // U+0000..U+FFFF
1779 if (lead < 0x10000)
1780 {
1781 result = Traits::low(result, lead);
1782 data += 1;
1783 size -= 1;
1784 }
1785 // U+10000..U+10FFFF
1786 else
1787 {
1788 result = Traits::high(result, lead);
1789 data += 1;
1790 size -= 1;
1791 }
1792 }
1793
1794 return result;
1795 }
1796 };
1797
1798 struct latin1_decoder
1799 {
1800 typedef uint8_t type;
1801
1802 template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
1803 {
1804 while (size)
1805 {
1806 result = Traits::low(result, *data);
1807 data += 1;
1808 size -= 1;
1809 }
1810
1811 return result;
1812 }
1813 };
1814
1815 template <size_t size> struct wchar_selector;
1816
1817 template <> struct wchar_selector<2>
1818 {
1819 typedef uint16_t type;
1820 typedef utf16_counter counter;
1821 typedef utf16_writer writer;
1822 typedef utf16_decoder<opt_false> decoder;
1823 };
1824
1825 template <> struct wchar_selector<4>
1826 {
1827 typedef uint32_t type;
1828 typedef utf32_counter counter;
1829 typedef utf32_writer writer;
1830 typedef utf32_decoder<opt_false> decoder;
1831 };
1832
1833 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
1834 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
1835
1836 struct wchar_decoder
1837 {
1838 typedef wchar_t type;
1839
1840 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1841 {
1842 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
1843
1844 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1845 }
1846 };
1847
1848 #ifdef PUGIXML_WCHAR_MODE
1849 PUGI_IMPL_FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1850 {
1851 for (size_t i = 0; i < length; ++i)
1852 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1853 }
1854 #endif
1855 PUGI_IMPL_NS_END
1856
1857 PUGI_IMPL_NS_BEGIN
// Bit flags stored in chartype_table; each flag names the class of characters that carry it.
enum chartype_t
{
	ct_parse_pcdata = 1 << 0,  // \0, &, \r, <
	ct_parse_attr = 1 << 1,    // \0, &, \r, ', "
	ct_parse_attr_ws = 1 << 2, // \0, &, \r, ', ", \n, tab
	ct_space = 1 << 3,         // \r, \n, space, tab
	ct_parse_cdata = 1 << 4,   // \0, ], >, \r
	ct_parse_comment = 1 << 5, // \0, -, >, \r
	ct_symbol = 1 << 6,        // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
	ct_start_symbol = 1 << 7   // Any symbol > 127, a-z, A-Z, _, :
};
1869
// Per-byte combination of chartype_t flags, indexed by character code.
// Every entry from 128 upwards is 192 (ct_symbol | ct_start_symbol).
static const unsigned char chartype_table[256] =
{
	// 0x00-0x0F: \0, tab, \n, \r
	55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,
	// 0x10-0x1F
	0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,
	// 0x20-0x2F: space, ", &, ', -, .
	8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,
	// 0x30-0x3F: 0-9, :, <, >
	64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,
	// 0x40-0x4F: A-O
	0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	// 0x50-0x5F: P-Z, ], _
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192,
	// 0x60-0x6F: a-o
	0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	// 0x70-0x7F: p-z
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   0,   0,   0,

	// 0x80-0xFF
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
};
1890
// Bit flags stored in chartypex_table; each flag names the class of characters that carry it.
enum chartypex_t
{
	ctx_special_pcdata = 1 << 0, // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
	ctx_special_attr = 1 << 1,   // Any symbol >= 0 and < 32, &, <, ", '
	ctx_start_symbol = 1 << 2,   // Any symbol > 127, a-z, A-Z, _
	ctx_digit = 1 << 3,          // 0-9
	ctx_symbol = 1 << 4          // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
1899
// Per-byte combination of chartypex_t flags, indexed by character code.
// Every entry from 128 upwards is 20 (ctx_start_symbol | ctx_symbol).
static const unsigned char chartypex_table[256] =
{
	// 0x00-0x0F: control characters; tab, \n and \r carry only ctx_special_attr
	3,  3,  3,  3,  3,  3,  3,  3,     3,  2,  2,  3,  3,  2,  3,  3,
	// 0x10-0x1F
	3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,
	// 0x20-0x2F: ", &, ', -, .
	0,  0,  2,  0,  0,  0,  3,  2,     0,  0,  0,  0,  0,  16, 16, 0,
	// 0x30-0x3F: 0-9, <, >
	24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  1,  0,

	// 0x40-0x4F: A-O
	0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	// 0x50-0x5F: P-Z, _
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,
	// 0x60-0x6F: a-o
	0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	// 0x70-0x7F: p-z
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,

	// 0x80-0xFF
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
};
1921
1922 #ifdef PUGIXML_WCHAR_MODE
1923 #define PUGI_IMPL_IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
1924 #else
1925 #define PUGI_IMPL_IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
1926 #endif
1927
1928 #define PUGI_IMPL_IS_CHARTYPE(c, ct) PUGI_IMPL_IS_CHARTYPE_IMPL(c, ct, chartype_table)
1929 #define PUGI_IMPL_IS_CHARTYPEX(c, ct) PUGI_IMPL_IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1930
1931 PUGI_IMPL_FN bool is_little_endian()
1932 {
1933 unsigned int ui = 1;
1934
1935 return *reinterpret_cast<unsigned char*>(&ui) == 1;
1936 }
1937
1938 PUGI_IMPL_FN xml_encoding get_wchar_encoding()
1939 {
1940 PUGI_IMPL_STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1941
1942 if (sizeof(wchar_t) == 2)
1943 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1944 else
1945 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1946 }
1947
1948 PUGI_IMPL_FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
1949 {
1950 #define PUGI_IMPL_SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
1951 #define PUGI_IMPL_SCANCHARTYPE(ct) { while (offset < size && PUGI_IMPL_IS_CHARTYPE(data[offset], ct)) offset++; }
1952
1953 // check if we have a non-empty XML declaration
1954 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI_IMPL_IS_CHARTYPE(data[5], ct_space)))
1955 return false;
1956
1957 // scan XML declaration until the encoding field
1958 for (size_t i = 6; i + 1 < size; ++i)
1959 {
1960 // declaration can not contain ? in quoted values
1961 if (data[i] == '?')
1962 return false;
1963
1964 if (data[i] == 'e' && data[i + 1] == 'n')
1965 {
1966 size_t offset = i;
1967
1968 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
1969 PUGI_IMPL_SCANCHAR('e'); PUGI_IMPL_SCANCHAR('n'); PUGI_IMPL_SCANCHAR('c'); PUGI_IMPL_SCANCHAR('o');
1970 PUGI_IMPL_SCANCHAR('d'); PUGI_IMPL_SCANCHAR('i'); PUGI_IMPL_SCANCHAR('n'); PUGI_IMPL_SCANCHAR('g');
1971
1972 // S? = S?
1973 PUGI_IMPL_SCANCHARTYPE(ct_space);
1974 PUGI_IMPL_SCANCHAR('=');
1975 PUGI_IMPL_SCANCHARTYPE(ct_space);
1976
1977 // the only two valid delimiters are ' and "
1978 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';
1979
1980 PUGI_IMPL_SCANCHAR(delimiter);
1981
1982 size_t start = offset;
1983
1984 out_encoding = data + offset;
1985
1986 PUGI_IMPL_SCANCHARTYPE(ct_symbol);
1987
1988 out_length = offset - start;
1989
1990 PUGI_IMPL_SCANCHAR(delimiter);
1991
1992 return true;
1993 }
1994 }
1995
1996 return false;
1997
1998 #undef PUGI_IMPL_SCANCHAR
1999 #undef PUGI_IMPL_SCANCHARTYPE
2000 }
2001
2002 PUGI_IMPL_FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
2003 {
2004 // skip encoding autodetection if input buffer is too small
2005 if (size < 4) return encoding_utf8;
2006
2007 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
2008
2009 // look for BOM in first few bytes
2010 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
2011 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
2012 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
2013 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
2014 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
2015
2016 // look for <, <? or <?xm in various encodings
2017 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
2018 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
2019 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
2020 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
2021
2022 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
2023 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
2024 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
2025
2026 // no known BOM detected; parse declaration
2027 const uint8_t* enc = 0;
2028 size_t enc_length = 0;
2029
2030 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
2031 {
2032 // iso-8859-1 (case-insensitive)
2033 if (enc_length == 10
2034 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
2035 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
2036 && enc[8] == '-' && enc[9] == '1')
2037 return encoding_latin1;
2038
2039 // latin1 (case-insensitive)
2040 if (enc_length == 6
2041 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
2042 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
2043 && enc[5] == '1')
2044 return encoding_latin1;
2045 }
2046
2047 return encoding_utf8;
2048 }
2049
2050 PUGI_IMPL_FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2051 {
2052 // replace wchar encoding with utf implementation
2053 if (encoding == encoding_wchar) return get_wchar_encoding();
2054
2055 // replace utf16 encoding with utf16 with specific endianness
2056 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2057
2058 // replace utf32 encoding with utf32 with specific endianness
2059 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2060
2061 // only do autodetection if no explicit encoding is requested
2062 if (encoding != encoding_auto) return encoding;
2063
2064 // try to guess encoding (based on XML specification, Appendix F.1)
2065 const uint8_t* data = static_cast<const uint8_t*>(contents);
2066
2067 return guess_buffer_encoding(data, size);
2068 }
2069
2070 PUGI_IMPL_FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2071 {
2072 size_t length = size / sizeof(char_t);
2073
2074 if (is_mutable)
2075 {
2076 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2077 out_length = length;
2078 }
2079 else
2080 {
2081 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2082 if (!buffer) return false;
2083
2084 if (contents)
2085 memcpy(buffer, contents, length * sizeof(char_t));
2086 else
2087 assert(length == 0);
2088
2089 buffer[length] = 0;
2090
2091 out_buffer = buffer;
2092 out_length = length + 1;
2093 }
2094
2095 return true;
2096 }
2097
2098 #ifdef PUGIXML_WCHAR_MODE
2099 PUGI_IMPL_FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2100 {
2101 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2102 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
2103 }
2104
2105 PUGI_IMPL_FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2106 {
2107 const char_t* data = static_cast<const char_t*>(contents);
2108 size_t length = size / sizeof(char_t);
2109
2110 if (is_mutable)
2111 {
2112 char_t* buffer = const_cast<char_t*>(data);
2113
2114 convert_wchar_endian_swap(buffer, data, length);
2115
2116 out_buffer = buffer;
2117 out_length = length;
2118 }
2119 else
2120 {
2121 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2122 if (!buffer) return false;
2123
2124 convert_wchar_endian_swap(buffer, data, length);
2125 buffer[length] = 0;
2126
2127 out_buffer = buffer;
2128 out_length = length + 1;
2129 }
2130
2131 return true;
2132 }
2133
2134 template <typename D> PUGI_IMPL_FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2135 {
2136 const typename D::type* data = static_cast<const typename D::type*>(contents);
2137 size_t data_length = size / sizeof(typename D::type);
2138
2139 // first pass: get length in wchar_t units
2140 size_t length = D::process(data, data_length, 0, wchar_counter());
2141
2142 // allocate buffer of suitable length
2143 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2144 if (!buffer) return false;
2145
2146 // second pass: convert utf16 input to wchar_t
2147 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2148 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2149
2150 assert(oend == obegin + length);
2151 *oend = 0;
2152
2153 out_buffer = buffer;
2154 out_length = length + 1;
2155
2156 return true;
2157 }
2158
2159 PUGI_IMPL_FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2160 {
2161 // get native encoding
2162 xml_encoding wchar_encoding = get_wchar_encoding();
2163
2164 // fast path: no conversion required
2165 if (encoding == wchar_encoding)
2166 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2167
2168 // only endian-swapping is required
2169 if (need_endian_swap_utf(encoding, wchar_encoding))
2170 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2171
2172 // source encoding is utf8
2173 if (encoding == encoding_utf8)
2174 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2175
2176 // source encoding is utf16
2177 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2178 {
2179 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2180
2181 return (native_encoding == encoding) ?
2182 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2183 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2184 }
2185
2186 // source encoding is utf32
2187 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2188 {
2189 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2190
2191 return (native_encoding == encoding) ?
2192 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2193 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2194 }
2195
2196 // source encoding is latin1
2197 if (encoding == encoding_latin1)
2198 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2199
2200 assert(false && "Invalid encoding"); // unreachable
2201 return false;
2202 }
2203 #else
2204 template <typename D> PUGI_IMPL_FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2205 {
2206 const typename D::type* data = static_cast<const typename D::type*>(contents);
2207 size_t data_length = size / sizeof(typename D::type);
2208
2209 // first pass: get length in utf8 units
2210 size_t length = D::process(data, data_length, 0, utf8_counter());
2211
2212 // allocate buffer of suitable length
2213 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2214 if (!buffer) return false;
2215
2216 // second pass: convert utf16 input to utf8
2217 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2218 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2219
2220 assert(oend == obegin + length);
2221 *oend = 0;
2222
2223 out_buffer = buffer;
2224 out_length = length + 1;
2225
2226 return true;
2227 }
2228
2229 PUGI_IMPL_FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2230 {
2231 for (size_t i = 0; i < size; ++i)
2232 if (data[i] > 127)
2233 return i;
2234
2235 return size;
2236 }
2237
2238 PUGI_IMPL_FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2239 {
2240 const uint8_t* data = static_cast<const uint8_t*>(contents);
2241 size_t data_length = size;
2242
2243 // get size of prefix that does not need utf8 conversion
2244 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2245 assert(prefix_length <= data_length);
2246
2247 const uint8_t* postfix = data + prefix_length;
2248 size_t postfix_length = data_length - prefix_length;
2249
2250 // if no conversion is needed, just return the original buffer
2251 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2252
2253 // first pass: get length in utf8 units
2254 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2255
2256 // allocate buffer of suitable length
2257 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2258 if (!buffer) return false;
2259
2260 // second pass: convert latin1 input to utf8
2261 memcpy(buffer, data, prefix_length);
2262
2263 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2264 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2265
2266 assert(oend == obegin + length);
2267 *oend = 0;
2268
2269 out_buffer = buffer;
2270 out_length = length + 1;
2271
2272 return true;
2273 }
2274
2275 PUGI_IMPL_FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2276 {
2277 // fast path: no conversion required
2278 if (encoding == encoding_utf8)
2279 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2280
2281 // source encoding is utf16
2282 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2283 {
2284 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2285
2286 return (native_encoding == encoding) ?
2287 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2288 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2289 }
2290
2291 // source encoding is utf32
2292 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2293 {
2294 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2295
2296 return (native_encoding == encoding) ?
2297 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2298 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2299 }
2300
2301 // source encoding is latin1
2302 if (encoding == encoding_latin1)
2303 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2304
2305 assert(false && "Invalid encoding"); // unreachable
2306 return false;
2307 }
2308 #endif
2309
2310 PUGI_IMPL_FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2311 {
2312 // get length in utf8 characters
2313 return wchar_decoder::process(str, length, 0, utf8_counter());
2314 }
2315
2316 PUGI_IMPL_FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2317 {
2318 // convert to utf8
2319 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2320 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2321
2322 assert(begin + size == end);
2323 (void)!end;
2324 (void)!size;
2325 }
2326
2327 #ifndef PUGIXML_NO_STL
2328 PUGI_IMPL_FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2329 {
2330 // first pass: get length in utf8 characters
2331 size_t size = as_utf8_begin(str, length);
2332
2333 // allocate resulting string
2334 std::string result;
2335 result.resize(size);
2336
2337 // second pass: convert to utf8
2338 if (size > 0) as_utf8_end(&result[0], size, str, length);
2339
2340 return result;
2341 }
2342
2343 PUGI_IMPL_FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2344 {
2345 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2346
2347 // first pass: get length in wchar_t units
2348 size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2349
2350 // allocate resulting string
2351 std::basic_string<wchar_t> result;
2352 result.resize(length);
2353
2354 // second pass: convert to wchar_t
2355 if (length > 0)
2356 {
2357 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2358 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
2359
2360 assert(begin + length == end);
2361 (void)!end;
2362 }
2363
2364 return result;
2365 }
2366 #endif
2367
2368 template <typename Header>
2369 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2370 {
2371 // never reuse shared memory
2372 if (header & xml_memory_page_contents_shared_mask) return false;
2373
2374 size_t target_length = strlength(target);
2375
2376 // always reuse document buffer memory if possible
2377 if ((header & header_mask) == 0) return target_length >= length;
2378
2379 // reuse heap memory if waste is not too great
2380 const size_t reuse_threshold = 32;
2381
2382 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2383 }
2384
2385 template <typename String, typename Header>
2386 PUGI_IMPL_FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2387 {
2388 assert((header & header_mask) == 0 || dest); // header bit indicates whether dest was previously allocated
2389
2390 if (source_length == 0)
2391 {
2392 // empty string and null pointer are equivalent, so just deallocate old memory
2393 xml_allocator* alloc = PUGI_IMPL_GETPAGE_IMPL(header)->allocator;
2394
2395 if (header & header_mask) alloc->deallocate_string(dest);
2396
2397 // mark the string as not allocated
2398 dest = 0;
2399 header &= ~header_mask;
2400
2401 return true;
2402 }
2403 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2404 {
2405 // we can reuse old buffer, so just copy the new data (including zero terminator)
2406 memcpy(dest, source, source_length * sizeof(char_t));
2407 dest[source_length] = 0;
2408
2409 return true;
2410 }
2411 else
2412 {
2413 xml_allocator* alloc = PUGI_IMPL_GETPAGE_IMPL(header)->allocator;
2414
2415 if (!alloc->reserve()) return false;
2416
2417 // allocate new buffer
2418 char_t* buf = alloc->allocate_string(source_length + 1);
2419 if (!buf) return false;
2420
2421 // copy the string (including zero terminator)
2422 memcpy(buf, source, source_length * sizeof(char_t));
2423 buf[source_length] = 0;
2424
2425 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2426 if (header & header_mask) alloc->deallocate_string(dest);
2427
2428 // the string is now allocated, so set the flag
2429 dest = buf;
2430 header |= header_mask;
2431
2432 return true;
2433 }
2434 }
2435
2436 struct gap
2437 {
2438 char_t* end;
2439 size_t size;
2440
2441 gap(): end(0), size(0)
2442 {
2443 }
2444
2445 // Push new gap, move s count bytes further (skipping the gap).
2446 // Collapse previous gap.
2447 void push(char_t*& s, size_t count)
2448 {
2449 if (end) // there was a gap already; collapse it
2450 {
2451 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2452 assert(s >= end);
2453 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2454 }
2455
2456 s += count; // end of current gap
2457
2458 // "merge" two gaps
2459 end = s;
2460 size += count;
2461 }
2462
2463 // Collapse all gaps, return past-the-end pointer
2464 char_t* flush(char_t* s)
2465 {
2466 if (end)
2467 {
2468 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
2469 assert(s >= end);
2470 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2471
2472 return s - size;
2473 }
2474 else return s;
2475 }
2476 };
2477
// Decodes one character or entity reference in place.
// s points at the reference (callers invoke this when *s == '&'); g collects the characters that
// become unused once the reference is replaced by the shorter decoded text.
// Recognized forms: &#<decimal>;  &#x<hex>;  &amp;  &apos;  &gt;  &lt;  &quot;
// On success the decoded character is written at s, the rest of the reference is pushed to g as a
// gap, and the pointer just past the ';' is returned.
// If the reference is not recognized nothing is written and the position where matching stopped is
// returned, so the caller simply continues scanning from there.
PUGI_IMPL_FN char_t* strconv_escape(char_t* s, gap& g)
{
	// stre is the read cursor; s stays on the '&' until something is written
	char_t* stre = s + 1;

	switch (*stre)
	{
		case '#': // &#...
		{
			// code point accumulated from the digits (unsigned, so overly long numbers wrap around)
			unsigned int ucsc = 0;

			if (stre[1] == 'x') // &#x... (hex code)
			{
				stre += 2;

				char_t ch = *stre;

				// "&#x;" has no digits: leave it unchanged
				if (ch == ';') return stre;

				for (;;)
				{
					// the unsigned cast turns characters below '0' into huge values, so one comparison checks the range
					if (static_cast<unsigned int>(ch - '0') <= 9)
						ucsc = 16 * ucsc + (ch - '0');
					// (ch | ' ') sets bit 0x20, which maps 'A'..'F' onto 'a'..'f'
					else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
						ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
					else if (ch == ';')
						break;
					else // cancel
						return stre;

					ch = *++stre;
				}

				// step over ';'
				++stre;
			}
			else // &#... (dec code)
			{
				char_t ch = *++stre;

				// "&#;" has no digits: leave it unchanged
				if (ch == ';') return stre;

				for (;;)
				{
					if (static_cast<unsigned int>(ch - '0') <= 9)
						ucsc = 10 * ucsc + (ch - '0');
					else if (ch == ';')
						break;
					else // cancel
						return stre;

					ch = *++stre;
				}

				// step over ';'
				++stre;
			}

			// encode the code point at the start of the reference; s ends up just past the written units
		#ifdef PUGIXML_WCHAR_MODE
			s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
		#else
			s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
		#endif

			// whatever remains of the reference text, [s, stre), is now a gap
			g.push(s, stre - s);
			return stre;
		}

		case 'a': // &a
		{
			++stre;

			if (*stre == 'm') // &am
			{
				if (*++stre == 'p' && *++stre == ';') // &amp;
				{
					*s++ = '&';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
			}
			else if (*stre == 'p') // &ap
			{
				if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
				{
					*s++ = '\'';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
			}
			break;
		}

		case 'g': // &g
		{
			if (*++stre == 't' && *++stre == ';') // &gt;
			{
				*s++ = '>';
				++stre;

				g.push(s, stre - s);
				return stre;
			}
			break;
		}

		case 'l': // &l
		{
			if (*++stre == 't' && *++stre == ';') // &lt;
			{
				*s++ = '<';
				++stre;

				g.push(s, stre - s);
				return stre;
			}
			break;
		}

		case 'q': // &q
		{
			if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
			{
				*s++ = '"';
				++stre;

				g.push(s, stre - s);
				return stre;
			}
			break;
		}

		default:
			break;
	}

	// not a recognized reference: resume scanning where matching stopped
	return stre;
}
2617
// Parser utilities
// These macros rely on names from the scope they are expanded in: s (current position),
// endch, optmsk, cursor, alloc, ch, error_offset and error_status.

// True if c equals e, or if c is the zero terminator and endch equals e.
// NOTE(review): presumably endch holds the character the terminator replaced at the end of the buffer - confirm at the call site that sets it.
#define PUGI_IMPL_ENDSWITH(c, e)        ((c) == (e) || ((c) == 0 && endch == (e)))
// Advances s past any ct_space characters.
#define PUGI_IMPL_SKIPWS()              { while (PUGI_IMPL_IS_CHARTYPE(*s, ct_space)) ++s; }
// Non-zero if any bit of OPT is set in optmsk.
#define PUGI_IMPL_OPTSET(OPT)           ( optmsk & (OPT) )
// Appends a new node of the given TYPE under cursor and makes it the cursor; reports status_out_of_memory at s on failure.
#define PUGI_IMPL_PUSHNODE(TYPE)        { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI_IMPL_THROW_ERROR(status_out_of_memory, s); }
// Moves cursor back to its parent node.
#define PUGI_IMPL_POPNODE()             { cursor = cursor->parent; }
// Advances s until X becomes true or the zero terminator is reached.
#define PUGI_IMPL_SCANFOR(X)            { while (*s != 0 && !(X)) ++s; }
// Advances s while X is true.
#define PUGI_IMPL_SCANWHILE(X)          { while (X) ++s; }
// Like PUGI_IMPL_SCANWHILE, but X must test the local `ss` (the character being examined); checks four characters per iteration.
#define PUGI_IMPL_SCANWHILE_UNROLL(X)   { for (;;) { char_t ss = s[0]; if (PUGI_IMPL_UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI_IMPL_UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI_IMPL_UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI_IMPL_UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
// Saves the current character in ch, overwrites it with a zero terminator and steps past it.
#define PUGI_IMPL_ENDSEG()              { ch = *s; *s = 0; ++s; }
// Records the error position m and status err, then returns a null pointer from the enclosing function.
#define PUGI_IMPL_THROW_ERROR(err, m)   return error_offset = m, error_status = err, static_cast<char_t*>(0)
// Reports err at m if s has reached the zero terminator.
#define PUGI_IMPL_CHECK_ERROR(err, m)   { if (*s == 0) PUGI_IMPL_THROW_ERROR(err, m); }
2630
2631 PUGI_IMPL_FN char_t* strconv_comment(char_t* s, char_t endch)
2632 {
2633 gap g;
2634
2635 while (true)
2636 {
2637 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_comment));
2638
2639 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2640 {
2641 *s++ = '\n'; // replace first one with 0x0a
2642
2643 if (*s == '\n') g.push(s, 1);
2644 }
2645 else if (s[0] == '-' && s[1] == '-' && PUGI_IMPL_ENDSWITH(s[2], '>')) // comment ends here
2646 {
2647 *g.flush(s) = 0;
2648
2649 return s + (s[2] == '>' ? 3 : 2);
2650 }
2651 else if (*s == 0)
2652 {
2653 return 0;
2654 }
2655 else ++s;
2656 }
2657 }
2658
2659 PUGI_IMPL_FN char_t* strconv_cdata(char_t* s, char_t endch)
2660 {
2661 gap g;
2662
2663 while (true)
2664 {
2665 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_cdata));
2666
2667 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2668 {
2669 *s++ = '\n'; // replace first one with 0x0a
2670
2671 if (*s == '\n') g.push(s, 1);
2672 }
2673 else if (s[0] == ']' && s[1] == ']' && PUGI_IMPL_ENDSWITH(s[2], '>')) // CDATA ends here
2674 {
2675 *g.flush(s) = 0;
2676
2677 return s + 1;
2678 }
2679 else if (*s == 0)
2680 {
2681 return 0;
2682 }
2683 else ++s;
2684 }
2685 }
2686
2687 typedef char_t* (*strconv_pcdata_t)(char_t*);
2688
2689 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2690 {
2691 static char_t* parse(char_t* s)
2692 {
2693 gap g;
2694
2695 char_t* begin = s;
2696
2697 while (true)
2698 {
2699 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_pcdata));
2700
2701 if (*s == '<') // PCDATA ends here
2702 {
2703 char_t* end = g.flush(s);
2704
2705 if (opt_trim::value)
2706 while (end > begin && PUGI_IMPL_IS_CHARTYPE(end[-1], ct_space))
2707 --end;
2708
2709 *end = 0;
2710
2711 return s + 1;
2712 }
2713 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2714 {
2715 *s++ = '\n'; // replace first one with 0x0a
2716
2717 if (*s == '\n') g.push(s, 1);
2718 }
2719 else if (opt_escape::value && *s == '&')
2720 {
2721 s = strconv_escape(s, g);
2722 }
2723 else if (*s == 0)
2724 {
2725 char_t* end = g.flush(s);
2726
2727 if (opt_trim::value)
2728 while (end > begin && PUGI_IMPL_IS_CHARTYPE(end[-1], ct_space))
2729 --end;
2730
2731 *end = 0;
2732
2733 return s;
2734 }
2735 else ++s;
2736 }
2737 }
2738 };
2739
2740 PUGI_IMPL_FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
2741 {
2742 PUGI_IMPL_STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2743
2744 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
2745 {
2746 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2747 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2748 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2749 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2750 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2751 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2752 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2753 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
2754 default: assert(false); return 0; // unreachable
2755 }
2756 }
2757
2758 typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
2759
2760 template <typename opt_escape> struct strconv_attribute_impl
2761 {
2762 static char_t* parse_wnorm(char_t* s, char_t end_quote)
2763 {
2764 gap g;
2765
2766 // trim leading whitespaces
2767 if (PUGI_IMPL_IS_CHARTYPE(*s, ct_space))
2768 {
2769 char_t* str = s;
2770
2771 do ++str;
2772 while (PUGI_IMPL_IS_CHARTYPE(*str, ct_space));
2773
2774 g.push(s, str - s);
2775 }
2776
2777 while (true)
2778 {
2779 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
2780
2781 if (*s == end_quote)
2782 {
2783 char_t* str = g.flush(s);
2784
2785 do *str-- = 0;
2786 while (PUGI_IMPL_IS_CHARTYPE(*str, ct_space));
2787
2788 return s + 1;
2789 }
2790 else if (PUGI_IMPL_IS_CHARTYPE(*s, ct_space))
2791 {
2792 *s++ = ' ';
2793
2794 if (PUGI_IMPL_IS_CHARTYPE(*s, ct_space))
2795 {
2796 char_t* str = s + 1;
2797 while (PUGI_IMPL_IS_CHARTYPE(*str, ct_space)) ++str;
2798
2799 g.push(s, str - s);
2800 }
2801 }
2802 else if (opt_escape::value && *s == '&')
2803 {
2804 s = strconv_escape(s, g);
2805 }
2806 else if (!*s)
2807 {
2808 return 0;
2809 }
2810 else ++s;
2811 }
2812 }
2813
2814 static char_t* parse_wconv(char_t* s, char_t end_quote)
2815 {
2816 gap g;
2817
2818 while (true)
2819 {
2820 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_attr_ws));
2821
2822 if (*s == end_quote)
2823 {
2824 *g.flush(s) = 0;
2825
2826 return s + 1;
2827 }
2828 else if (PUGI_IMPL_IS_CHARTYPE(*s, ct_space))
2829 {
2830 if (*s == '\r')
2831 {
2832 *s++ = ' ';
2833
2834 if (*s == '\n') g.push(s, 1);
2835 }
2836 else *s++ = ' ';
2837 }
2838 else if (opt_escape::value && *s == '&')
2839 {
2840 s = strconv_escape(s, g);
2841 }
2842 else if (!*s)
2843 {
2844 return 0;
2845 }
2846 else ++s;
2847 }
2848 }
2849
2850 static char_t* parse_eol(char_t* s, char_t end_quote)
2851 {
2852 gap g;
2853
2854 while (true)
2855 {
2856 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_attr));
2857
2858 if (*s == end_quote)
2859 {
2860 *g.flush(s) = 0;
2861
2862 return s + 1;
2863 }
2864 else if (*s == '\r')
2865 {
2866 *s++ = '\n';
2867
2868 if (*s == '\n') g.push(s, 1);
2869 }
2870 else if (opt_escape::value && *s == '&')
2871 {
2872 s = strconv_escape(s, g);
2873 }
2874 else if (!*s)
2875 {
2876 return 0;
2877 }
2878 else ++s;
2879 }
2880 }
2881
2882 static char_t* parse_simple(char_t* s, char_t end_quote)
2883 {
2884 gap g;
2885
2886 while (true)
2887 {
2888 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_attr));
2889
2890 if (*s == end_quote)
2891 {
2892 *g.flush(s) = 0;
2893
2894 return s + 1;
2895 }
2896 else if (opt_escape::value && *s == '&')
2897 {
2898 s = strconv_escape(s, g);
2899 }
2900 else if (!*s)
2901 {
2902 return 0;
2903 }
2904 else ++s;
2905 }
2906 }
2907 };
2908
2909 PUGI_IMPL_FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2910 {
2911 PUGI_IMPL_STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2912
2913 switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
2914 {
2915 case 0: return strconv_attribute_impl<opt_false>::parse_simple;
2916 case 1: return strconv_attribute_impl<opt_true>::parse_simple;
2917 case 2: return strconv_attribute_impl<opt_false>::parse_eol;
2918 case 3: return strconv_attribute_impl<opt_true>::parse_eol;
2919 case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
2920 case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
2921 case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
2922 case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
2923 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
2924 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
2925 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2926 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2927 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2928 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2929 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2930 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2931 default: assert(false); return 0; // unreachable
2932 }
2933 }
2934
2935 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2936 {
2937 xml_parse_result result;
2938 result.status = status;
2939 result.offset = offset;
2940
2941 return result;
2942 }
2943
2944 struct xml_parser
2945 {
2946 xml_allocator* alloc;
2947 char_t* error_offset;
2948 xml_parse_status error_status;
2949
// Creates a parser that allocates through alloc_ and starts with no error recorded
// (error_offset is null, error_status is status_ok).
xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
{
}
2953
2954 // DOCTYPE consists of nested sections of the following possible types:
2955 // <!-- ... -->, <? ... ?>, "...", '...'
2956 // <![...]]>
2957 // <!...>
2958 // First group can not contain nested groups
2959 // Second group can contain nested groups of the same type
2960 // Third group can contain all other groups
2961 char_t* parse_doctype_primitive(char_t* s)
2962 {
2963 if (*s == '"' || *s == '\'')
2964 {
2965 // quoted string
2966 char_t ch = *s++;
2967 PUGI_IMPL_SCANFOR(*s == ch);
2968 if (!*s) PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
2969
2970 s++;
2971 }
2972 else if (s[0] == '<' && s[1] == '?')
2973 {
2974 // <? ... ?>
2975 s += 2;
2976 PUGI_IMPL_SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2977 if (!*s) PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
2978
2979 s += 2;
2980 }
2981 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2982 {
2983 s += 4;
2984 PUGI_IMPL_SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2985 if (!*s) PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
2986
2987 s += 3;
2988 }
2989 else PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
2990
2991 return s;
2992 }
2993
2994 char_t* parse_doctype_ignore(char_t* s)
2995 {
2996 size_t depth = 0;
2997
2998 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2999 s += 3;
3000
3001 while (*s)
3002 {
3003 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
3004 {
3005 // nested ignore section
3006 s += 3;
3007 depth++;
3008 }
3009 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
3010 {
3011 // ignore section end
3012 s += 3;
3013
3014 if (depth == 0)
3015 return s;
3016
3017 depth--;
3018 }
3019 else s++;
3020 }
3021
3022 PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
3023 }
3024
3025 char_t* parse_doctype_group(char_t* s, char_t endch)
3026 {
3027 size_t depth = 0;
3028
3029 assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
3030 s += 2;
3031
3032 while (*s)
3033 {
3034 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
3035 {
3036 if (s[2] == '[')
3037 {
3038 // ignore
3039 s = parse_doctype_ignore(s);
3040 if (!s) return s;
3041 }
3042 else
3043 {
3044 // some control group
3045 s += 2;
3046 depth++;
3047 }
3048 }
3049 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
3050 {
3051 // unknown tag (forbidden), or some primitive group
3052 s = parse_doctype_primitive(s);
3053 if (!s) return s;
3054 }
3055 else if (*s == '>')
3056 {
3057 if (depth == 0)
3058 return s;
3059
3060 depth--;
3061 s++;
3062 }
3063 else s++;
3064 }
3065
3066 if (depth != 0 || endch != '>') PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);
3067
3068 return s;
3069 }
3070
// Parses a construct that begins with "<!": a comment, a CDATA section or a DOCTYPE.
// On entry s points at the '!'. Depending on optmsk a node_comment / node_cdata / node_doctype
// child is appended under cursor; otherwise the construct is only skipped.
// cursor is taken by value, so a node pushed here does not change the caller's cursor.
// Returns the position after the construct, or null after recording an error
// (via PUGI_IMPL_THROW_ERROR).
char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
{
	// parse node contents, starting with exclamation mark
	++s;

	if (*s == '-') // '<!-...'
	{
		++s;

		if (*s == '-') // '<!--...'
		{
			++s;

			if (PUGI_IMPL_OPTSET(parse_comments))
			{
				PUGI_IMPL_PUSHNODE(node_comment); // Append a new node on the tree.
				cursor->value = s; // Save the offset.
			}

			// line endings are only normalized when the comment is actually kept
			if (PUGI_IMPL_OPTSET(parse_eol) && PUGI_IMPL_OPTSET(parse_comments))
			{
				s = strconv_comment(s, endch);

				if (!s) PUGI_IMPL_THROW_ERROR(status_bad_comment, cursor->value);
			}
			else
			{
				// Scan for terminating '-->'.
				PUGI_IMPL_SCANFOR(s[0] == '-' && s[1] == '-' && PUGI_IMPL_ENDSWITH(s[2], '>'));
				PUGI_IMPL_CHECK_ERROR(status_bad_comment, s);

				if (PUGI_IMPL_OPTSET(parse_comments))
					*s = 0; // Zero-terminate this segment at the first terminating '-'.

				// only two characters are skipped when the buffer terminator stands in for the '>'
				s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
			}
		}
		else PUGI_IMPL_THROW_ERROR(status_bad_comment, s);
	}
	else if (*s == '[')
	{
		// '<![CDATA[...'
		if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
		{
			++s;

			if (PUGI_IMPL_OPTSET(parse_cdata))
			{
				PUGI_IMPL_PUSHNODE(node_cdata); // Append a new node on the tree.
				cursor->value = s; // Save the offset.

				if (PUGI_IMPL_OPTSET(parse_eol))
				{
					s = strconv_cdata(s, endch);

					if (!s) PUGI_IMPL_THROW_ERROR(status_bad_cdata, cursor->value);
				}
				else
				{
					// Scan for terminating ']]>'.
					PUGI_IMPL_SCANFOR(s[0] == ']' && s[1] == ']' && PUGI_IMPL_ENDSWITH(s[2], '>'));
					PUGI_IMPL_CHECK_ERROR(status_bad_cdata, s);

					*s++ = 0; // Zero-terminate this segment.
				}
			}
			else // Flagged for discard, but we still have to scan for the terminator.
			{
				// Scan for terminating ']]>'.
				PUGI_IMPL_SCANFOR(s[0] == ']' && s[1] == ']' && PUGI_IMPL_ENDSWITH(s[2], '>'));
				PUGI_IMPL_CHECK_ERROR(status_bad_cdata, s);

				++s;
			}

			// every path above leaves s one past the first ']' of the terminator
			s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
		}
		else PUGI_IMPL_THROW_ERROR(status_bad_cdata, s);
	}
	else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI_IMPL_ENDSWITH(s[6], 'E'))
	{
		// back up from 'D' to the '<' that opens "<!DOCTYPE"
		s -= 2;

		// a DOCTYPE is only accepted when cursor has no parent
		if (cursor->parent) PUGI_IMPL_THROW_ERROR(status_bad_doctype, s);

		// first character after the 9 characters of "<!DOCTYPE"
		char_t* mark = s + 9;

		s = parse_doctype_group(s, endch);
		if (!s) return s;

		assert((*s == 0 && endch == '>') || *s == '>');
		// replace the closing '>' with a terminator (unless the buffer terminator is already there)
		if (*s) *s++ = 0;

		if (PUGI_IMPL_OPTSET(parse_doctype))
		{
			// the node value starts at the first non-space character after "<!DOCTYPE"
			while (PUGI_IMPL_IS_CHARTYPE(*mark, ct_space)) ++mark;

			PUGI_IMPL_PUSHNODE(node_doctype);

			cursor->value = mark;
		}
	}
	// buffer ended right after "<!": pick the status from the character the terminator replaced
	else if (*s == 0 && endch == '-') PUGI_IMPL_THROW_ERROR(status_bad_comment, s);
	else if (*s == 0 && endch == '[') PUGI_IMPL_THROW_ERROR(status_bad_cdata, s);
	else PUGI_IMPL_THROW_ERROR(status_unrecognized_tag, s);

	return s;
}
3179
// Parses a construct that begins with "<?": a processing instruction or an XML declaration.
// On entry s points at the '?'. A target equal to "xml" in any letter case is treated as a
// declaration and governed by parse_declaration; any other target is governed by parse_pi.
// When the governing option is off the construct is skipped up to its closing "?>".
// ref_cursor is updated on exit; for a declaration that has content after its target it is left
// pointing at the new declaration node (see the comment inside).
// Returns the position to continue parsing from, or null after recording an error
// (via PUGI_IMPL_THROW_ERROR).
char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
{
	// load into registers
	xml_node_struct* cursor = ref_cursor;
	char_t ch = 0;

	// parse node contents, starting with question mark
	++s;

	// read PI target
	char_t* target = s;

	if (!PUGI_IMPL_IS_CHARTYPE(*s, ct_start_symbol)) PUGI_IMPL_THROW_ERROR(status_bad_pi, s);

	PUGI_IMPL_SCANWHILE(PUGI_IMPL_IS_CHARTYPE(*s, ct_symbol));
	PUGI_IMPL_CHECK_ERROR(status_bad_pi, s);

	// determine node type; stricmp / strcasecmp is not portable
	// (c | ' ') sets bit 0x20, folding uppercase ASCII letters to lowercase; target + 3 == s requires the target to be exactly three characters long
	bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;

	if (declaration ? PUGI_IMPL_OPTSET(parse_declaration) : PUGI_IMPL_OPTSET(parse_pi))
	{
		if (declaration)
		{
			// disallow non top-level declarations
			if (cursor->parent) PUGI_IMPL_THROW_ERROR(status_bad_pi, s);

			PUGI_IMPL_PUSHNODE(node_declaration);
		}
		else
		{
			PUGI_IMPL_PUSHNODE(node_pi);
		}

		cursor->name = target;

		// terminate the target name; ch receives the character that followed it
		PUGI_IMPL_ENDSEG();

		// parse value/attributes
		if (ch == '?')
		{
			// empty node
			if (!PUGI_IMPL_ENDSWITH(*s, '>')) PUGI_IMPL_THROW_ERROR(status_bad_pi, s);
			// step over '>' unless the buffer terminator stands in for it
			s += (*s == '>');

			PUGI_IMPL_POPNODE();
		}
		else if (PUGI_IMPL_IS_CHARTYPE(ch, ct_space))
		{
			PUGI_IMPL_SKIPWS();

			// scan for tag end
			char_t* value = s;

			PUGI_IMPL_SCANFOR(s[0] == '?' && PUGI_IMPL_ENDSWITH(s[1], '>'));
			PUGI_IMPL_CHECK_ERROR(status_bad_pi, s);

			if (declaration)
			{
				// replace ending ? with / so that 'element' terminates properly
				*s = '/';

				// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
				s = value;
			}
			else
			{
				// store value and step over >
				cursor->value = value;

				PUGI_IMPL_POPNODE();

				// overwrite the closing '?' with a terminator
				PUGI_IMPL_ENDSEG();

				s += (*s == '>');
			}
		}
		else PUGI_IMPL_THROW_ERROR(status_bad_pi, s);
	}
	else
	{
		// scan for tag end
		PUGI_IMPL_SCANFOR(s[0] == '?' && PUGI_IMPL_ENDSWITH(s[1], '>'));
		PUGI_IMPL_CHECK_ERROR(status_bad_pi, s);

		// skip "?>" (or just '?' when the buffer terminator stands in for the '>')
		s += (s[1] == '>' ? 2 : 1);
	}

	// store from registers
	ref_cursor = cursor;

	return s;
}
3273
// Main parsing loop: walks the zero-terminated text at s and attaches the nodes/attributes it finds under root.
//   s      - start of the text to parse; the buffer is modified in place while parsing
//   root   - node that receives the parsed content; parsing must finish with the cursor back at root
//   optmsk - parse option mask; selects the attribute/PCDATA converters and is tested through PUGI_IMPL_OPTSET
//   endch  - compared against '>' / '?' whenever a null terminator is hit inside a tag; the caller (parse) passes
//            the character it overwrote with the terminator at the end of the buffer
// Returns the position where parsing stopped; a null result from parse_question/parse_exclamation is returned as is.
// NOTE(review): PUGI_IMPL_THROW_ERROR and the other PUGI_IMPL_* macros are defined outside this chunk; they
// presumably record the error and leave this function - confirm at their definitions.
3274 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
3275 {
// pick the attribute / PCDATA conversion routines once, based on the option mask
3276 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3277 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
3278
// ch: last character saved by PUGI_IMPL_ENDSEG; cursor: node currently being filled;
// mark: remembered position (used for error offsets and to restore s before PCDATA);
// merged_pcdata: append position used by the parse_merge_pcdata branch below
3279 char_t ch = 0;
3280 xml_node_struct* cursor = root;
3281 char_t* mark = s;
3282 char_t* merged_pcdata = s;
3283
3284 while (*s != 0)
3285 {
3286 if (*s == '<')
3287 {
3288 ++s;
3289
// LOC_TAG is reached by falling through from the '<' check above, or via goto from the PCDATA branch
// at the bottom of the loop once '<' has been stepped over
3290 LOC_TAG:
3291 if (PUGI_IMPL_IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3292 {
3293 PUGI_IMPL_PUSHNODE(node_element); // Append a new node to the tree.
3294
3295 cursor->name = s;
3296
3297 PUGI_IMPL_SCANWHILE_UNROLL(PUGI_IMPL_IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3298 PUGI_IMPL_ENDSEG(); // Save char in 'ch', terminate & step over.
3299
3300 if (ch == '>')
3301 {
3302 // end of tag
3303 }
3304 else if (PUGI_IMPL_IS_CHARTYPE(ch, ct_space))
3305 {
// LOC_ATTRIBUTES is also entered via goto from the '<?' branch below when the cursor is a declaration node
3306 LOC_ATTRIBUTES:
3307 while (true)
3308 {
3309 PUGI_IMPL_SKIPWS(); // Eat any whitespace.
3310
3311 if (PUGI_IMPL_IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3312 {
3313 xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
3314 if (!a) PUGI_IMPL_THROW_ERROR(status_out_of_memory, s);
3315
3316 a->name = s; // Save the offset.
3317
3318 PUGI_IMPL_SCANWHILE_UNROLL(PUGI_IMPL_IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3319 PUGI_IMPL_ENDSEG(); // Save char in 'ch', terminate & step over.
3320
// whitespace between the attribute name and '=' is allowed: skip it and re-read the next character into ch
3321 if (PUGI_IMPL_IS_CHARTYPE(ch, ct_space))
3322 {
3323 PUGI_IMPL_SKIPWS(); // Eat any whitespace.
3324
3325 ch = *s;
3326 ++s;
3327 }
3328
3329 if (ch == '=') // '<... #=...'
3330 {
3331 PUGI_IMPL_SKIPWS(); // Eat any whitespace.
3332
3333 if (*s == '"' || *s == '\'') // '<... #="...'
3334 {
3335 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3336 ++s; // Step over the quote.
3337 a->value = s; // Save the offset.
3338
// the converter receives the opening quote character; a null result is reported as a bad attribute
3339 s = strconv_attribute(s, ch);
3340
3341 if (!s) PUGI_IMPL_THROW_ERROR(status_bad_attribute, a->value);
3342
3343 // After this line the loop continues from the start;
3344 // Whitespaces, / and > are ok, symbols and EOF are wrong,
3345 // everything else will be detected
3346 if (PUGI_IMPL_IS_CHARTYPE(*s, ct_start_symbol)) PUGI_IMPL_THROW_ERROR(status_bad_attribute, s);
3347 }
3348 else PUGI_IMPL_THROW_ERROR(status_bad_attribute, s);
3349 }
3350 else PUGI_IMPL_THROW_ERROR(status_bad_attribute, s);
3351 }
3352 else if (*s == '/')
3353 {
3354 ++s;
3355
3356 if (*s == '>')
3357 {
3358 PUGI_IMPL_POPNODE();
3359 s++;
3360 break;
3361 }
// input ended right after '/': accepted only when the character replaced by the terminator was '>'
3362 else if (*s == 0 && endch == '>')
3363 {
3364 PUGI_IMPL_POPNODE();
3365 break;
3366 }
3367 else PUGI_IMPL_THROW_ERROR(status_bad_start_element, s);
3368 }
3369 else if (*s == '>')
3370 {
3371 ++s;
3372
3373 break;
3374 }
// input ended inside the attribute list: accepted only when the character replaced by the terminator was '>'
3375 else if (*s == 0 && endch == '>')
3376 {
3377 break;
3378 }
3379 else PUGI_IMPL_THROW_ERROR(status_bad_start_element, s);
3380 }
3381
3382 // !!!
3383 }
3384 else if (ch == '/') // '<#.../'
3385 {
3386 if (!PUGI_IMPL_ENDSWITH(*s, '>')) PUGI_IMPL_THROW_ERROR(status_bad_start_element, s);
3387
3388 PUGI_IMPL_POPNODE(); // Pop.
3389
// step over '>' only if it is actually present at *s
3390 s += (*s == '>');
3391 }
3392 else if (ch == 0)
3393 {
3394 // we stepped over null terminator, backtrack & handle closing tag
3395 --s;
3396
3397 if (endch != '>') PUGI_IMPL_THROW_ERROR(status_bad_start_element, s);
3398 }
3399 else PUGI_IMPL_THROW_ERROR(status_bad_start_element, s);
3400 }
// closing tag: its name is compared character by character against the name of the node at cursor
3401 else if (*s == '/')
3402 {
3403 ++s;
3404
3405 mark = s;
3406
3407 char_t* name = cursor->name;
3408 if (!name) PUGI_IMPL_THROW_ERROR(status_end_element_mismatch, mark);
3409
3410 while (PUGI_IMPL_IS_CHARTYPE(*s, ct_symbol))
3411 {
3412 if (*s++ != *name++) PUGI_IMPL_THROW_ERROR(status_end_element_mismatch, mark);
3413 }
3414
// leftover characters in the expected name: if the only one left equals endch and the input ended here,
// the name itself matches and just the closing '>' is missing; otherwise the names differ
3415 if (*name)
3416 {
3417 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI_IMPL_THROW_ERROR(status_bad_end_element, s);
3418 else PUGI_IMPL_THROW_ERROR(status_end_element_mismatch, mark);
3419 }
3420
3421 PUGI_IMPL_POPNODE(); // Pop.
3422
3423 PUGI_IMPL_SKIPWS();
3424
// at the end of input the closing '>' may be the character that was replaced by the terminator (endch)
3425 if (*s == 0)
3426 {
3427 if (endch != '>') PUGI_IMPL_THROW_ERROR(status_bad_end_element, s);
3428 }
3429 else
3430 {
3431 if (*s != '>') PUGI_IMPL_THROW_ERROR(status_bad_end_element, s);
3432 ++s;
3433 }
3434 }
3435 else if (*s == '?') // '<?...'
3436 {
3437 s = parse_question(s, cursor, optmsk, endch);
3438 if (!s) return s;
3439
// a declaration node left at the cursor means its attributes still have to be parsed
3440 assert(cursor);
3441 if (PUGI_IMPL_NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3442 }
3443 else if (*s == '!') // '<!...'
3444 {
3445 s = parse_exclamation(s, cursor, optmsk, endch);
3446 if (!s) return s;
3447 }
3448 else if (*s == 0 && endch == '?') PUGI_IMPL_THROW_ERROR(status_bad_pi, s);
3449 else PUGI_IMPL_THROW_ERROR(status_unrecognized_tag, s);
3450 }
3451 else
3452 {
3453 mark = s; // Save this offset while searching for a terminator.
3454
3455 PUGI_IMPL_SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3456
3457 if (*s == '<' || !*s)
3458 {
3459 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
3460 assert(mark != s);
3461
// whitespace-only text is dropped unless one of the parse_ws_pcdata options keeps it (and trimming is off)
3462 if (!PUGI_IMPL_OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI_IMPL_OPTSET(parse_trim_pcdata))
3463 {
3464 continue;
3465 }
3466 else if (PUGI_IMPL_OPTSET(parse_ws_pcdata_single))
3467 {
// keep it only when it is directly followed by a closing tag and the current node has no children yet
3468 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3469 }
3470 }
3471
// without parse_trim_pcdata the leading whitespace skipped above belongs to the text: rewind to mark
3472 if (!PUGI_IMPL_OPTSET(parse_trim_pcdata))
3473 s = mark;
3474
// text directly under root is stored only with parse_fragment; otherwise the else branch skips to the next '<'
3475 if (cursor->parent || PUGI_IMPL_OPTSET(parse_fragment))
3476 {
3477 char_t* parsed_pcdata = s;
3478
3479 s = strconv_pcdata(s);
3480
3481 if (PUGI_IMPL_OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3482 {
3483 cursor->value = parsed_pcdata; // Save the offset.
3484 }
3485 else if (PUGI_IMPL_OPTSET(parse_merge_pcdata) && cursor->first_child && PUGI_IMPL_NODETYPE(cursor->first_child->prev_sibling_c) == node_pcdata)
3486 {
3487 assert(merged_pcdata >= cursor->first_child->prev_sibling_c->value);
3488
3489 // Catch up to the end of last parsed value; only needed for the first fragment.
3490 merged_pcdata += strlength(merged_pcdata);
3491
3492 size_t length = strlength(parsed_pcdata);
3493
3494 // Must use memmove instead of memcpy as this move may overlap
3495 memmove(merged_pcdata, parsed_pcdata, (length + 1) * sizeof(char_t));
3496 merged_pcdata += length;
3497 }
3498 else
3499 {
3500 xml_node_struct* prev_cursor = cursor;
3501 PUGI_IMPL_PUSHNODE(node_pcdata); // Append a new node on the tree.
3502
3503 cursor->value = parsed_pcdata; // Save the offset.
3504 merged_pcdata = parsed_pcdata; // Used for parse_merge_pcdata above, cheaper to save unconditionally
3505
3506 cursor = prev_cursor; // Pop since this is a standalone.
3507 }
3508
3509 if (!*s) break;
3510 }
3511 else
3512 {
3513 PUGI_IMPL_SCANFOR(*s == '<'); // '...<'
3514 if (!*s) break;
3515
3516 ++s;
3517 }
3518
3519 // We're after '<'
3520 goto LOC_TAG;
3521 }
3522 }
3523
3524 // check that last tag is closed
3525 if (cursor != root) PUGI_IMPL_THROW_ERROR(status_end_element_mismatch, s);
3526
3527 return s;
3528 }
3529
3530 #ifdef PUGIXML_WCHAR_MODE
3531 static char_t* parse_skip_bom(char_t* s)
3532 {
3533 unsigned int bom = 0xfeff;
3534 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3535 }
3536 #else
3537 static char_t* parse_skip_bom(char_t* s)
3538 {
3539 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3540 }
3541 #endif
3542
3543 static bool has_element_node_siblings(xml_node_struct* node)
3544 {
3545 while (node)
3546 {
3547 if (PUGI_IMPL_NODETYPE(node) == node_element) return true;
3548
3549 node = node->next_sibling;
3550 }
3551
3552 return false;
3553 }
3554
3555 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3556 {
3557 // early-out for empty documents
3558 if (length == 0)
3559 return make_parse_result(PUGI_IMPL_OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3560
3561 // get last child of the root before parsing
3562 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3563
3564 // create parser on stack
3565 xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3566
3567 // save last character and make buffer zero-terminated (speeds up parsing)
3568 char_t endch = buffer[length - 1];
3569 buffer[length - 1] = 0;
3570
3571 // skip BOM to make sure it does not end up as part of parse output
3572 char_t* buffer_data = parse_skip_bom(buffer);
3573
3574 // perform actual parsing
3575 parser.parse_tree(buffer_data, root, optmsk, endch);
3576
3577 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3578 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3579
3580 if (result)
3581 {
3582 // since we removed last character, we have to handle the only possible false positive (stray <)
3583 if (endch == '<')
3584 return make_parse_result(status_unrecognized_tag, length - 1);
3585
3586 // check if there are any element nodes parsed
3587 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child + 0;
3588
3589 if (!PUGI_IMPL_OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3590 return make_parse_result(status_no_document_element, length - 1);
3591 }
3592 else
3593 {
3594 // roll back offset if it occurs on a null terminator in the source buffer
3595 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3596 result.offset--;
3597 }
3598
3599 return result;
3600 }
3601 };
3602
3603 // Output facilities
3604 PUGI_IMPL_FN xml_encoding get_write_native_encoding()
3605 {
3606 #ifdef PUGIXML_WCHAR_MODE
3607 return get_wchar_encoding();
3608 #else
3609 return encoding_utf8;
3610 #endif
3611 }
3612
3613 PUGI_IMPL_FN xml_encoding get_write_encoding(xml_encoding encoding)
3614 {
3615 // replace wchar encoding with utf implementation
3616 if (encoding == encoding_wchar) return get_wchar_encoding();
3617
3618 // replace utf16 encoding with utf16 with specific endianness
3619 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3620
3621 // replace utf32 encoding with utf32 with specific endianness
3622 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3623
3624 // only do autodetection if no explicit encoding is requested
3625 if (encoding != encoding_auto) return encoding;
3626
3627 // assume utf8 encoding
3628 return encoding_utf8;
3629 }
3630
3631 template <typename D, typename T> PUGI_IMPL_FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3632 {
3633 PUGI_IMPL_STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3634
3635 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3636
3637 return static_cast<size_t>(end - dest) * sizeof(*dest);
3638 }
3639
3640 template <typename D, typename T> PUGI_IMPL_FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3641 {
3642 PUGI_IMPL_STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3643
3644 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3645
3646 if (opt_swap)
3647 {
3648 for (typename T::value_type i = dest; i != end; ++i)
3649 *i = endian_swap(*i);
3650 }
3651
3652 return static_cast<size_t>(end - dest) * sizeof(*dest);
3653 }
3654
3655 #ifdef PUGIXML_WCHAR_MODE
3656 PUGI_IMPL_FN size_t get_valid_length(const char_t* data, size_t length)
3657 {
3658 if (length < 1) return 0;
3659
3660 // discard last character if it's the lead of a surrogate pair
3661 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3662 }
3663
3664 PUGI_IMPL_FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3665 {
3666 // only endian-swapping is required
3667 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3668 {
3669 convert_wchar_endian_swap(r_char, data, length);
3670
3671 return length * sizeof(char_t);
3672 }
3673
3674 // convert to utf8
3675 if (encoding == encoding_utf8)
3676 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3677
3678 // convert to utf16
3679 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3680 {
3681 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3682
3683 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3684 }
3685
3686 // convert to utf32
3687 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3688 {
3689 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3690
3691 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3692 }
3693
3694 // convert to latin1
3695 if (encoding == encoding_latin1)
3696 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3697
3698 assert(false && "Invalid encoding"); // unreachable
3699 return 0;
3700 }
3701 #else
3702 PUGI_IMPL_FN size_t get_valid_length(const char_t* data, size_t length)
3703 {
3704 if (length < 5) return 0;
3705
3706 for (size_t i = 1; i <= 4; ++i)
3707 {
3708 uint8_t ch = static_cast<uint8_t>(data[length - i]);
3709
3710 // either a standalone character or a leading one
3711 if ((ch & 0xc0) != 0x80) return length - i;
3712 }
3713
3714 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3715 return length;
3716 }
3717
3718 PUGI_IMPL_FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3719 {
3720 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3721 {
3722 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3723
3724 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3725 }
3726
3727 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3728 {
3729 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3730
3731 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3732 }
3733
3734 if (encoding == encoding_latin1)
3735 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3736
3737 assert(false && "Invalid encoding"); // unreachable
3738 return 0;
3739 }
3740 #endif
3741
3742 class xml_buffered_writer
3743 {
3744 xml_buffered_writer(const xml_buffered_writer&);
3745 xml_buffered_writer& operator=(const xml_buffered_writer&);
3746
3747 public:
3748 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3749 {
3750 PUGI_IMPL_STATIC_ASSERT(bufcapacity >= 8);
3751 }
3752
3753 size_t flush()
3754 {
3755 flush(buffer, bufsize);
3756 bufsize = 0;
3757 return 0;
3758 }
3759
3760 void flush(const char_t* data, size_t size)
3761 {
3762 if (size == 0) return;
3763
3764 // fast path, just write data
3765 if (encoding == get_write_native_encoding())
3766 writer.write(data, size * sizeof(char_t));
3767 else
3768 {
3769 // convert chunk
3770 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3771 assert(result <= sizeof(scratch));
3772
3773 // write data
3774 writer.write(scratch.data_u8, result);
3775 }
3776 }
3777
3778 void write_direct(const char_t* data, size_t length)
3779 {
3780 // flush the remaining buffer contents
3781 flush();
3782
3783 // handle large chunks
3784 if (length > bufcapacity)
3785 {
3786 if (encoding == get_write_native_encoding())
3787 {
3788 // fast path, can just write data chunk
3789 writer.write(data, length * sizeof(char_t));
3790 return;
3791 }
3792
3793 // need to convert in suitable chunks
3794 while (length > bufcapacity)
3795 {
3796 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3797 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3798 size_t chunk_size = get_valid_length(data, bufcapacity);
3799 assert(chunk_size);
3800
3801 // convert chunk and write
3802 flush(data, chunk_size);
3803
3804 // iterate
3805 data += chunk_size;
3806 length -= chunk_size;
3807 }
3808
3809 // small tail is copied below
3810 bufsize = 0;
3811 }
3812
3813 memcpy(buffer + bufsize, data, length * sizeof(char_t));
3814 bufsize += length;
3815 }
3816
3817 void write_buffer(const char_t* data, size_t length)
3818 {
3819 size_t offset = bufsize;
3820
3821 if (offset + length <= bufcapacity)
3822 {
3823 memcpy(buffer + offset, data, length * sizeof(char_t));
3824 bufsize = offset + length;
3825 }
3826 else
3827 {
3828 write_direct(data, length);
3829 }
3830 }
3831
3832 void write_string(const char_t* data)
3833 {
3834 // write the part of the string that fits in the buffer
3835 size_t offset = bufsize;
3836
3837 while (*data && offset < bufcapacity)
3838 buffer[offset++] = *data++;
3839
3840 // write the rest
3841 if (offset < bufcapacity)
3842 {
3843 bufsize = offset;
3844 }
3845 else
3846 {
3847 // backtrack a bit if we have split the codepoint
3848 size_t length = offset - bufsize;
3849 size_t extra = length - get_valid_length(data - length, length);
3850
3851 bufsize = offset - extra;
3852
3853 write_direct(data - extra, strlength(data) + extra);
3854 }
3855 }
3856
3857 void write(char_t d0)
3858 {
3859 size_t offset = bufsize;
3860 if (offset > bufcapacity - 1) offset = flush();
3861
3862 buffer[offset + 0] = d0;
3863 bufsize = offset + 1;
3864 }
3865
3866 void write(char_t d0, char_t d1)
3867 {
3868 size_t offset = bufsize;
3869 if (offset > bufcapacity - 2) offset = flush();
3870
3871 buffer[offset + 0] = d0;
3872 buffer[offset + 1] = d1;
3873 bufsize = offset + 2;
3874 }
3875
3876 void write(char_t d0, char_t d1, char_t d2)
3877 {
3878 size_t offset = bufsize;
3879 if (offset > bufcapacity - 3) offset = flush();
3880
3881 buffer[offset + 0] = d0;
3882 buffer[offset + 1] = d1;
3883 buffer[offset + 2] = d2;
3884 bufsize = offset + 3;
3885 }
3886
3887 void write(char_t d0, char_t d1, char_t d2, char_t d3)
3888 {
3889 size_t offset = bufsize;
3890 if (offset > bufcapacity - 4) offset = flush();
3891
3892 buffer[offset + 0] = d0;
3893 buffer[offset + 1] = d1;
3894 buffer[offset + 2] = d2;
3895 buffer[offset + 3] = d3;
3896 bufsize = offset + 4;
3897 }
3898
3899 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3900 {
3901 size_t offset = bufsize;
3902 if (offset > bufcapacity - 5) offset = flush();
3903
3904 buffer[offset + 0] = d0;
3905 buffer[offset + 1] = d1;
3906 buffer[offset + 2] = d2;
3907 buffer[offset + 3] = d3;
3908 buffer[offset + 4] = d4;
3909 bufsize = offset + 5;
3910 }
3911
3912 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3913 {
3914 size_t offset = bufsize;
3915 if (offset > bufcapacity - 6) offset = flush();
3916
3917 buffer[offset + 0] = d0;
3918 buffer[offset + 1] = d1;
3919 buffer[offset + 2] = d2;
3920 buffer[offset + 3] = d3;
3921 buffer[offset + 4] = d4;
3922 buffer[offset + 5] = d5;
3923 bufsize = offset + 6;
3924 }
3925
3926 // utf8 maximum expansion: x4 (-> utf32)
3927 // utf16 maximum expansion: x2 (-> utf32)
3928 // utf32 maximum expansion: x1
3929 enum
3930 {
3931 bufcapacitybytes =
3932 #ifdef PUGIXML_MEMORY_OUTPUT_STACK
3933 PUGIXML_MEMORY_OUTPUT_STACK
3934 #else
3935 10240
3936 #endif
3937 ,
3938 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
3939 };
3940
3941 char_t buffer[bufcapacity];
3942
3943 union
3944 {
3945 uint8_t data_u8[4 * bufcapacity];
3946 uint16_t data_u16[2 * bufcapacity];
3947 uint32_t data_u32[bufcapacity];
3948 char_t data_char[bufcapacity];
3949 } scratch;
3950
3951 xml_writer& writer;
3952 size_t bufsize;
3953 xml_encoding encoding;
3954 };
3955
3956 PUGI_IMPL_FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3957 {
3958 while (*s)
3959 {
3960 const char_t* prev = s;
3961
3962 // While *s is a usual symbol
3963 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPEX(ss, type));
3964
3965 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3966
3967 switch (*s)
3968 {
3969 case 0: break;
3970 case '&':
3971 writer.write('&', 'a', 'm', 'p', ';');
3972 ++s;
3973 break;
3974 case '<':
3975 writer.write('&', 'l', 't', ';');
3976 ++s;
3977 break;
3978 case '>':
3979 writer.write('&', 'g', 't', ';');
3980 ++s;
3981 break;
3982 case '"':
3983 if (flags & format_attribute_single_quote)
3984 writer.write('"');
3985 else
3986 writer.write('&', 'q', 'u', 'o', 't', ';');
3987 ++s;
3988 break;
3989 case '\'':
3990 if (flags & format_attribute_single_quote)
3991 writer.write('&', 'a', 'p', 'o', 's', ';');
3992 else
3993 writer.write('\'');
3994 ++s;
3995 break;
3996 default: // s is not a usual symbol
3997 {
3998 unsigned int ch = static_cast<unsigned int>(*s++);
3999 assert(ch < 32);
4000
4001 if (!(flags & format_skip_control_chars))
4002 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
4003 }
4004 }
4005 }
4006 }
4007
4008 PUGI_IMPL_FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
4009 {
4010 if (flags & format_no_escapes)
4011 writer.write_string(s);
4012 else
4013 text_output_escaped(writer, s, type, flags);
4014 }
4015
4016 PUGI_IMPL_FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
4017 {
4018 do
4019 {
4020 writer.write('<', '!', '[', 'C', 'D');
4021 writer.write('A', 'T', 'A', '[');
4022
4023 const char_t* prev = s;
4024
4025 // look for ]]> sequence - we can't output it as is since it terminates CDATA
4026 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
4027
4028 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
4029 if (*s) s += 2;
4030
4031 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4032
4033 writer.write(']', ']', '>');
4034 }
4035 while (*s);
4036 }
4037
4038 PUGI_IMPL_FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
4039 {
4040 switch (indent_length)
4041 {
4042 case 1:
4043 {
4044 for (unsigned int i = 0; i < depth; ++i)
4045 writer.write(indent[0]);
4046 break;
4047 }
4048
4049 case 2:
4050 {
4051 for (unsigned int i = 0; i < depth; ++i)
4052 writer.write(indent[0], indent[1]);
4053 break;
4054 }
4055
4056 case 3:
4057 {
4058 for (unsigned int i = 0; i < depth; ++i)
4059 writer.write(indent[0], indent[1], indent[2]);
4060 break;
4061 }
4062
4063 case 4:
4064 {
4065 for (unsigned int i = 0; i < depth; ++i)
4066 writer.write(indent[0], indent[1], indent[2], indent[3]);
4067 break;
4068 }
4069
4070 default:
4071 {
4072 for (unsigned int i = 0; i < depth; ++i)
4073 writer.write_buffer(indent, indent_length);
4074 }
4075 }
4076 }
4077
4078 PUGI_IMPL_FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
4079 {
4080 writer.write('<', '!', '-', '-');
4081
4082 while (*s)
4083 {
4084 const char_t* prev = s;
4085
4086 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4087 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
4088
4089 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4090
4091 if (*s)
4092 {
4093 assert(*s == '-');
4094
4095 writer.write('-', ' ');
4096 ++s;
4097 }
4098 }
4099
4100 writer.write('-', '-', '>');
4101 }
4102
4103 PUGI_IMPL_FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4104 {
4105 while (*s)
4106 {
4107 const char_t* prev = s;
4108
4109 // look for ?> sequence - we can't output it since ?> terminates PI
4110 while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4111
4112 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4113
4114 if (*s)
4115 {
4116 assert(s[0] == '?' && s[1] == '>');
4117
4118 writer.write('?', ' ', '>');
4119 s += 2;
4120 }
4121 }
4122 }
4123
4124 PUGI_IMPL_FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4125 {
4126 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4127 const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"';
4128
4129 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4130 {
4131 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
4132 {
4133 writer.write('\n');
4134
4135 text_output_indent(writer, indent, indent_length, depth + 1);
4136 }
4137 else
4138 {
4139 writer.write(' ');
4140 }
4141
4142 writer.write_string(a->name ? a->name + 0 : default_name);
4143 writer.write('=', enquotation_char);
4144
4145 if (a->value)
4146 text_output(writer, a->value, ctx_special_attr, flags);
4147
4148 writer.write(enquotation_char);
4149 }
4150 }
4151
4152 PUGI_IMPL_FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4153 {
4154 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4155 const char_t* name = node->name ? node->name + 0 : default_name;
4156
4157 writer.write('<');
4158 writer.write_string(name);
4159
4160 if (node->first_attribute)
4161 node_output_attributes(writer, node, indent, indent_length, flags, depth);
4162
4163 // element nodes can have value if parse_embed_pcdata was used
4164 if (!node->value)
4165 {
4166 if (!node->first_child)
4167 {
4168 if (flags & format_no_empty_element_tags)
4169 {
4170 writer.write('>', '<', '/');
4171 writer.write_string(name);
4172 writer.write('>');
4173
4174 return false;
4175 }
4176 else
4177 {
4178 if ((flags & format_raw) == 0)
4179 writer.write(' ');
4180
4181 writer.write('/', '>');
4182
4183 return false;
4184 }
4185 }
4186 else
4187 {
4188 writer.write('>');
4189
4190 return true;
4191 }
4192 }
4193 else
4194 {
4195 writer.write('>');
4196
4197 text_output(writer, node->value, ctx_special_pcdata, flags);
4198
4199 if (!node->first_child)
4200 {
4201 writer.write('<', '/');
4202 writer.write_string(name);
4203 writer.write('>');
4204
4205 return false;
4206 }
4207 else
4208 {
4209 return true;
4210 }
4211 }
4212 }
4213
4214 PUGI_IMPL_FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4215 {
4216 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4217 const char_t* name = node->name ? node->name + 0 : default_name;
4218
4219 writer.write('<', '/');
4220 writer.write_string(name);
4221 writer.write('>');
4222 }
4223
4224 PUGI_IMPL_FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4225 {
4226 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4227
4228 switch (PUGI_IMPL_NODETYPE(node))
4229 {
4230 case node_pcdata:
4231 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4232 break;
4233
4234 case node_cdata:
4235 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4236 break;
4237
4238 case node_comment:
4239 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4240 break;
4241
4242 case node_pi:
4243 writer.write('<', '?');
4244 writer.write_string(node->name ? node->name + 0 : default_name);
4245
4246 if (node->value)
4247 {
4248 writer.write(' ');
4249 node_output_pi_value(writer, node->value);
4250 }
4251
4252 writer.write('?', '>');
4253 break;
4254
4255 case node_declaration:
4256 writer.write('<', '?');
4257 writer.write_string(node->name ? node->name + 0 : default_name);
4258 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4259 writer.write('?', '>');
4260 break;
4261
4262 case node_doctype:
4263 writer.write('<', '!', 'D', 'O', 'C');
4264 writer.write('T', 'Y', 'P', 'E');
4265
4266 if (node->value)
4267 {
4268 writer.write(' ');
4269 writer.write_string(node->value);
4270 }
4271
4272 writer.write('>');
4273 break;
4274
4275 default:
4276 assert(false && "Invalid node type"); // unreachable
4277 }
4278 }
4279
// Bit flags used by node_output to track what has to be written before the next node.
enum indent_flags_t
{
	indent_newline = 1 << 0, // a line break is pending
	indent_indent = 1 << 1   // indentation is pending
};
4285
4286 PUGI_IMPL_FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
4287 {
4288 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
4289 unsigned int indent_flags = indent_indent;
4290
4291 xml_node_struct* node = root;
4292
4293 do
4294 {
4295 assert(node);
4296
4297 // begin writing current node
4298 if (PUGI_IMPL_NODETYPE(node) == node_pcdata || PUGI_IMPL_NODETYPE(node) == node_cdata)
4299 {
4300 node_output_simple(writer, node, flags);
4301
4302 indent_flags = 0;
4303 }
4304 else
4305 {
4306 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4307 writer.write('\n');
4308
4309 if ((indent_flags & indent_indent) && indent_length)
4310 text_output_indent(writer, indent, indent_length, depth);
4311
4312 if (PUGI_IMPL_NODETYPE(node) == node_element)
4313 {
4314 indent_flags = indent_newline | indent_indent;
4315
4316 if (node_output_start(writer, node, indent, indent_length, flags, depth))
4317 {
4318 // element nodes can have value if parse_embed_pcdata was used
4319 if (node->value)
4320 indent_flags = 0;
4321
4322 node = node->first_child;
4323 depth++;
4324 continue;
4325 }
4326 }
4327 else if (PUGI_IMPL_NODETYPE(node) == node_document)
4328 {
4329 indent_flags = indent_indent;
4330
4331 if (node->first_child)
4332 {
4333 node = node->first_child;
4334 continue;
4335 }
4336 }
4337 else
4338 {
4339 node_output_simple(writer, node, flags);
4340
4341 indent_flags = indent_newline | indent_indent;
4342 }
4343 }
4344
4345 // continue to the next node
4346 while (node != root)
4347 {
4348 if (node->next_sibling)
4349 {
4350 node = node->next_sibling;
4351 break;
4352 }
4353
4354 node = node->parent;
4355
4356 // write closing node
4357 if (PUGI_IMPL_NODETYPE(node) == node_element)
4358 {
4359 depth--;
4360
4361 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4362 writer.write('\n');
4363
4364 if ((indent_flags & indent_indent) && indent_length)
4365 text_output_indent(writer, indent, indent_length, depth);
4366
4367 node_output_end(writer, node);
4368
4369 indent_flags = indent_newline | indent_indent;
4370 }
4371 }
4372 }
4373 while (node != root);
4374
4375 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4376 writer.write('\n');
4377 }
4378
4379 PUGI_IMPL_FN bool has_declaration(xml_node_struct* node)
4380 {
4381 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4382 {
4383 xml_node_type type = PUGI_IMPL_NODETYPE(child);
4384
4385 if (type == node_declaration) return true;
4386 if (type == node_element) return false;
4387 }
4388
4389 return false;
4390 }
4391
4392 PUGI_IMPL_FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4393 {
4394 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4395 if (a == attr)
4396 return true;
4397
4398 return false;
4399 }
4400
4401 PUGI_IMPL_FN bool allow_insert_attribute(xml_node_type parent)
4402 {
4403 return parent == node_element || parent == node_declaration;
4404 }
4405
4406 PUGI_IMPL_FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4407 {
4408 if (parent != node_document && parent != node_element) return false;
4409 if (child == node_document || child == node_null) return false;
4410 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4411
4412 return true;
4413 }
4414
4415 PUGI_IMPL_FN bool allow_move(xml_node parent, xml_node child)
4416 {
4417 // check that child can be a child of parent
4418 if (!allow_insert_child(parent.type(), child.type()))
4419 return false;
4420
4421 // check that node is not moved between documents
4422 if (parent.root() != child.root())
4423 return false;
4424
4425 // check that new parent is not in the child subtree
4426 xml_node cur = parent;
4427
4428 while (cur)
4429 {
4430 if (cur == child)
4431 return false;
4432
4433 cur = cur.parent();
4434 }
4435
4436 return true;
4437 }
4438
4439 template <typename String, typename Header>
4440 PUGI_IMPL_FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4441 {
4442 assert(!dest && (header & header_mask) == 0); // copies are performed into fresh nodes
4443
4444 if (source)
4445 {
4446 if (alloc && (source_header & header_mask) == 0)
4447 {
4448 dest = source;
4449
4450 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4451 header |= xml_memory_page_contents_shared_mask;
4452 source_header |= xml_memory_page_contents_shared_mask;
4453 }
4454 else
4455 strcpy_insitu(dest, header, header_mask, source, strlength(source));
4456 }
4457 }
4458
4459 PUGI_IMPL_FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4460 {
4461 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4462 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4463
4464 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4465 {
4466 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4467
4468 if (da)
4469 {
4470 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4471 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4472 }
4473 }
4474 }
4475
// Copies the contents of sn and of its whole subtree into dn without recursion.
// sit walks the source subtree in document order while dit tracks the destination
// node under which the copy of sit has to be appended.
// shared_alloc is non-null only when dn and sn use the same allocator object; it is
// passed down to node_copy_contents for every copied node.
4476 PUGI_IMPL_FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
4477 {
4478 xml_allocator& alloc = get_allocator(dn);
4479 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
4480
4481 node_copy_contents(dn, sn, shared_alloc);
4482
4483 xml_node_struct* dit = dn;
4484 xml_node_struct* sit = sn->first_child;
4485
4486 while (sit && sit != sn)
4487 {
4488 // loop invariant: dit is inside the subtree rooted at dn
4489 assert(dit);
4490
4491 // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
4492 if (sit != dn)
4493 {
4494 xml_node_struct* copy = append_new_node(dit, alloc, PUGI_IMPL_NODETYPE(sit));
4495
// if the node could not be created, its subtree is skipped and the walk moves on
4496 if (copy)
4497 {
4498 node_copy_contents(copy, sit, shared_alloc);
4499
// descend: children of sit are appended under the freshly created copy
4500 if (sit->first_child)
4501 {
4502 dit = copy;
4503 sit = sit->first_child;
4504 continue;
4505 }
4506 }
4507 }
4508
4509 // continue to the next node
4510 do
4511 {
4512 if (sit->next_sibling)
4513 {
4514 sit = sit->next_sibling;
4515 break;
4516 }
4517
// no more siblings: climb one level in both trees in lockstep
4518 sit = sit->parent;
4519 dit = dit->parent;
4520
4521 // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn
4522 assert(sit == sn || dit);
4523 }
4524 while (sit != sn);
4525 }
4526
// if the walk climbed back to sn, dit has climbed one level above dn
4527 assert(!sit || dit == dn->parent);
4528 }
4529
4530 PUGI_IMPL_FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4531 {
4532 xml_allocator& alloc = get_allocator(da);
4533 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4534
4535 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4536 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4537 }
4538
4539 inline bool is_text_node(xml_node_struct* node)
4540 {
4541 xml_node_type type = PUGI_IMPL_NODETYPE(node);
4542
4543 return type == node_pcdata || type == node_cdata;
4544 }
4545
4546 // get value with conversion functions
// Parses an integer from value into the unsigned type U.
// Accepted form: characters classified as ct_space, then one optional '+' or '-',
// then either "0x"/"0X" followed by hexadecimal digits or a run of decimal digits.
// Parsing stops at the first character that is not a digit of the selected base.
// minv and maxv are the clamping limits expressed in U:
// - for input with a leading '-', minv is returned when the magnitude overflowed U or
//   exceeds 0 - minv; otherwise the result is 0 - magnitude (modular negation in U);
// - otherwise maxv is returned when the value overflowed U or exceeds maxv.
// Overflow of U is detected from the number of significant digits, since result itself
// wraps around while accumulating.
4547 template <typename U> PUGI_IMPL_FN PUGI_IMPL_UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
4548 {
4549 U result = 0;
4550 const char_t* s = value;
4551
4552 while (PUGI_IMPL_IS_CHARTYPE(*s, ct_space))
4553 s++;
4554
4555 bool negative = (*s == '-');
4556
// skip exactly one sign character, if present
4557 s += (*s == '+' || *s == '-');
4558
4559 bool overflow = false;
4560
// (s[1] | ' ') maps 'X' to 'x', so both "0x" and "0X" select the hexadecimal branch
4561 if (s[0] == '0' && (s[1] | ' ') == 'x')
4562 {
4563 s += 2;
4564
4565 // since overflow detection relies on length of the sequence skip leading zeros
4566 while (*s == '0')
4567 s++;
4568
4569 const char_t* start = s;
4570
4571 for (;;)
4572 {
4573 if (static_cast<unsigned>(*s - '0') < 10)
4574 result = result * 16 + (*s - '0');
4575 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
4576 result = result * 16 + ((*s | ' ') - 'a' + 10);
4577 else
4578 break;
4579
4580 s++;
4581 }
4582
4583 size_t digits = static_cast<size_t>(s - start);
4584
// every hexadecimal digit carries 4 bits, so U holds at most sizeof(U) * 2 significant digits
4585 overflow = digits > sizeof(U) * 2;
4586 }
4587 else
4588 {
4589 // since overflow detection relies on length of the sequence skip leading zeros
4590 while (*s == '0')
4591 s++;
4592
4593 const char_t* start = s;
4594
4595 for (;;)
4596 {
4597 if (static_cast<unsigned>(*s - '0') < 10)
4598 result = result * 10 + (*s - '0');
4599 else
4600 break;
4601
4602 s++;
4603 }
4604
4605 size_t digits = static_cast<size_t>(s - start);
4606
4607 PUGI_IMPL_STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
4608
// max_digits10 is the digit count of the largest value of U and max_lead is that value's first digit
4609 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
4610 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
4611 const size_t high_bit = sizeof(U) * 8 - 1;
4612
// fewer digits than max_digits10 always fit and more never do; with exactly max_digits10 digits
// the value fits if the leading digit is below max_lead, or equals it and the accumulated
// result still has its top bit set
4613 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
4614 }
4615
4616 if (negative)
4617 {
4618 // Workaround for crayc++ CC-3059: Expected no overflow in routine.
4619 #ifdef _CRAYC
4620 return (overflow || result > ~minv + 1) ? minv : ~result + 1;
4621 #else
4622 return (overflow || result > 0 - minv) ? minv : 0 - result;
4623 #endif
4624 }
4625 else
4626 return (overflow || result > maxv) ? maxv : result;
4627 }
4628
4629 PUGI_IMPL_FN int get_value_int(const char_t* value)
4630 {
4631 return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
4632 }
4633
4634 PUGI_IMPL_FN unsigned int get_value_uint(const char_t* value)
4635 {
4636 return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4637 }
4638
4639 PUGI_IMPL_FN double get_value_double(const char_t* value)
4640 {
4641 #ifdef PUGIXML_WCHAR_MODE
4642 return wcstod(value, 0);
4643 #else
4644 return strtod(value, 0);
4645 #endif
4646 }
4647
4648 PUGI_IMPL_FN float get_value_float(const char_t* value)
4649 {
4650 #ifdef PUGIXML_WCHAR_MODE
4651 return static_cast<float>(wcstod(value, 0));
4652 #else
4653 return static_cast<float>(strtod(value, 0));
4654 #endif
4655 }
4656
4657 PUGI_IMPL_FN bool get_value_bool(const char_t* value)
4658 {
4659 // only look at first char
4660 char_t first = *value;
4661
4662 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
4663 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4664 }
4665
4666 #ifdef PUGIXML_HAS_LONG_LONG
4667 PUGI_IMPL_FN long long get_value_llong(const char_t* value)
4668 {
4669 return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4670 }
4671
4672 PUGI_IMPL_FN unsigned long long get_value_ullong(const char_t* value)
4673 {
4674 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4675 }
4676 #endif
4677
4678 template <typename U> PUGI_IMPL_FN PUGI_IMPL_UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4679 {
4680 char_t* result = end - 1;
4681 U rest = negative ? 0 - value : value;
4682
4683 do
4684 {
4685 *result-- = static_cast<char_t>('0' + (rest % 10));
4686 rest /= 10;
4687 }
4688 while (rest);
4689
4690 assert(result >= begin);
4691 (void)begin;
4692
4693 *result = '-';
4694
4695 return result + !negative;
4696 }
4697
4698 // set value with conversion functions
4699 template <typename String, typename Header>
4700 PUGI_IMPL_FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4701 {
4702 #ifdef PUGIXML_WCHAR_MODE
4703 char_t wbuf[128];
4704 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4705
4706 size_t offset = 0;
4707 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4708
4709 return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4710 #else
4711 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4712 #endif
4713 }
4714
4715 template <typename U, typename String, typename Header>
4716 PUGI_IMPL_FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
4717 {
4718 char_t buf[64];
4719 char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4720 char_t* begin = integer_to_string(buf, end, value, negative);
4721
4722 return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4723 }
4724
4725 template <typename String, typename Header>
4726 PUGI_IMPL_FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision)
4727 {
4728 char buf[128];
4729 PUGI_IMPL_SNPRINTF(buf, "%.*g", precision, double(value));
4730
4731 return set_value_ascii(dest, header, header_mask, buf);
4732 }
4733
4734 template <typename String, typename Header>
4735 PUGI_IMPL_FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision)
4736 {
4737 char buf[128];
4738 PUGI_IMPL_SNPRINTF(buf, "%.*g", precision, value);
4739
4740 return set_value_ascii(dest, header, header_mask, buf);
4741 }
4742
4743 template <typename String, typename Header>
4744 PUGI_IMPL_FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
4745 {
4746 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4747 }
4748
// Converts the raw input in contents (size bytes) to the internal char_t buffer and parses it
// into the tree under root, recording the buffer in doc->buffer.
// - encoding is resolved to the actual buffer encoding via get_buffer_encoding; the resolved
//   value is also reported in the returned result.
// - is_mutable is forwarded to convert_buffer.
// - own: when true, this function takes responsibility for contents and releases it with
//   xml_memory::deallocate if conversion fails or if conversion produced a different buffer.
// - out_buffer receives the parse buffer whenever own is set or conversion produced a new buffer.
// Returns status_io_error for a null contents with non-zero size, status_out_of_memory when
// conversion fails, otherwise the parser's result.
4749 PUGI_IMPL_FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4750 {
4751 // check input buffer
4752 if (!contents && size) return make_parse_result(status_io_error);
4753
4754 // get actual encoding
4755 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4756
4757 // if convert_buffer below throws bad_alloc, we still need to deallocate contents if we own it
4758 auto_deleter<void> contents_guard(own ? contents : 0, xml_memory::deallocate);
4759
4760 // get private buffer
4761 char_t* buffer = 0;
4762 size_t length = 0;
4763
4764 // coverity[var_deref_model]
4765 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4766
4767 // after this we either deallocate contents (below) or hold on to it via doc->buffer, so we don't need to guard it
4768 contents_guard.release();
4769
4770 // delete original buffer if we performed a conversion
4771 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4772
4773 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4774 if (own || buffer != contents) *out_buffer = buffer;
4775
4776 // store buffer for offset_debug
4777 doc->buffer = buffer;
4778
4779 // parse
4780 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4781
4782 // remember encoding
4783 res.encoding = buffer_encoding;
4784
4785 return res;
4786 }
4787
// Determines the length in bytes of file and stores it in out_result.
// Returns status_ok on success, status_io_error when the length cannot be obtained (or, on
// Linux/macOS, when the handle is not a regular file), and status_out_of_memory when the
// length does not fit into size_t.
// On Linux/macOS the length comes from fstat; the other branches seek to the end, read the
// position and seek back to the start, so they leave the file positioned at offset 0.
4788 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
4789 PUGI_IMPL_FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4790 {
4791 #if defined(__linux__) || defined(__APPLE__)
4792 // this simultaneously retrieves the file size and file mode (to guard against loading non-files)
4793 struct stat st;
4794 if (fstat(fileno(file), &st) != 0) return status_io_error;
4795
4796 // anything that's not a regular file doesn't have a coherent length
4797 if (!S_ISREG(st.st_mode)) return status_io_error;
4798
4799 typedef off_t length_type;
4800 length_type length = st.st_size;
4801 #elif defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400
4802 // there are 64-bit versions of fseek/ftell, let's use them
4803 typedef __int64 length_type;
4804
4805 _fseeki64(file, 0, SEEK_END);
4806 length_type length = _ftelli64(file);
4807 _fseeki64(file, 0, SEEK_SET);
4808 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4809 // there are 64-bit versions of fseek/ftell, let's use them
4810 typedef off64_t length_type;
4811
4812 fseeko64(file, 0, SEEK_END);
4813 length_type length = ftello64(file);
4814 fseeko64(file, 0, SEEK_SET);
4815 #else
4816 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4817 typedef long length_type;
4818
4819 fseek(file, 0, SEEK_END);
4820 length_type length = ftell(file);
4821 fseek(file, 0, SEEK_SET);
4822 #endif
4823
// the return values of the seek calls above are not inspected; only a negative length is treated as failure
4824 // check for I/O errors
4825 if (length < 0) return status_io_error;
4826
4827 // check for overflow
4828 size_t result = static_cast<size_t>(length);
4829
// the round trip through size_t changes the value exactly when length does not fit
4830 if (static_cast<length_type>(result) != length) return status_out_of_memory;
4831
4832 // finalize
4833 out_result = result;
4834
4835 return status_ok;
4836 }
4837
4838 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
4839 PUGI_IMPL_FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4840 {
4841 // We only need to zero-terminate if encoding conversion does not do it for us
4842 #ifdef PUGIXML_WCHAR_MODE
4843 xml_encoding wchar_encoding = get_wchar_encoding();
4844
4845 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4846 {
4847 size_t length = size / sizeof(char_t);
4848
4849 static_cast<char_t*>(buffer)[length] = 0;
4850 return (length + 1) * sizeof(char_t);
4851 }
4852 #else
4853 if (encoding == encoding_utf8)
4854 {
4855 static_cast<char*>(buffer)[size] = 0;
4856 return size + 1;
4857 }
4858 #endif
4859
4860 return size;
4861 }
4862
4863 PUGI_IMPL_FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4864 {
4865 if (!file) return make_parse_result(status_file_not_found);
4866
4867 // get file size (can result in I/O errors)
4868 size_t size = 0;
4869 xml_parse_status size_status = get_file_size(file, size);
4870 if (size_status != status_ok) return make_parse_result(size_status);
4871
4872 size_t max_suffix_size = sizeof(char_t);
4873
4874 // allocate buffer for the whole file
4875 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4876 if (!contents) return make_parse_result(status_out_of_memory);
4877
4878 // read file in memory
4879 size_t read_size = fread(contents, 1, size, file);
4880
4881 if (read_size != size)
4882 {
4883 xml_memory::deallocate(contents);
4884 return make_parse_result(status_io_error);
4885 }
4886
4887 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4888
4889 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4890 }
4891
4892 PUGI_IMPL_FN void close_file(FILE* file)
4893 {
4894 fclose(file);
4895 }
4896
4897 #ifndef PUGIXML_NO_STL
4898 template <typename T> struct xml_stream_chunk
4899 {
4900 static xml_stream_chunk* create()
4901 {
4902 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4903 if (!memory) return 0;
4904
4905 return new (memory) xml_stream_chunk();
4906 }
4907
4908 static void destroy(xml_stream_chunk* chunk)
4909 {
4910 // free chunk chain
4911 while (chunk)
4912 {
4913 xml_stream_chunk* next_ = chunk->next;
4914
4915 xml_memory::deallocate(chunk);
4916
4917 chunk = next_;
4918 }
4919 }
4920
4921 xml_stream_chunk(): next(0), size(0)
4922 {
4923 }
4924
4925 xml_stream_chunk* next;
4926 size_t size;
4927
4928 T data[xml_memory_page_size / sizeof(T)];
4929 };
4930
// Reads stream until end-of-file without seeking: data is gathered into a list of
// xml_stream_chunk objects and then copied into one contiguous buffer obtained from
// xml_memory::allocate.
// On success *out_buffer receives that buffer and *out_size the number of bytes read; the
// buffer has sizeof(char_t) spare bytes after the data.
// Returns status_out_of_memory when a chunk or the final buffer cannot be allocated or the
// byte total overflows size_t, and status_io_error on a stream error other than reaching EOF.
4931 template <typename T> PUGI_IMPL_FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4932 {
// the guard destroys the whole chunk list on every return path, including the successful one
4933 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
4934
4935 // read file to a chunk list
4936 size_t total = 0;
4937 xml_stream_chunk<T>* last = 0;
4938
4939 while (!stream.eof())
4940 {
4941 // allocate new chunk
4942 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
4943 if (!chunk) return status_out_of_memory;
4944
4945 // append chunk to list
4946 if (last) last = last->next = chunk;
4947 else chunks.data = last = chunk;
4948
4949 // read data to chunk
4950 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4951 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4952
4953 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4954 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4955
4956 // guard against huge files (chunk size is small enough to make this overflow check work)
4957 if (total + chunk->size < total) return status_out_of_memory;
4958 total += chunk->size;
4959 }
4960
// extra room requested after the data (see zero_terminate_buffer)
4961 size_t max_suffix_size = sizeof(char_t);
4962
4963 // copy chunk list to a contiguous buffer
4964 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4965 if (!buffer) return status_out_of_memory;
4966
4967 char* write = buffer;
4968
4969 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4970 {
4971 assert(write + chunk->size <= buffer + total);
4972 memcpy(write, chunk->data, chunk->size);
4973 write += chunk->size;
4974 }
4975
4976 assert(write == buffer + total);
4977
4978 // return buffer
4979 *out_buffer = buffer;
4980 *out_size = total;
4981
4982 return status_ok;
4983 }
4984
// Reads the rest of a seekable stream in one go: the remaining length is measured by seeking
// to the end and back, a buffer of that many elements (plus sizeof(char_t) spare bytes) is
// obtained from xml_memory::allocate, and the data is read with a single read call.
// On success *out_buffer receives the buffer and *out_size the number of bytes actually read,
// which may be smaller than the measured length.
// Returns status_io_error when seeking/telling or reading fails and status_out_of_memory when
// the length is negative, does not fit into size_t, or the buffer cannot be allocated.
4985 template <typename T> PUGI_IMPL_FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4986 {
4987 // get length of remaining data in stream
4988 typename std::basic_istream<T>::pos_type pos = stream.tellg();
4989 stream.seekg(0, std::ios::end);
4990 std::streamoff length = stream.tellg() - pos;
4991 stream.seekg(pos);
4992
// the stream state is checked once, after the whole tell/seek sequence above
4993 if (stream.fail() || pos < 0) return status_io_error;
4994
4995 // guard against huge files
4996 size_t read_length = static_cast<size_t>(length);
4997
4998 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4999
5000 size_t max_suffix_size = sizeof(char_t);
5001
5002 // read stream data into memory (guard against stream exceptions with buffer holder)
5003 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
5004 if (!buffer.data) return status_out_of_memory;
5005
5006 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
5007
5008 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
5009 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
5010
5011 // return buffer
5012 size_t actual_length = static_cast<size_t>(stream.gcount());
5013 assert(actual_length <= read_length);
5014
// release() detaches the buffer from the guard so that it outlives this function
5015 *out_buffer = buffer.release();
5016 *out_size = actual_length * sizeof(T);
5017
5018 return status_ok;
5019 }
5020
5021 template <typename T> PUGI_IMPL_FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
5022 {
5023 void* buffer = 0;
5024 size_t size = 0;
5025 xml_parse_status status = status_ok;
5026
5027 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
5028 if (stream.fail()) return make_parse_result(status_io_error);
5029
5030 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
5031 if (stream.tellg() < 0)
5032 {
5033 stream.clear(); // clear error flags that could be set by a failing tellg
5034 status = load_stream_data_noseek(stream, &buffer, &size);
5035 }
5036 else
5037 status = load_stream_data_seek(stream, &buffer, &size);
5038
5039 if (status != status_ok) return make_parse_result(status);
5040
5041 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
5042
5043 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
5044 }
5045 #endif
5046
5047 #if defined(PUGI_IMPL_MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
5048 PUGI_IMPL_FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
5049 {
5050 #if defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400
5051 FILE* file = 0;
5052 return _wfopen_s(&file, path, mode) == 0 ? file : 0;
5053 #else
5054 return _wfopen(path, mode);
5055 #endif
5056 }
5057 #else
5058 PUGI_IMPL_FN char* convert_path_heap(const wchar_t* str)
5059 {
5060 assert(str);
5061
5062 // first pass: get length in utf8 characters
5063 size_t length = strlength_wide(str);
5064 size_t size = as_utf8_begin(str, length);
5065
5066 // allocate resulting string
5067 char* result = static_cast<char*>(xml_memory::allocate(size + 1));
5068 if (!result) return 0;
5069
5070 // second pass: convert to utf8
5071 as_utf8_end(result, size, str, length);
5072
5073 // zero-terminate
5074 result[size] = 0;
5075
5076 return result;
5077 }
5078
5079 PUGI_IMPL_FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
5080 {
5081 // there is no standard function to open wide paths, so our best bet is to try utf8 path
5082 char* path_utf8 = convert_path_heap(path);
5083 if (!path_utf8) return 0;
5084
5085 // convert mode to ASCII (we mirror _wfopen interface)
5086 char mode_ascii[4] = {0};
5087 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
5088
5089 // try to open the utf8 path
5090 FILE* result = fopen(path_utf8, mode_ascii);
5091
5092 // free dummy buffer
5093 xml_memory::deallocate(path_utf8);
5094
5095 return result;
5096 }
5097 #endif
5098
5099 PUGI_IMPL_FN FILE* open_file(const char* path, const char* mode)
5100 {
5101 #if defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400
5102 FILE* file = 0;
5103 return fopen_s(&file, path, mode) == 0 ? file : 0;
5104 #else
5105 return fopen(path, mode);
5106 #endif
5107 }
5108
5109 PUGI_IMPL_FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
5110 {
5111 if (!file) return false;
5112
5113 xml_writer_file writer(file);
5114 doc.save(writer, indent, flags, encoding);
5115
5116 return fflush(file) == 0 && ferror(file) == 0;
5117 }
5118
5119 struct name_null_sentry
5120 {
5121 xml_node_struct* node;
5122 char_t* name;
5123
5124 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
5125 {
5126 node->name = 0;
5127 }
5128
5129 ~name_null_sentry()
5130 {
5131 node->name = name;
5132 }
5133 };
5134 PUGI_IMPL_NS_END
5135
5136 namespace pugi
5137 {
5138 PUGI_IMPL_FN xml_writer::~xml_writer()
5139 {
5140 }
5141
5142 PUGI_IMPL_FN xml_writer_file::xml_writer_file(void* file_): file(file_)
5143 {
5144 }
5145
5146 PUGI_IMPL_FN void xml_writer_file::write(const void* data, size_t size)
5147 {
5148 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5149 (void)!result; // unfortunately we can't do proper error handling here
5150 }
5151
5152 #ifndef PUGIXML_NO_STL
5153 PUGI_IMPL_FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5154 {
5155 }
5156
5157 PUGI_IMPL_FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
5158 {
5159 }
5160
5161 PUGI_IMPL_FN void xml_writer_stream::write(const void* data, size_t size)
5162 {
5163 if (narrow_stream)
5164 {
5165 assert(!wide_stream);
5166 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5167 }
5168 else
5169 {
5170 assert(wide_stream);
5171 assert(size % sizeof(wchar_t) == 0);
5172
5173 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5174 }
5175 }
5176 #endif
5177
5178 PUGI_IMPL_FN xml_tree_walker::xml_tree_walker(): _depth(0)
5179 {
5180 }
5181
5182 PUGI_IMPL_FN xml_tree_walker::~xml_tree_walker()
5183 {
5184 }
5185
5186 PUGI_IMPL_FN int xml_tree_walker::depth() const
5187 {
5188 return _depth;
5189 }
5190
5191 PUGI_IMPL_FN bool xml_tree_walker::begin(xml_node&)
5192 {
5193 return true;
5194 }
5195
5196 PUGI_IMPL_FN bool xml_tree_walker::end(xml_node&)
5197 {
5198 return true;
5199 }
5200
5201 PUGI_IMPL_FN xml_attribute::xml_attribute(): _attr(0)
5202 {
5203 }
5204
5205 PUGI_IMPL_FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
5206 {
5207 }
5208
5209 PUGI_IMPL_FN static void unspecified_bool_xml_attribute(xml_attribute***)
5210 {
5211 }
5212
5213 PUGI_IMPL_FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5214 {
5215 return _attr ? unspecified_bool_xml_attribute : 0;
5216 }
5217
5218 PUGI_IMPL_FN bool xml_attribute::operator!() const
5219 {
5220 return !_attr;
5221 }
5222
5223 PUGI_IMPL_FN bool xml_attribute::operator==(const xml_attribute& r) const
5224 {
5225 return (_attr == r._attr);
5226 }
5227
5228 PUGI_IMPL_FN bool xml_attribute::operator!=(const xml_attribute& r) const
5229 {
5230 return (_attr != r._attr);
5231 }
5232
5233 PUGI_IMPL_FN bool xml_attribute::operator<(const xml_attribute& r) const
5234 {
5235 return (_attr < r._attr);
5236 }
5237
5238 PUGI_IMPL_FN bool xml_attribute::operator>(const xml_attribute& r) const
5239 {
5240 return (_attr > r._attr);
5241 }
5242
5243 PUGI_IMPL_FN bool xml_attribute::operator<=(const xml_attribute& r) const
5244 {
5245 return (_attr <= r._attr);
5246 }
5247
5248 PUGI_IMPL_FN bool xml_attribute::operator>=(const xml_attribute& r) const
5249 {
5250 return (_attr >= r._attr);
5251 }
5252
5253 PUGI_IMPL_FN xml_attribute xml_attribute::next_attribute() const
5254 {
5255 if (!_attr) return xml_attribute();
5256 return xml_attribute(_attr->next_attribute);
5257 }
5258
5259 PUGI_IMPL_FN xml_attribute xml_attribute::previous_attribute() const
5260 {
5261 if (!_attr) return xml_attribute();
5262 xml_attribute_struct* prev = _attr->prev_attribute_c;
5263 return prev->next_attribute ? xml_attribute(prev) : xml_attribute();
5264 }
5265
5266 PUGI_IMPL_FN const char_t* xml_attribute::as_string(const char_t* def) const
5267 {
5268 if (!_attr) return def;
5269 const char_t* value = _attr->value;
5270 return value ? value : def;
5271 }
5272
5273 PUGI_IMPL_FN int xml_attribute::as_int(int def) const
5274 {
5275 if (!_attr) return def;
5276 const char_t* value = _attr->value;
5277 return value ? impl::get_value_int(value) : def;
5278 }
5279
5280 PUGI_IMPL_FN unsigned int xml_attribute::as_uint(unsigned int def) const
5281 {
5282 if (!_attr) return def;
5283 const char_t* value = _attr->value;
5284 return value ? impl::get_value_uint(value) : def;
5285 }
5286
5287 PUGI_IMPL_FN double xml_attribute::as_double(double def) const
5288 {
5289 if (!_attr) return def;
5290 const char_t* value = _attr->value;
5291 return value ? impl::get_value_double(value) : def;
5292 }
5293
5294 PUGI_IMPL_FN float xml_attribute::as_float(float def) const
5295 {
5296 if (!_attr) return def;
5297 const char_t* value = _attr->value;
5298 return value ? impl::get_value_float(value) : def;
5299 }
5300
5301 PUGI_IMPL_FN bool xml_attribute::as_bool(bool def) const
5302 {
5303 if (!_attr) return def;
5304 const char_t* value = _attr->value;
5305 return value ? impl::get_value_bool(value) : def;
5306 }
5307
5308 #ifdef PUGIXML_HAS_LONG_LONG
5309 PUGI_IMPL_FN long long xml_attribute::as_llong(long long def) const
5310 {
5311 if (!_attr) return def;
5312 const char_t* value = _attr->value;
5313 return value ? impl::get_value_llong(value) : def;
5314 }
5315
5316 PUGI_IMPL_FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5317 {
5318 if (!_attr) return def;
5319 const char_t* value = _attr->value;
5320 return value ? impl::get_value_ullong(value) : def;
5321 }
5322 #endif
5323
5324 PUGI_IMPL_FN bool xml_attribute::empty() const
5325 {
5326 return !_attr;
5327 }
5328
5329 PUGI_IMPL_FN const char_t* xml_attribute::name() const
5330 {
5331 if (!_attr) return PUGIXML_TEXT("");
5332 const char_t* name = _attr->name;
5333 return name ? name : PUGIXML_TEXT("");
5334 }
5335
5336 PUGI_IMPL_FN const char_t* xml_attribute::value() const
5337 {
5338 if (!_attr) return PUGIXML_TEXT("");
5339 const char_t* value = _attr->value;
5340 return value ? value : PUGIXML_TEXT("");
5341 }
5342
5343 PUGI_IMPL_FN size_t xml_attribute::hash_value() const
5344 {
5345 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5346 }
5347
5348 PUGI_IMPL_FN xml_attribute_struct* xml_attribute::internal_object() const
5349 {
5350 return _attr;
5351 }
5352
5353 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5354 {
5355 set_value(rhs);
5356 return *this;
5357 }
5358
5359 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(int rhs)
5360 {
5361 set_value(rhs);
5362 return *this;
5363 }
5364
5365 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5366 {
5367 set_value(rhs);
5368 return *this;
5369 }
5370
5371 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(long rhs)
5372 {
5373 set_value(rhs);
5374 return *this;
5375 }
5376
5377 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
5378 {
5379 set_value(rhs);
5380 return *this;
5381 }
5382
5383 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(double rhs)
5384 {
5385 set_value(rhs);
5386 return *this;
5387 }
5388
5389 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(float rhs)
5390 {
5391 set_value(rhs);
5392 return *this;
5393 }
5394
5395 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(bool rhs)
5396 {
5397 set_value(rhs);
5398 return *this;
5399 }
5400
5401 #ifdef PUGIXML_HAS_LONG_LONG
5402 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(long long rhs)
5403 {
5404 set_value(rhs);
5405 return *this;
5406 }
5407
5408 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
5409 {
5410 set_value(rhs);
5411 return *this;
5412 }
5413 #endif
5414
5415 PUGI_IMPL_FN bool xml_attribute::set_name(const char_t* rhs)
5416 {
5417 if (!_attr) return false;
5418
5419 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5420 }
5421
5422 PUGI_IMPL_FN bool xml_attribute::set_name(const char_t* rhs, size_t size)
5423 {
5424 if (!_attr) return false;
5425
5426 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, size);
5427 }
5428
5429 PUGI_IMPL_FN bool xml_attribute::set_value(const char_t* rhs)
5430 {
5431 if (!_attr) return false;
5432
5433 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5434 }
5435
5436 PUGI_IMPL_FN bool xml_attribute::set_value(const char_t* rhs, size_t size)
5437 {
5438 if (!_attr) return false;
5439
5440 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, size);
5441 }
5442
5443 PUGI_IMPL_FN bool xml_attribute::set_value(int rhs)
5444 {
5445 if (!_attr) return false;
5446
5447 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5448 }
5449
5450 PUGI_IMPL_FN bool xml_attribute::set_value(unsigned int rhs)
5451 {
5452 if (!_attr) return false;
5453
5454 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5455 }
5456
5457 PUGI_IMPL_FN bool xml_attribute::set_value(long rhs)
5458 {
5459 if (!_attr) return false;
5460
5461 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5462 }
5463
5464 PUGI_IMPL_FN bool xml_attribute::set_value(unsigned long rhs)
5465 {
5466 if (!_attr) return false;
5467
5468 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5469 }
5470
5471 PUGI_IMPL_FN bool xml_attribute::set_value(double rhs)
5472 {
5473 if (!_attr) return false;
5474
5475 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision);
5476 }
5477
5478 PUGI_IMPL_FN bool xml_attribute::set_value(double rhs, int precision)
5479 {
5480 if (!_attr) return false;
5481
5482 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
5483 }
5484
5485 PUGI_IMPL_FN bool xml_attribute::set_value(float rhs)
5486 {
5487 if (!_attr) return false;
5488
5489 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision);
5490 }
5491
5492 PUGI_IMPL_FN bool xml_attribute::set_value(float rhs, int precision)
5493 {
5494 if (!_attr) return false;
5495
5496 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
5497 }
5498
5499 PUGI_IMPL_FN bool xml_attribute::set_value(bool rhs)
5500 {
5501 if (!_attr) return false;
5502
5503 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5504 }
5505
5506 #ifdef PUGIXML_HAS_LONG_LONG
5507 PUGI_IMPL_FN bool xml_attribute::set_value(long long rhs)
5508 {
5509 if (!_attr) return false;
5510
5511 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5512 }
5513
5514 PUGI_IMPL_FN bool xml_attribute::set_value(unsigned long long rhs)
5515 {
5516 if (!_attr) return false;
5517
5518 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5519 }
5520 #endif
5521
5522 #ifdef __BORLANDC__
5523 PUGI_IMPL_FN bool operator&&(const xml_attribute& lhs, bool rhs)
5524 {
5525 return (bool)lhs && rhs;
5526 }
5527
5528 PUGI_IMPL_FN bool operator||(const xml_attribute& lhs, bool rhs)
5529 {
5530 return (bool)lhs || rhs;
5531 }
5532 #endif
5533
5534 PUGI_IMPL_FN xml_node::xml_node(): _root(0)
5535 {
5536 }
5537
5538 PUGI_IMPL_FN xml_node::xml_node(xml_node_struct* p): _root(p)
5539 {
5540 }
5541
5542 PUGI_IMPL_FN static void unspecified_bool_xml_node(xml_node***)
5543 {
5544 }
5545
5546 PUGI_IMPL_FN xml_node::operator xml_node::unspecified_bool_type() const
5547 {
5548 return _root ? unspecified_bool_xml_node : 0;
5549 }
5550
5551 PUGI_IMPL_FN bool xml_node::operator!() const
5552 {
5553 return !_root;
5554 }
5555
5556 PUGI_IMPL_FN xml_node::iterator xml_node::begin() const
5557 {
5558 return iterator(_root ? _root->first_child + 0 : 0, _root);
5559 }
5560
5561 PUGI_IMPL_FN xml_node::iterator xml_node::end() const
5562 {
5563 return iterator(0, _root);
5564 }
5565
5566 PUGI_IMPL_FN xml_node::attribute_iterator xml_node::attributes_begin() const
5567 {
5568 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5569 }
5570
5571 PUGI_IMPL_FN xml_node::attribute_iterator xml_node::attributes_end() const
5572 {
5573 return attribute_iterator(0, _root);
5574 }
5575
5576 PUGI_IMPL_FN xml_object_range<xml_node_iterator> xml_node::children() const
5577 {
5578 return xml_object_range<xml_node_iterator>(begin(), end());
5579 }
5580
5581 PUGI_IMPL_FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5582 {
5583 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5584 }
5585
5586 PUGI_IMPL_FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5587 {
5588 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5589 }
5590
5591 PUGI_IMPL_FN bool xml_node::operator==(const xml_node& r) const
5592 {
5593 return (_root == r._root);
5594 }
5595
5596 PUGI_IMPL_FN bool xml_node::operator!=(const xml_node& r) const
5597 {
5598 return (_root != r._root);
5599 }
5600
5601 PUGI_IMPL_FN bool xml_node::operator<(const xml_node& r) const
5602 {
5603 return (_root < r._root);
5604 }
5605
5606 PUGI_IMPL_FN bool xml_node::operator>(const xml_node& r) const
5607 {
5608 return (_root > r._root);
5609 }
5610
5611 PUGI_IMPL_FN bool xml_node::operator<=(const xml_node& r) const
5612 {
5613 return (_root <= r._root);
5614 }
5615
5616 PUGI_IMPL_FN bool xml_node::operator>=(const xml_node& r) const
5617 {
5618 return (_root >= r._root);
5619 }
5620
5621 PUGI_IMPL_FN bool xml_node::empty() const
5622 {
5623 return !_root;
5624 }
5625
5626 PUGI_IMPL_FN const char_t* xml_node::name() const
5627 {
5628 if (!_root) return PUGIXML_TEXT("");
5629 const char_t* name = _root->name;
5630 return name ? name : PUGIXML_TEXT("");
5631 }
5632
5633 PUGI_IMPL_FN xml_node_type xml_node::type() const
5634 {
5635 return _root ? PUGI_IMPL_NODETYPE(_root) : node_null;
5636 }
5637
5638 PUGI_IMPL_FN const char_t* xml_node::value() const
5639 {
5640 if (!_root) return PUGIXML_TEXT("");
5641 const char_t* value = _root->value;
5642 return value ? value : PUGIXML_TEXT("");
5643 }
5644
5645 PUGI_IMPL_FN xml_node xml_node::child(const char_t* name_) const
5646 {
5647 if (!_root) return xml_node();
5648
5649 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5650 {
5651 const char_t* iname = i->name;
5652 if (iname && impl::strequal(name_, iname))
5653 return xml_node(i);
5654 }
5655
5656 return xml_node();
5657 }
5658
5659 PUGI_IMPL_FN xml_attribute xml_node::attribute(const char_t* name_) const
5660 {
5661 if (!_root) return xml_attribute();
5662
5663 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5664 {
5665 const char_t* iname = i->name;
5666 if (iname && impl::strequal(name_, iname))
5667 return xml_attribute(i);
5668 }
5669
5670 return xml_attribute();
5671 }
5672
5673 PUGI_IMPL_FN xml_node xml_node::next_sibling(const char_t* name_) const
5674 {
5675 if (!_root) return xml_node();
5676
5677 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5678 {
5679 const char_t* iname = i->name;
5680 if (iname && impl::strequal(name_, iname))
5681 return xml_node(i);
5682 }
5683
5684 return xml_node();
5685 }
5686
5687 PUGI_IMPL_FN xml_node xml_node::next_sibling() const
5688 {
5689 return _root ? xml_node(_root->next_sibling) : xml_node();
5690 }
5691
5692 PUGI_IMPL_FN xml_node xml_node::previous_sibling(const char_t* name_) const
5693 {
5694 if (!_root) return xml_node();
5695
5696 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5697 {
5698 const char_t* iname = i->name;
5699 if (iname && impl::strequal(name_, iname))
5700 return xml_node(i);
5701 }
5702
5703 return xml_node();
5704 }
5705
5706 PUGI_IMPL_FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5707 {
5708 xml_attribute_struct* hint = hint_._attr;
5709
5710 // if hint is not an attribute of node, behavior is not defined
5711 assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5712
5713 if (!_root) return xml_attribute();
5714
5715 // optimistically search from hint up until the end
5716 for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5717 {
5718 const char_t* iname = i->name;
5719 if (iname && impl::strequal(name_, iname))
5720 {
5721 // update hint to maximize efficiency of searching for consecutive attributes
5722 hint_._attr = i->next_attribute;
5723
5724 return xml_attribute(i);
5725 }
5726 }
5727
5728 // wrap around and search from the first attribute until the hint
5729 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5730 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5731 {
5732 const char_t* jname = j->name;
5733 if (jname && impl::strequal(name_, jname))
5734 {
5735 // update hint to maximize efficiency of searching for consecutive attributes
5736 hint_._attr = j->next_attribute;
5737
5738 return xml_attribute(j);
5739 }
5740 }
5741
5742 return xml_attribute();
5743 }
5744
5745 PUGI_IMPL_FN xml_node xml_node::previous_sibling() const
5746 {
5747 if (!_root) return xml_node();
5748 xml_node_struct* prev = _root->prev_sibling_c;
5749 return prev->next_sibling ? xml_node(prev) : xml_node();
5750 }
5751
5752 PUGI_IMPL_FN xml_node xml_node::parent() const
5753 {
5754 return _root ? xml_node(_root->parent) : xml_node();
5755 }
5756
5757 PUGI_IMPL_FN xml_node xml_node::root() const
5758 {
5759 return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5760 }
5761
5762 PUGI_IMPL_FN xml_text xml_node::text() const
5763 {
5764 return xml_text(_root);
5765 }
5766
5767 PUGI_IMPL_FN const char_t* xml_node::child_value() const
5768 {
5769 if (!_root) return PUGIXML_TEXT("");
5770
5771 // element nodes can have value if parse_embed_pcdata was used
5772 if (PUGI_IMPL_NODETYPE(_root) == node_element && _root->value)
5773 return _root->value;
5774
5775 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5776 {
5777 const char_t* ivalue = i->value;
5778 if (impl::is_text_node(i) && ivalue)
5779 return ivalue;
5780 }
5781
5782 return PUGIXML_TEXT("");
5783 }
5784
5785 PUGI_IMPL_FN const char_t* xml_node::child_value(const char_t* name_) const
5786 {
5787 return child(name_).child_value();
5788 }
5789
5790 PUGI_IMPL_FN xml_attribute xml_node::first_attribute() const
5791 {
5792 if (!_root) return xml_attribute();
5793 return xml_attribute(_root->first_attribute);
5794 }
5795
5796 PUGI_IMPL_FN xml_attribute xml_node::last_attribute() const
5797 {
5798 if (!_root) return xml_attribute();
5799 xml_attribute_struct* first = _root->first_attribute;
5800 return first ? xml_attribute(first->prev_attribute_c) : xml_attribute();
5801 }
5802
5803 PUGI_IMPL_FN xml_node xml_node::first_child() const
5804 {
5805 if (!_root) return xml_node();
5806 return xml_node(_root->first_child);
5807 }
5808
5809 PUGI_IMPL_FN xml_node xml_node::last_child() const
5810 {
5811 if (!_root) return xml_node();
5812 xml_node_struct* first = _root->first_child;
5813 return first ? xml_node(first->prev_sibling_c) : xml_node();
5814 }
5815
5816 PUGI_IMPL_FN bool xml_node::set_name(const char_t* rhs)
5817 {
5818 xml_node_type type_ = _root ? PUGI_IMPL_NODETYPE(_root) : node_null;
5819
5820 if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5821 return false;
5822
5823 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5824 }
5825
5826 PUGI_IMPL_FN bool xml_node::set_name(const char_t* rhs, size_t size)
5827 {
5828 xml_node_type type_ = _root ? PUGI_IMPL_NODETYPE(_root) : node_null;
5829
5830 if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5831 return false;
5832
5833 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, size);
5834 }
5835
5836 PUGI_IMPL_FN bool xml_node::set_value(const char_t* rhs)
5837 {
5838 xml_node_type type_ = _root ? PUGI_IMPL_NODETYPE(_root) : node_null;
5839
5840 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5841 return false;
5842
5843 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5844 }
5845
5846 PUGI_IMPL_FN bool xml_node::set_value(const char_t* rhs, size_t size)
5847 {
5848 xml_node_type type_ = _root ? PUGI_IMPL_NODETYPE(_root) : node_null;
5849
5850 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5851 return false;
5852
5853 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, size);
5854 }
5855
5856 PUGI_IMPL_FN xml_attribute xml_node::append_attribute(const char_t* name_)
5857 {
5858 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5859
5860 impl::xml_allocator& alloc = impl::get_allocator(_root);
5861 if (!alloc.reserve()) return xml_attribute();
5862
5863 xml_attribute a(impl::allocate_attribute(alloc));
5864 if (!a) return xml_attribute();
5865
5866 impl::append_attribute(a._attr, _root);
5867
5868 a.set_name(name_);
5869
5870 return a;
5871 }
5872
5873 PUGI_IMPL_FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5874 {
5875 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5876
5877 impl::xml_allocator& alloc = impl::get_allocator(_root);
5878 if (!alloc.reserve()) return xml_attribute();
5879
5880 xml_attribute a(impl::allocate_attribute(alloc));
5881 if (!a) return xml_attribute();
5882
5883 impl::prepend_attribute(a._attr, _root);
5884
5885 a.set_name(name_);
5886
5887 return a;
5888 }
5889
5890 PUGI_IMPL_FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5891 {
5892 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5893 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5894
5895 impl::xml_allocator& alloc = impl::get_allocator(_root);
5896 if (!alloc.reserve()) return xml_attribute();
5897
5898 xml_attribute a(impl::allocate_attribute(alloc));
5899 if (!a) return xml_attribute();
5900
5901 impl::insert_attribute_after(a._attr, attr._attr, _root);
5902
5903 a.set_name(name_);
5904
5905 return a;
5906 }
5907
5908 PUGI_IMPL_FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5909 {
5910 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5911 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5912
5913 impl::xml_allocator& alloc = impl::get_allocator(_root);
5914 if (!alloc.reserve()) return xml_attribute();
5915
5916 xml_attribute a(impl::allocate_attribute(alloc));
5917 if (!a) return xml_attribute();
5918
5919 impl::insert_attribute_before(a._attr, attr._attr, _root);
5920
5921 a.set_name(name_);
5922
5923 return a;
5924 }
5925
5926 PUGI_IMPL_FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5927 {
5928 if (!proto) return xml_attribute();
5929 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5930
5931 impl::xml_allocator& alloc = impl::get_allocator(_root);
5932 if (!alloc.reserve()) return xml_attribute();
5933
5934 xml_attribute a(impl::allocate_attribute(alloc));
5935 if (!a) return xml_attribute();
5936
5937 impl::append_attribute(a._attr, _root);
5938 impl::node_copy_attribute(a._attr, proto._attr);
5939
5940 return a;
5941 }
5942
5943 PUGI_IMPL_FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5944 {
5945 if (!proto) return xml_attribute();
5946 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5947
5948 impl::xml_allocator& alloc = impl::get_allocator(_root);
5949 if (!alloc.reserve()) return xml_attribute();
5950
5951 xml_attribute a(impl::allocate_attribute(alloc));
5952 if (!a) return xml_attribute();
5953
5954 impl::prepend_attribute(a._attr, _root);
5955 impl::node_copy_attribute(a._attr, proto._attr);
5956
5957 return a;
5958 }
5959
5960 PUGI_IMPL_FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5961 {
5962 if (!proto) return xml_attribute();
5963 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5964 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5965
5966 impl::xml_allocator& alloc = impl::get_allocator(_root);
5967 if (!alloc.reserve()) return xml_attribute();
5968
5969 xml_attribute a(impl::allocate_attribute(alloc));
5970 if (!a) return xml_attribute();
5971
5972 impl::insert_attribute_after(a._attr, attr._attr, _root);
5973 impl::node_copy_attribute(a._attr, proto._attr);
5974
5975 return a;
5976 }
5977
5978 PUGI_IMPL_FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5979 {
5980 if (!proto) return xml_attribute();
5981 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5982 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5983
5984 impl::xml_allocator& alloc = impl::get_allocator(_root);
5985 if (!alloc.reserve()) return xml_attribute();
5986
5987 xml_attribute a(impl::allocate_attribute(alloc));
5988 if (!a) return xml_attribute();
5989
5990 impl::insert_attribute_before(a._attr, attr._attr, _root);
5991 impl::node_copy_attribute(a._attr, proto._attr);
5992
5993 return a;
5994 }
5995
5996 PUGI_IMPL_FN xml_node xml_node::append_child(xml_node_type type_)
5997 {
5998 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5999
6000 impl::xml_allocator& alloc = impl::get_allocator(_root);
6001 if (!alloc.reserve()) return xml_node();
6002
6003 xml_node n(impl::allocate_node(alloc, type_));
6004 if (!n) return xml_node();
6005
6006 impl::append_node(n._root, _root);
6007
6008 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
6009
6010 return n;
6011 }
6012
6013 PUGI_IMPL_FN xml_node xml_node::prepend_child(xml_node_type type_)
6014 {
6015 if (!impl::allow_insert_child(type(), type_)) return xml_node();
6016
6017 impl::xml_allocator& alloc = impl::get_allocator(_root);
6018 if (!alloc.reserve()) return xml_node();
6019
6020 xml_node n(impl::allocate_node(alloc, type_));
6021 if (!n) return xml_node();
6022
6023 impl::prepend_node(n._root, _root);
6024
6025 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
6026
6027 return n;
6028 }
6029
6030 PUGI_IMPL_FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
6031 {
6032 if (!impl::allow_insert_child(type(), type_)) return xml_node();
6033 if (!node._root || node._root->parent != _root) return xml_node();
6034
6035 impl::xml_allocator& alloc = impl::get_allocator(_root);
6036 if (!alloc.reserve()) return xml_node();
6037
6038 xml_node n(impl::allocate_node(alloc, type_));
6039 if (!n) return xml_node();
6040
6041 impl::insert_node_before(n._root, node._root);
6042
6043 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
6044
6045 return n;
6046 }
6047
6048 PUGI_IMPL_FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
6049 {
6050 if (!impl::allow_insert_child(type(), type_)) return xml_node();
6051 if (!node._root || node._root->parent != _root) return xml_node();
6052
6053 impl::xml_allocator& alloc = impl::get_allocator(_root);
6054 if (!alloc.reserve()) return xml_node();
6055
6056 xml_node n(impl::allocate_node(alloc, type_));
6057 if (!n) return xml_node();
6058
6059 impl::insert_node_after(n._root, node._root);
6060
6061 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
6062
6063 return n;
6064 }
6065
6066 PUGI_IMPL_FN xml_node xml_node::append_child(const char_t* name_)
6067 {
6068 xml_node result = append_child(node_element);
6069
6070 result.set_name(name_);
6071
6072 return result;
6073 }
6074
6075 PUGI_IMPL_FN xml_node xml_node::prepend_child(const char_t* name_)
6076 {
6077 xml_node result = prepend_child(node_element);
6078
6079 result.set_name(name_);
6080
6081 return result;
6082 }
6083
6084 PUGI_IMPL_FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
6085 {
6086 xml_node result = insert_child_after(node_element, node);
6087
6088 result.set_name(name_);
6089
6090 return result;
6091 }
6092
6093 PUGI_IMPL_FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
6094 {
6095 xml_node result = insert_child_before(node_element, node);
6096
6097 result.set_name(name_);
6098
6099 return result;
6100 }
6101
6102 PUGI_IMPL_FN xml_node xml_node::append_copy(const xml_node& proto)
6103 {
6104 xml_node_type type_ = proto.type();
6105 if (!impl::allow_insert_child(type(), type_)) return xml_node();
6106
6107 impl::xml_allocator& alloc = impl::get_allocator(_root);
6108 if (!alloc.reserve()) return xml_node();
6109
6110 xml_node n(impl::allocate_node(alloc, type_));
6111 if (!n) return xml_node();
6112
6113 impl::append_node(n._root, _root);
6114 impl::node_copy_tree(n._root, proto._root);
6115
6116 return n;
6117 }
6118
6119 PUGI_IMPL_FN xml_node xml_node::prepend_copy(const xml_node& proto)
6120 {
6121 xml_node_type type_ = proto.type();
6122 if (!impl::allow_insert_child(type(), type_)) return xml_node();
6123
6124 impl::xml_allocator& alloc = impl::get_allocator(_root);
6125 if (!alloc.reserve()) return xml_node();
6126
6127 xml_node n(impl::allocate_node(alloc, type_));
6128 if (!n) return xml_node();
6129
6130 impl::prepend_node(n._root, _root);
6131 impl::node_copy_tree(n._root, proto._root);
6132
6133 return n;
6134 }
6135
6136 PUGI_IMPL_FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
6137 {
6138 xml_node_type type_ = proto.type();
6139 if (!impl::allow_insert_child(type(), type_)) return xml_node();
6140 if (!node._root || node._root->parent != _root) return xml_node();
6141
6142 impl::xml_allocator& alloc = impl::get_allocator(_root);
6143 if (!alloc.reserve()) return xml_node();
6144
6145 xml_node n(impl::allocate_node(alloc, type_));
6146 if (!n) return xml_node();
6147
6148 impl::insert_node_after(n._root, node._root);
6149 impl::node_copy_tree(n._root, proto._root);
6150
6151 return n;
6152 }
6153
6154 PUGI_IMPL_FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
6155 {
6156 xml_node_type type_ = proto.type();
6157 if (!impl::allow_insert_child(type(), type_)) return xml_node();
6158 if (!node._root || node._root->parent != _root) return xml_node();
6159
6160 impl::xml_allocator& alloc = impl::get_allocator(_root);
6161 if (!alloc.reserve()) return xml_node();
6162
6163 xml_node n(impl::allocate_node(alloc, type_));
6164 if (!n) return xml_node();
6165
6166 impl::insert_node_before(n._root, node._root);
6167 impl::node_copy_tree(n._root, proto._root);
6168
6169 return n;
6170 }
6171
6172 PUGI_IMPL_FN xml_node xml_node::append_move(const xml_node& moved)
6173 {
6174 if (!impl::allow_move(*this, moved)) return xml_node();
6175
6176 impl::xml_allocator& alloc = impl::get_allocator(_root);
6177 if (!alloc.reserve()) return xml_node();
6178
6179 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6180 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6181
6182 impl::remove_node(moved._root);
6183 impl::append_node(moved._root, _root);
6184
6185 return moved;
6186 }
6187
6188 PUGI_IMPL_FN xml_node xml_node::prepend_move(const xml_node& moved)
6189 {
6190 if (!impl::allow_move(*this, moved)) return xml_node();
6191
6192 impl::xml_allocator& alloc = impl::get_allocator(_root);
6193 if (!alloc.reserve()) return xml_node();
6194
6195 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6196 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6197
6198 impl::remove_node(moved._root);
6199 impl::prepend_node(moved._root, _root);
6200
6201 return moved;
6202 }
6203
6204 PUGI_IMPL_FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
6205 {
6206 if (!impl::allow_move(*this, moved)) return xml_node();
6207 if (!node._root || node._root->parent != _root) return xml_node();
6208 if (moved._root == node._root) return xml_node();
6209
6210 impl::xml_allocator& alloc = impl::get_allocator(_root);
6211 if (!alloc.reserve()) return xml_node();
6212
6213 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6214 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6215
6216 impl::remove_node(moved._root);
6217 impl::insert_node_after(moved._root, node._root);
6218
6219 return moved;
6220 }
6221
6222 PUGI_IMPL_FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
6223 {
6224 if (!impl::allow_move(*this, moved)) return xml_node();
6225 if (!node._root || node._root->parent != _root) return xml_node();
6226 if (moved._root == node._root) return xml_node();
6227
6228 impl::xml_allocator& alloc = impl::get_allocator(_root);
6229 if (!alloc.reserve()) return xml_node();
6230
6231 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6232 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6233
6234 impl::remove_node(moved._root);
6235 impl::insert_node_before(moved._root, node._root);
6236
6237 return moved;
6238 }
6239
6240 PUGI_IMPL_FN bool xml_node::remove_attribute(const char_t* name_)
6241 {
6242 return remove_attribute(attribute(name_));
6243 }
6244
6245 PUGI_IMPL_FN bool xml_node::remove_attribute(const xml_attribute& a)
6246 {
6247 if (!_root || !a._attr) return false;
6248 if (!impl::is_attribute_of(a._attr, _root)) return false;
6249
6250 impl::xml_allocator& alloc = impl::get_allocator(_root);
6251 if (!alloc.reserve()) return false;
6252
6253 impl::remove_attribute(a._attr, _root);
6254 impl::destroy_attribute(a._attr, alloc);
6255
6256 return true;
6257 }
6258
6259 PUGI_IMPL_FN bool xml_node::remove_attributes()
6260 {
6261 if (!_root) return false;
6262
6263 impl::xml_allocator& alloc = impl::get_allocator(_root);
6264 if (!alloc.reserve()) return false;
6265
6266 for (xml_attribute_struct* attr = _root->first_attribute; attr; )
6267 {
6268 xml_attribute_struct* next = attr->next_attribute;
6269
6270 impl::destroy_attribute(attr, alloc);
6271
6272 attr = next;
6273 }
6274
6275 _root->first_attribute = 0;
6276
6277 return true;
6278 }
6279
6280 PUGI_IMPL_FN bool xml_node::remove_child(const char_t* name_)
6281 {
6282 return remove_child(child(name_));
6283 }
6284
6285 PUGI_IMPL_FN bool xml_node::remove_child(const xml_node& n)
6286 {
6287 if (!_root || !n._root || n._root->parent != _root) return false;
6288
6289 impl::xml_allocator& alloc = impl::get_allocator(_root);
6290 if (!alloc.reserve()) return false;
6291
6292 impl::remove_node(n._root);
6293 impl::destroy_node(n._root, alloc);
6294
6295 return true;
6296 }
6297
6298 PUGI_IMPL_FN bool xml_node::remove_children()
6299 {
6300 if (!_root) return false;
6301
6302 impl::xml_allocator& alloc = impl::get_allocator(_root);
6303 if (!alloc.reserve()) return false;
6304
6305 for (xml_node_struct* cur = _root->first_child; cur; )
6306 {
6307 xml_node_struct* next = cur->next_sibling;
6308
6309 impl::destroy_node(cur, alloc);
6310
6311 cur = next;
6312 }
6313
6314 _root->first_child = 0;
6315
6316 return true;
6317 }
6318
6319 PUGI_IMPL_FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
6320 {
6321 // append_buffer is only valid for elements/documents
6322 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);
6323
6324 // append buffer can not merge PCDATA into existing PCDATA nodes
6325 if ((options & parse_merge_pcdata) != 0 && last_child().type() == node_pcdata) return impl::make_parse_result(status_append_invalid_root);
6326
6327 // get document node
6328 impl::xml_document_struct* doc = &impl::get_document(_root);
6329
6330 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
6331 doc->header |= impl::xml_memory_page_contents_shared_mask;
6332
6333 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
6334 impl::xml_memory_page* page = 0;
6335 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
6336 (void)page;
6337
6338 if (!extra) return impl::make_parse_result(status_out_of_memory);
6339
6340 #ifdef PUGIXML_COMPACT
6341 // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
6342 // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
6343 extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
6344 #endif
6345
6346 // add extra buffer to the list
6347 extra->buffer = 0;
6348 extra->next = doc->extra_buffers;
6349 doc->extra_buffers = extra;
6350
6351 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
6352 impl::name_null_sentry sentry(_root);
6353
6354 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
6355 }
6356
6357 PUGI_IMPL_FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6358 {
6359 if (!_root) return xml_node();
6360
6361 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6362 {
6363 const char_t* iname = i->name;
6364 if (iname && impl::strequal(name_, iname))
6365 {
6366 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6367 {
6368 const char_t* aname = a->name;
6369 if (aname && impl::strequal(attr_name, aname))
6370 {
6371 const char_t* avalue = a->value;
6372 if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT("")))
6373 return xml_node(i);
6374 }
6375 }
6376 }
6377 }
6378
6379 return xml_node();
6380 }
6381
6382 PUGI_IMPL_FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6383 {
6384 if (!_root) return xml_node();
6385
6386 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6387 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6388 {
6389 const char_t* aname = a->name;
6390 if (aname && impl::strequal(attr_name, aname))
6391 {
6392 const char_t* avalue = a->value;
6393 if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT("")))
6394 return xml_node(i);
6395 }
6396 }
6397
6398 return xml_node();
6399 }
6400
6401 #ifndef PUGIXML_NO_STL
6402 PUGI_IMPL_FN string_t xml_node::path(char_t delimiter) const
6403 {
6404 if (!_root) return string_t();
6405
6406 size_t offset = 0;
6407
6408 for (xml_node_struct* i = _root; i; i = i->parent)
6409 {
6410 const char_t* iname = i->name;
6411 offset += (i != _root);
6412 offset += iname ? impl::strlength(iname) : 0;
6413 }
6414
6415 string_t result;
6416 result.resize(offset);
6417
6418 for (xml_node_struct* j = _root; j; j = j->parent)
6419 {
6420 if (j != _root)
6421 result[--offset] = delimiter;
6422
6423 const char_t* jname = j->name;
6424 if (jname)
6425 {
6426 size_t length = impl::strlength(jname);
6427
6428 offset -= length;
6429 memcpy(&result[offset], jname, length * sizeof(char_t));
6430 }
6431 }
6432
6433 assert(offset == 0);
6434
6435 return result;
6436 }
6437 #endif
6438
6439 PUGI_IMPL_FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
6440 {
6441 xml_node context = path_[0] == delimiter ? root() : *this;
6442
6443 if (!context._root) return xml_node();
6444
6445 const char_t* path_segment = path_;
6446
6447 while (*path_segment == delimiter) ++path_segment;
6448
6449 const char_t* path_segment_end = path_segment;
6450
6451 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
6452
6453 if (path_segment == path_segment_end) return context;
6454
6455 const char_t* next_segment = path_segment_end;
6456
6457 while (*next_segment == delimiter) ++next_segment;
6458
6459 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
6460 return context.first_element_by_path(next_segment, delimiter);
6461 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
6462 return context.parent().first_element_by_path(next_segment, delimiter);
6463 else
6464 {
6465 for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling)
6466 {
6467 const char_t* jname = j->name;
6468 if (jname && impl::strequalrange(jname, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
6469 {
6470 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
6471
6472 if (subsearch) return subsearch;
6473 }
6474 }
6475
6476 return xml_node();
6477 }
6478 }
6479
6480 PUGI_IMPL_FN bool xml_node::traverse(xml_tree_walker& walker)
6481 {
6482 walker._depth = -1;
6483
6484 xml_node arg_begin(_root);
6485 if (!walker.begin(arg_begin)) return false;
6486
6487 xml_node_struct* cur = _root ? _root->first_child + 0 : 0;
6488
6489 if (cur)
6490 {
6491 ++walker._depth;
6492
6493 do
6494 {
6495 xml_node arg_for_each(cur);
6496 if (!walker.for_each(arg_for_each))
6497 return false;
6498
6499 if (cur->first_child)
6500 {
6501 ++walker._depth;
6502 cur = cur->first_child;
6503 }
6504 else if (cur->next_sibling)
6505 cur = cur->next_sibling;
6506 else
6507 {
6508 while (!cur->next_sibling && cur != _root && cur->parent)
6509 {
6510 --walker._depth;
6511 cur = cur->parent;
6512 }
6513
6514 if (cur != _root)
6515 cur = cur->next_sibling;
6516 }
6517 }
6518 while (cur && cur != _root);
6519 }
6520
6521 assert(walker._depth == -1);
6522
6523 xml_node arg_end(_root);
6524 return walker.end(arg_end);
6525 }
6526
6527 PUGI_IMPL_FN size_t xml_node::hash_value() const
6528 {
6529 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6530 }
6531
6532 PUGI_IMPL_FN xml_node_struct* xml_node::internal_object() const
6533 {
6534 return _root;
6535 }
6536
6537 PUGI_IMPL_FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6538 {
6539 if (!_root) return;
6540
6541 impl::xml_buffered_writer buffered_writer(writer, encoding);
6542
6543 impl::node_output(buffered_writer, _root, indent, flags, depth);
6544
6545 buffered_writer.flush();
6546 }
6547
6548 #ifndef PUGIXML_NO_STL
6549 PUGI_IMPL_FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6550 {
6551 xml_writer_stream writer(stream);
6552
6553 print(writer, indent, flags, encoding, depth);
6554 }
6555
6556 PUGI_IMPL_FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6557 {
6558 xml_writer_stream writer(stream);
6559
6560 print(writer, indent, flags, encoding_wchar, depth);
6561 }
6562 #endif
6563
6564 PUGI_IMPL_FN ptrdiff_t xml_node::offset_debug() const
6565 {
6566 if (!_root) return -1;
6567
6568 impl::xml_document_struct& doc = impl::get_document(_root);
6569
6570 // we can determine the offset reliably only if there is exactly once parse buffer
6571 if (!doc.buffer || doc.extra_buffers) return -1;
6572
6573 switch (type())
6574 {
6575 case node_document:
6576 return 0;
6577
6578 case node_element:
6579 case node_declaration:
6580 case node_pi:
6581 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6582
6583 case node_pcdata:
6584 case node_cdata:
6585 case node_comment:
6586 case node_doctype:
6587 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6588
6589 default:
6590 assert(false && "Invalid node type"); // unreachable
6591 return -1;
6592 }
6593 }
6594
6595 #ifdef __BORLANDC__
6596 PUGI_IMPL_FN bool operator&&(const xml_node& lhs, bool rhs)
6597 {
6598 return (bool)lhs && rhs;
6599 }
6600
6601 PUGI_IMPL_FN bool operator||(const xml_node& lhs, bool rhs)
6602 {
6603 return (bool)lhs || rhs;
6604 }
6605 #endif
6606
6607 PUGI_IMPL_FN xml_text::xml_text(xml_node_struct* root): _root(root)
6608 {
6609 }
6610
6611 PUGI_IMPL_FN xml_node_struct* xml_text::_data() const
6612 {
6613 if (!_root || impl::is_text_node(_root)) return _root;
6614
6615 // element nodes can have value if parse_embed_pcdata was used
6616 if (PUGI_IMPL_NODETYPE(_root) == node_element && _root->value)
6617 return _root;
6618
6619 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6620 if (impl::is_text_node(node))
6621 return node;
6622
6623 return 0;
6624 }
6625
6626 PUGI_IMPL_FN xml_node_struct* xml_text::_data_new()
6627 {
6628 xml_node_struct* d = _data();
6629 if (d) return d;
6630
6631 return xml_node(_root).append_child(node_pcdata).internal_object();
6632 }
6633
6634 PUGI_IMPL_FN xml_text::xml_text(): _root(0)
6635 {
6636 }
6637
6638 PUGI_IMPL_FN static void unspecified_bool_xml_text(xml_text***)
6639 {
6640 }
6641
6642 PUGI_IMPL_FN xml_text::operator xml_text::unspecified_bool_type() const
6643 {
6644 return _data() ? unspecified_bool_xml_text : 0;
6645 }
6646
6647 PUGI_IMPL_FN bool xml_text::operator!() const
6648 {
6649 return !_data();
6650 }
6651
6652 PUGI_IMPL_FN bool xml_text::empty() const
6653 {
6654 return _data() == 0;
6655 }
6656
6657 PUGI_IMPL_FN const char_t* xml_text::get() const
6658 {
6659 xml_node_struct* d = _data();
6660 if (!d) return PUGIXML_TEXT("");
6661 const char_t* value = d->value;
6662 return value ? value : PUGIXML_TEXT("");
6663 }
6664
6665 PUGI_IMPL_FN const char_t* xml_text::as_string(const char_t* def) const
6666 {
6667 xml_node_struct* d = _data();
6668 if (!d) return def;
6669 const char_t* value = d->value;
6670 return value ? value : def;
6671 }
6672
6673 PUGI_IMPL_FN int xml_text::as_int(int def) const
6674 {
6675 xml_node_struct* d = _data();
6676 if (!d) return def;
6677 const char_t* value = d->value;
6678 return value ? impl::get_value_int(value) : def;
6679 }
6680
6681 PUGI_IMPL_FN unsigned int xml_text::as_uint(unsigned int def) const
6682 {
6683 xml_node_struct* d = _data();
6684 if (!d) return def;
6685 const char_t* value = d->value;
6686 return value ? impl::get_value_uint(value) : def;
6687 }
6688
6689 PUGI_IMPL_FN double xml_text::as_double(double def) const
6690 {
6691 xml_node_struct* d = _data();
6692 if (!d) return def;
6693 const char_t* value = d->value;
6694 return value ? impl::get_value_double(value) : def;
6695 }
6696
6697 PUGI_IMPL_FN float xml_text::as_float(float def) const
6698 {
6699 xml_node_struct* d = _data();
6700 if (!d) return def;
6701 const char_t* value = d->value;
6702 return value ? impl::get_value_float(value) : def;
6703 }
6704
6705 PUGI_IMPL_FN bool xml_text::as_bool(bool def) const
6706 {
6707 xml_node_struct* d = _data();
6708 if (!d) return def;
6709 const char_t* value = d->value;
6710 return value ? impl::get_value_bool(value) : def;
6711 }
6712
6713 #ifdef PUGIXML_HAS_LONG_LONG
6714 PUGI_IMPL_FN long long xml_text::as_llong(long long def) const
6715 {
6716 xml_node_struct* d = _data();
6717 if (!d) return def;
6718 const char_t* value = d->value;
6719 return value ? impl::get_value_llong(value) : def;
6720 }
6721
6722 PUGI_IMPL_FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6723 {
6724 xml_node_struct* d = _data();
6725 if (!d) return def;
6726 const char_t* value = d->value;
6727 return value ? impl::get_value_ullong(value) : def;
6728 }
6729 #endif
6730
6731 PUGI_IMPL_FN bool xml_text::set(const char_t* rhs)
6732 {
6733 xml_node_struct* dn = _data_new();
6734
6735 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6736 }
6737
6738 PUGI_IMPL_FN bool xml_text::set(const char_t* rhs, size_t size)
6739 {
6740 xml_node_struct* dn = _data_new();
6741
6742 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, size) : false;
6743 }
6744
6745 PUGI_IMPL_FN bool xml_text::set(int rhs)
6746 {
6747 xml_node_struct* dn = _data_new();
6748
6749 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6750 }
6751
6752 PUGI_IMPL_FN bool xml_text::set(unsigned int rhs)
6753 {
6754 xml_node_struct* dn = _data_new();
6755
6756 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6757 }
6758
6759 PUGI_IMPL_FN bool xml_text::set(long rhs)
6760 {
6761 xml_node_struct* dn = _data_new();
6762
6763 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6764 }
6765
6766 PUGI_IMPL_FN bool xml_text::set(unsigned long rhs)
6767 {
6768 xml_node_struct* dn = _data_new();
6769
6770 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6771 }
6772
6773 PUGI_IMPL_FN bool xml_text::set(float rhs)
6774 {
6775 xml_node_struct* dn = _data_new();
6776
6777 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false;
6778 }
6779
6780 PUGI_IMPL_FN bool xml_text::set(float rhs, int precision)
6781 {
6782 xml_node_struct* dn = _data_new();
6783
6784 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
6785 }
6786
6787 PUGI_IMPL_FN bool xml_text::set(double rhs)
6788 {
6789 xml_node_struct* dn = _data_new();
6790
6791 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false;
6792 }
6793
6794 PUGI_IMPL_FN bool xml_text::set(double rhs, int precision)
6795 {
6796 xml_node_struct* dn = _data_new();
6797
6798 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
6799 }
6800
6801 PUGI_IMPL_FN bool xml_text::set(bool rhs)
6802 {
6803 xml_node_struct* dn = _data_new();
6804
6805 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6806 }
6807
6808 #ifdef PUGIXML_HAS_LONG_LONG
6809 PUGI_IMPL_FN bool xml_text::set(long long rhs)
6810 {
6811 xml_node_struct* dn = _data_new();
6812
6813 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6814 }
6815
6816 PUGI_IMPL_FN bool xml_text::set(unsigned long long rhs)
6817 {
6818 xml_node_struct* dn = _data_new();
6819
6820 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6821 }
6822 #endif
6823
6824 PUGI_IMPL_FN xml_text& xml_text::operator=(const char_t* rhs)
6825 {
6826 set(rhs);
6827 return *this;
6828 }
6829
6830 PUGI_IMPL_FN xml_text& xml_text::operator=(int rhs)
6831 {
6832 set(rhs);
6833 return *this;
6834 }
6835
6836 PUGI_IMPL_FN xml_text& xml_text::operator=(unsigned int rhs)
6837 {
6838 set(rhs);
6839 return *this;
6840 }
6841
6842 PUGI_IMPL_FN xml_text& xml_text::operator=(long rhs)
6843 {
6844 set(rhs);
6845 return *this;
6846 }
6847
6848 PUGI_IMPL_FN xml_text& xml_text::operator=(unsigned long rhs)
6849 {
6850 set(rhs);
6851 return *this;
6852 }
6853
6854 PUGI_IMPL_FN xml_text& xml_text::operator=(double rhs)
6855 {
6856 set(rhs);
6857 return *this;
6858 }
6859
6860 PUGI_IMPL_FN xml_text& xml_text::operator=(float rhs)
6861 {
6862 set(rhs);
6863 return *this;
6864 }
6865
6866 PUGI_IMPL_FN xml_text& xml_text::operator=(bool rhs)
6867 {
6868 set(rhs);
6869 return *this;
6870 }
6871
6872 #ifdef PUGIXML_HAS_LONG_LONG
6873 PUGI_IMPL_FN xml_text& xml_text::operator=(long long rhs)
6874 {
6875 set(rhs);
6876 return *this;
6877 }
6878
6879 PUGI_IMPL_FN xml_text& xml_text::operator=(unsigned long long rhs)
6880 {
6881 set(rhs);
6882 return *this;
6883 }
6884 #endif
6885
6886 PUGI_IMPL_FN xml_node xml_text::data() const
6887 {
6888 return xml_node(_data());
6889 }
6890
6891 #ifdef __BORLANDC__
6892 PUGI_IMPL_FN bool operator&&(const xml_text& lhs, bool rhs)
6893 {
6894 return (bool)lhs && rhs;
6895 }
6896
6897 PUGI_IMPL_FN bool operator||(const xml_text& lhs, bool rhs)
6898 {
6899 return (bool)lhs || rhs;
6900 }
6901 #endif
6902
6903 PUGI_IMPL_FN xml_node_iterator::xml_node_iterator()
6904 {
6905 }
6906
6907 PUGI_IMPL_FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
6908 {
6909 }
6910
6911 PUGI_IMPL_FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6912 {
6913 }
6914
6915 PUGI_IMPL_FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6916 {
6917 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6918 }
6919
6920 PUGI_IMPL_FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6921 {
6922 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6923 }
6924
6925 PUGI_IMPL_FN xml_node& xml_node_iterator::operator*() const
6926 {
6927 assert(_wrap._root);
6928 return _wrap;
6929 }
6930
6931 PUGI_IMPL_FN xml_node* xml_node_iterator::operator->() const
6932 {
6933 assert(_wrap._root);
6934 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6935 }
6936
6937 PUGI_IMPL_FN xml_node_iterator& xml_node_iterator::operator++()
6938 {
6939 assert(_wrap._root);
6940 _wrap._root = _wrap._root->next_sibling;
6941 return *this;
6942 }
6943
6944 PUGI_IMPL_FN xml_node_iterator xml_node_iterator::operator++(int)
6945 {
6946 xml_node_iterator temp = *this;
6947 ++*this;
6948 return temp;
6949 }
6950
6951 PUGI_IMPL_FN xml_node_iterator& xml_node_iterator::operator--()
6952 {
6953 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6954 return *this;
6955 }
6956
6957 PUGI_IMPL_FN xml_node_iterator xml_node_iterator::operator--(int)
6958 {
6959 xml_node_iterator temp = *this;
6960 --*this;
6961 return temp;
6962 }
6963
6964 PUGI_IMPL_FN xml_attribute_iterator::xml_attribute_iterator()
6965 {
6966 }
6967
6968 PUGI_IMPL_FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
6969 {
6970 }
6971
6972 PUGI_IMPL_FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6973 {
6974 }
6975
6976 PUGI_IMPL_FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6977 {
6978 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6979 }
6980
6981 PUGI_IMPL_FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6982 {
6983 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6984 }
6985
6986 PUGI_IMPL_FN xml_attribute& xml_attribute_iterator::operator*() const
6987 {
6988 assert(_wrap._attr);
6989 return _wrap;
6990 }
6991
6992 PUGI_IMPL_FN xml_attribute* xml_attribute_iterator::operator->() const
6993 {
6994 assert(_wrap._attr);
6995 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
6996 }
6997
6998 PUGI_IMPL_FN xml_attribute_iterator& xml_attribute_iterator::operator++()
6999 {
7000 assert(_wrap._attr);
7001 _wrap._attr = _wrap._attr->next_attribute;
7002 return *this;
7003 }
7004
7005 PUGI_IMPL_FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
7006 {
7007 xml_attribute_iterator temp = *this;
7008 ++*this;
7009 return temp;
7010 }
7011
7012 PUGI_IMPL_FN xml_attribute_iterator& xml_attribute_iterator::operator--()
7013 {
7014 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
7015 return *this;
7016 }
7017
7018 PUGI_IMPL_FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
7019 {
7020 xml_attribute_iterator temp = *this;
7021 --*this;
7022 return temp;
7023 }
7024
7025 PUGI_IMPL_FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
7026 {
7027 }
7028
7029 PUGI_IMPL_FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
7030 {
7031 }
7032
7033 PUGI_IMPL_FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
7034 {
7035 }
7036
7037 PUGI_IMPL_FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
7038 {
7039 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
7040 }
7041
7042 PUGI_IMPL_FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
7043 {
7044 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
7045 }
7046
7047 PUGI_IMPL_FN xml_node& xml_named_node_iterator::operator*() const
7048 {
7049 assert(_wrap._root);
7050 return _wrap;
7051 }
7052
7053 PUGI_IMPL_FN xml_node* xml_named_node_iterator::operator->() const
7054 {
7055 assert(_wrap._root);
7056 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
7057 }
7058
7059 PUGI_IMPL_FN xml_named_node_iterator& xml_named_node_iterator::operator++()
7060 {
7061 assert(_wrap._root);
7062 _wrap = _wrap.next_sibling(_name);
7063 return *this;
7064 }
7065
7066 PUGI_IMPL_FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
7067 {
7068 xml_named_node_iterator temp = *this;
7069 ++*this;
7070 return temp;
7071 }
7072
7073 PUGI_IMPL_FN xml_named_node_iterator& xml_named_node_iterator::operator--()
7074 {
7075 if (_wrap._root)
7076 _wrap = _wrap.previous_sibling(_name);
7077 else
7078 {
7079 _wrap = _parent.last_child();
7080
7081 if (!impl::strequal(_wrap.name(), _name))
7082 _wrap = _wrap.previous_sibling(_name);
7083 }
7084
7085 return *this;
7086 }
7087
7088 PUGI_IMPL_FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
7089 {
7090 xml_named_node_iterator temp = *this;
7091 --*this;
7092 return temp;
7093 }
7094
7095 PUGI_IMPL_FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
7096 {
7097 }
7098
7099 PUGI_IMPL_FN xml_parse_result::operator bool() const
7100 {
7101 return status == status_ok;
7102 }
7103
7104 PUGI_IMPL_FN const char* xml_parse_result::description() const
7105 {
7106 switch (status)
7107 {
7108 case status_ok: return "No error";
7109
7110 case status_file_not_found: return "File was not found";
7111 case status_io_error: return "Error reading from file/stream";
7112 case status_out_of_memory: return "Could not allocate memory";
7113 case status_internal_error: return "Internal error occurred";
7114
7115 case status_unrecognized_tag: return "Could not determine tag type";
7116
7117 case status_bad_pi: return "Error parsing document declaration/processing instruction";
7118 case status_bad_comment: return "Error parsing comment";
7119 case status_bad_cdata: return "Error parsing CDATA section";
7120 case status_bad_doctype: return "Error parsing document type declaration";
7121 case status_bad_pcdata: return "Error parsing PCDATA section";
7122 case status_bad_start_element: return "Error parsing start element tag";
7123 case status_bad_attribute: return "Error parsing element attribute";
7124 case status_bad_end_element: return "Error parsing end element tag";
7125 case status_end_element_mismatch: return "Start-end tags mismatch";
7126
7127 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
7128
7129 case status_no_document_element: return "No document element found";
7130
7131 default: return "Unknown error";
7132 }
7133 }
7134
7135 PUGI_IMPL_FN xml_document::xml_document(): _buffer(0)
7136 {
7137 _create();
7138 }
7139
7140 PUGI_IMPL_FN xml_document::~xml_document()
7141 {
7142 _destroy();
7143 }
7144
7145 #ifdef PUGIXML_HAS_MOVE
7146 PUGI_IMPL_FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
7147 {
7148 _create();
7149 _move(rhs);
7150 }
7151
7152 PUGI_IMPL_FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
7153 {
7154 if (this == &rhs) return *this;
7155
7156 _destroy();
7157 _create();
7158 _move(rhs);
7159
7160 return *this;
7161 }
7162 #endif
7163
7164 PUGI_IMPL_FN void xml_document::reset()
7165 {
7166 _destroy();
7167 _create();
7168 }
7169
7170 PUGI_IMPL_FN void xml_document::reset(const xml_document& proto)
7171 {
7172 reset();
7173
7174 impl::node_copy_tree(_root, proto._root);
7175 }
7176
7177 PUGI_IMPL_FN void xml_document::_create()
7178 {
7179 assert(!_root);
7180
7181 #ifdef PUGIXML_COMPACT
7182 // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
7183 const size_t page_offset = sizeof(void*);
7184 #else
7185 const size_t page_offset = 0;
7186 #endif
7187
7188 // initialize sentinel page
7189 PUGI_IMPL_STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
7190
7191 // prepare page structure
7192 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
7193 assert(page);
7194
7195 page->busy_size = impl::xml_memory_page_size;
7196
7197 // setup first page marker
7198 #ifdef PUGIXML_COMPACT
7199 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
7200 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
7201 *page->compact_page_marker = sizeof(impl::xml_memory_page);
7202 #endif
7203
7204 // allocate new root
7205 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
7206 _root->prev_sibling_c = _root;
7207
7208 // setup sentinel page
7209 page->allocator = static_cast<impl::xml_document_struct*>(_root);
7210
7211 // setup hash table pointer in allocator
7212 #ifdef PUGIXML_COMPACT
7213 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
7214 #endif
7215
7216 // verify the document allocation
7217 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
7218 }
7219
7220 PUGI_IMPL_FN void xml_document::_destroy()
7221 {
7222 assert(_root);
7223
7224 // destroy static storage
7225 if (_buffer)
7226 {
7227 impl::xml_memory::deallocate(_buffer);
7228 _buffer = 0;
7229 }
7230
7231 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
7232 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
7233 {
7234 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
7235 }
7236
7237 // destroy dynamic storage, leave sentinel page (it's in static memory)
7238 impl::xml_memory_page* root_page = PUGI_IMPL_GETPAGE(_root);
7239 assert(root_page && !root_page->prev);
7240 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
7241
7242 for (impl::xml_memory_page* page = root_page->next; page; )
7243 {
7244 impl::xml_memory_page* next = page->next;
7245
7246 impl::xml_allocator::deallocate_page(page);
7247
7248 page = next;
7249 }
7250
7251 #ifdef PUGIXML_COMPACT
7252 // destroy hash table
7253 static_cast<impl::xml_document_struct*>(_root)->hash.clear();
7254 #endif
7255
7256 _root = 0;
7257 }
7258
7259 #ifdef PUGIXML_HAS_MOVE
7260 PUGI_IMPL_FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
7261 {
7262 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
7263 impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);
7264
7265 // save first child pointer for later; this needs hash access
7266 xml_node_struct* other_first_child = other->first_child;
7267
7268 #ifdef PUGIXML_COMPACT
7269 // reserve space for the hash table up front; this is the only operation that can fail
7270 // if it does, we have no choice but to throw (if we have exceptions)
7271 if (other_first_child)
7272 {
7273 size_t other_children = 0;
7274 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7275 other_children++;
7276
7277 // in compact mode, each pointer assignment could result in a hash table request
7278 // during move, we have to relocate document first_child and parents of all children
7279 // normally there's just one child and its parent has a pointerless encoding but
7280 // we assume the worst here
7281 if (!other->_hash->reserve(other_children + 1))
7282 {
7283 #ifdef PUGIXML_NO_EXCEPTIONS
7284 return;
7285 #else
7286 throw std::bad_alloc();
7287 #endif
7288 }
7289 }
7290 #endif
7291
7292 // move allocation state
7293 // note that other->_root may point to the embedded document page, in which case we should keep original (empty) state
7294 if (other->_root != PUGI_IMPL_GETPAGE(other))
7295 {
7296 doc->_root = other->_root;
7297 doc->_busy_size = other->_busy_size;
7298 }
7299
7300 // move buffer state
7301 doc->buffer = other->buffer;
7302 doc->extra_buffers = other->extra_buffers;
7303 _buffer = rhs._buffer;
7304
7305 #ifdef PUGIXML_COMPACT
7306 // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
7307 doc->hash = other->hash;
7308 doc->_hash = &doc->hash;
7309
7310 // make sure we don't access other hash up until the end when we reinitialize other document
7311 other->_hash = 0;
7312 #endif
7313
7314 // move page structure
7315 impl::xml_memory_page* doc_page = PUGI_IMPL_GETPAGE(doc);
7316 assert(doc_page && !doc_page->prev && !doc_page->next);
7317
7318 impl::xml_memory_page* other_page = PUGI_IMPL_GETPAGE(other);
7319 assert(other_page && !other_page->prev);
7320
7321 // relink pages since root page is embedded into xml_document
7322 if (impl::xml_memory_page* page = other_page->next)
7323 {
7324 assert(page->prev == other_page);
7325
7326 page->prev = doc_page;
7327
7328 doc_page->next = page;
7329 other_page->next = 0;
7330 }
7331
7332 // make sure pages point to the correct document state
7333 for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
7334 {
7335 assert(page->allocator == other);
7336
7337 page->allocator = doc;
7338
7339 #ifdef PUGIXML_COMPACT
7340 // this automatically migrates most children between documents and prevents ->parent assignment from allocating
7341 if (page->compact_shared_parent == other)
7342 page->compact_shared_parent = doc;
7343 #endif
7344 }
7345
7346 // move tree structure
7347 assert(!doc->first_child);
7348
7349 doc->first_child = other_first_child;
7350
7351 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7352 {
7353 #ifdef PUGIXML_COMPACT
7354 // most children will have migrated when we reassigned compact_shared_parent
7355 assert(node->parent == other || node->parent == doc);
7356
7357 node->parent = doc;
7358 #else
7359 assert(node->parent == other);
7360 node->parent = doc;
7361 #endif
7362 }
7363
7364 // reset other document
7365 new (other) impl::xml_document_struct(PUGI_IMPL_GETPAGE(other));
7366 rhs._buffer = 0;
7367 }
7368 #endif
7369
7370 #ifndef PUGIXML_NO_STL
7371 PUGI_IMPL_FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
7372 {
7373 reset();
7374
7375 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
7376 }
7377
7378 PUGI_IMPL_FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
7379 {
7380 reset();
7381
7382 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
7383 }
7384 #endif
7385
7386 PUGI_IMPL_FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
7387 {
7388 // Force native encoding (skip autodetection)
7389 #ifdef PUGIXML_WCHAR_MODE
7390 xml_encoding encoding = encoding_wchar;
7391 #else
7392 xml_encoding encoding = encoding_utf8;
7393 #endif
7394
7395 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
7396 }
7397
7398 PUGI_IMPL_FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
7399 {
7400 return load_string(contents, options);
7401 }
7402
7403 PUGI_IMPL_FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
7404 {
7405 reset();
7406
7407 using impl::auto_deleter; // MSVC7 workaround
7408 auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file);
7409
7410 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7411 }
7412
7413 PUGI_IMPL_FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
7414 {
7415 reset();
7416
7417 using impl::auto_deleter; // MSVC7 workaround
7418 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
7419
7420 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7421 }
7422
7423 PUGI_IMPL_FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
7424 {
7425 reset();
7426
7427 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
7428 }
7429
7430 PUGI_IMPL_FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7431 {
7432 reset();
7433
7434 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
7435 }
7436
7437 PUGI_IMPL_FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7438 {
7439 reset();
7440
7441 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
7442 }
7443
7444 PUGI_IMPL_FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7445 {
7446 impl::xml_buffered_writer buffered_writer(writer, encoding);
7447
7448 if ((flags & format_write_bom) && encoding != encoding_latin1)
7449 {
7450 // BOM always represents the codepoint U+FEFF, so just write it in native encoding
7451 #ifdef PUGIXML_WCHAR_MODE
7452 unsigned int bom = 0xfeff;
7453 buffered_writer.write(static_cast<wchar_t>(bom));
7454 #else
7455 buffered_writer.write('\xef', '\xbb', '\xbf');
7456 #endif
7457 }
7458
7459 if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
7460 {
7461 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
7462 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
7463 buffered_writer.write('?', '>');
7464 if (!(flags & format_raw)) buffered_writer.write('\n');
7465 }
7466
7467 impl::node_output(buffered_writer, _root, indent, flags, 0);
7468
7469 buffered_writer.flush();
7470 }
7471
7472 #ifndef PUGIXML_NO_STL
7473 PUGI_IMPL_FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7474 {
7475 xml_writer_stream writer(stream);
7476
7477 save(writer, indent, flags, encoding);
7478 }
7479
7480 PUGI_IMPL_FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7481 {
7482 xml_writer_stream writer(stream);
7483
7484 save(writer, indent, flags, encoding_wchar);
7485 }
7486 #endif
7487
7488 PUGI_IMPL_FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7489 {
7490 using impl::auto_deleter; // MSVC7 workaround
7491 auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7492
7493 return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0;
7494 }
7495
7496 PUGI_IMPL_FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7497 {
7498 using impl::auto_deleter; // MSVC7 workaround
7499 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
7500
7501 return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0;
7502 }
7503
7504 PUGI_IMPL_FN xml_node xml_document::document_element() const
7505 {
7506 assert(_root);
7507
7508 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7509 if (PUGI_IMPL_NODETYPE(i) == node_element)
7510 return xml_node(i);
7511
7512 return xml_node();
7513 }
7514
7515 #ifndef PUGIXML_NO_STL
7516 PUGI_IMPL_FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
7517 {
7518 assert(str);
7519
7520 return impl::as_utf8_impl(str, impl::strlength_wide(str));
7521 }
7522
7523 PUGI_IMPL_FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7524 {
7525 return impl::as_utf8_impl(str.c_str(), str.size());
7526 }
7527
7528 PUGI_IMPL_FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7529 {
7530 assert(str);
7531
7532 return impl::as_wide_impl(str, strlen(str));
7533 }
7534
7535 PUGI_IMPL_FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7536 {
7537 return impl::as_wide_impl(str.c_str(), str.size());
7538 }
7539 #endif
7540
7541 PUGI_IMPL_FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7542 {
7543 impl::xml_memory::allocate = allocate;
7544 impl::xml_memory::deallocate = deallocate;
7545 }
7546
7547 PUGI_IMPL_FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
7548 {
7549 return impl::xml_memory::allocate;
7550 }
7551
7552 PUGI_IMPL_FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
7553 {
7554 return impl::xml_memory::deallocate;
7555 }
7556 }
7557
7558 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
7559 namespace std
7560 {
7561 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
7562 PUGI_IMPL_FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
7563 {
7564 return std::bidirectional_iterator_tag();
7565 }
7566
7567 PUGI_IMPL_FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
7568 {
7569 return std::bidirectional_iterator_tag();
7570 }
7571
7572 PUGI_IMPL_FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
7573 {
7574 return std::bidirectional_iterator_tag();
7575 }
7576 }
7577 #endif
7578
7579 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
7580 namespace std
7581 {
7582 // Workarounds for (non-standard) iterator category detection
7583 PUGI_IMPL_FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
7584 {
7585 return std::bidirectional_iterator_tag();
7586 }
7587
7588 PUGI_IMPL_FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
7589 {
7590 return std::bidirectional_iterator_tag();
7591 }
7592
7593 PUGI_IMPL_FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
7594 {
7595 return std::bidirectional_iterator_tag();
7596 }
7597 }
7598 #endif
7599
7600 #ifndef PUGIXML_NO_XPATH
7601 // STL replacements
7602 PUGI_IMPL_NS_BEGIN
// Function object that compares two values of the same type with operator==.
struct equal_to
{
	template <typename T>
	bool operator()(const T& lhs, const T& rhs) const
	{
		const bool same = (lhs == rhs);
		return same;
	}
};
7610
// Function object that compares two values of the same type with operator!=.
struct not_equal_to
{
	template <typename T>
	bool operator()(const T& lhs, const T& rhs) const
	{
		const bool differs = (lhs != rhs);
		return differs;
	}
};
7618
// Function object that compares two values of the same type with operator<.
struct less
{
	template <typename T>
	bool operator()(const T& lhs, const T& rhs) const
	{
		const bool before = (lhs < rhs);
		return before;
	}
};
7626
// Function object that compares two values of the same type with operator<=.
struct less_equal
{
	template <typename T>
	bool operator()(const T& lhs, const T& rhs) const
	{
		const bool not_after = (lhs <= rhs);
		return not_after;
	}
};
7634
// Exchanges the values of lhs and rhs using one copy and two assignments.
template <typename T>
inline void swap(T& lhs, T& rhs)
{
	T saved = lhs;

	lhs = rhs;
	rhs = saved;
}
7641
// Returns an iterator to the first element in [begin, end) for which no other element
// compares lower according to pred(candidate, current_minimum).
// For an empty range returns end; this avoids forming begin + 1 and dereferencing
// elements outside the range, matching the behavior of std::min_element.
template <typename I, typename Pred> PUGI_IMPL_FN I min_element(I begin, I end, const Pred& pred)
{
	// empty range: nothing to compare, and begin + 1 would be past the end
	if (begin == end)
		return end;

	I result = begin;

	for (I it = begin + 1; it != end; ++it)
		if (pred(*it, *result))
			result = it;

	return result;
}
7652
7653 template <typename I> PUGI_IMPL_FN void reverse(I begin, I end)
7654 {
7655 while (end - begin > 1)
7656 swap(*begin++, *--end);
7657 }
7658
// Collapses runs of consecutive equal elements in [begin, end) in place, keeping the first
// element of each run, and returns the new past-the-end iterator.
template <typename I> PUGI_IMPL_FN I unique(I begin, I end)
{
	// fast skip head: nothing has to move until the first adjacent duplicate
	while (end - begin > 1 && *begin != *(begin + 1))
		++begin;

	if (begin == end)
		return begin;

	// last written element
	I write = begin;
	++begin;

	// merge unique elements
	for (; begin != end; ++begin)
	{
		if (*begin != *write)
		{
			++write;
			*write = *begin;
		}
	}

	// past-the-end (write points to live element)
	return write + 1;
}
7683
// Sorts [begin, end) in place with insertion sort; pred(a, b) is true when a must come before b.
template <typename T, typename Pred> PUGI_IMPL_FN void insertion_sort(T* begin, T* end, const Pred& pred)
{
	if (begin == end)
		return;

	for (T* cur = begin + 1; cur != end; ++cur)
	{
		T value = *cur;
		T* slot = cur;

		// shift larger elements one position right until the slot for value is found
		for (; slot > begin && pred(value, *(slot - 1)); --slot)
			*slot = *(slot - 1);

		// drop the saved element into its slot
		*slot = value;
	}
}
7705
// Returns the iterator (one of first/middle/last) that refers to the median of the three
// referenced values under pred. Only the local iterator copies are exchanged; the
// underlying elements are not moved.
template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred)
{
	// order (first, middle)
	if (pred(*middle, *first))
	{
		swap(middle, first);
	}

	// order (middle, last)
	if (pred(*last, *middle))
	{
		swap(last, middle);
	}

	// the previous exchange may have broken the (first, middle) order
	if (pred(*middle, *first))
	{
		swap(middle, first);
	}

	return middle;
}
7717
7718 template <typename T, typename Pred> PUGI_IMPL_FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
7719 {
7720 // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
7721 T* eq = begin;
7722 T* lt = begin;
7723 T* gt = end;
7724
7725 while (lt < gt)
7726 {
7727 if (pred(*lt, pivot))
7728 lt++;
7729 else if (*lt == pivot)
7730 swap(*eq++, *lt++);
7731 else
7732 swap(*lt, *--gt);
7733 }
7734
7735 // we now have just 4 groups: = < >; move equal elements to the middle
7736 T* eqbeg = gt;
7737
7738 for (T* it = begin; it != eq; ++it)
7739 swap(*it, *--eqbeg);
7740
7741 *out_eqbeg = eqbeg;
7742 *out_eqend = gt;
7743 }
7744
7745 template <typename I, typename Pred> PUGI_IMPL_FN void sort(I begin, I end, const Pred& pred)
7746 {
7747 // sort large chunks
7748 while (end - begin > 16)
7749 {
7750 // find median element
7751 I middle = begin + (end - begin) / 2;
7752 I median = median3(begin, middle, end - 1, pred);
7753
7754 // partition in three chunks (< = >)
7755 I eqbeg, eqend;
7756 partition3(begin, end, *median, pred, &eqbeg, &eqend);
7757
7758 // loop on larger half
7759 if (eqbeg - begin > end - eqend)
7760 {
7761 sort(eqend, end, pred);
7762 end = eqbeg;
7763 }
7764 else
7765 {
7766 sort(begin, eqbeg, pred);
7767 begin = eqend;
7768 }
7769 }
7770
7771 // insertion sort small chunk
7772 insertion_sort(begin, end, pred);
7773 }
7774
// Inserts a non-null pointer key into an open-addressing pointer set.
// Empty slots are null. The bucket index is masked with (size - 1), so size is assumed
// to be a power of two. Returns true if the key was inserted, false if it was already present.
PUGI_IMPL_FN bool hash_insert(const void** table, size_t size, const void* key)
{
	assert(key);

	unsigned int hash = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));

	// MurmurHash3 32-bit finalizer
	hash ^= hash >> 16;
	hash *= 0x85ebca6bu;
	hash ^= hash >> 13;
	hash *= 0xc2b2ae35u;
	hash ^= hash >> 16;

	const size_t mask = size - 1;
	size_t bucket = hash & mask;

	size_t probe = 0;

	while (probe <= mask)
	{
		const void*& slot = table[bucket];

		if (slot == 0)
		{
			slot = key;
			return true;
		}

		if (slot == key)
			return false;

		// hash collision, quadratic probing
		++probe;
		bucket = (bucket + probe) & mask;
	}

	assert(false && "Hash table is full"); // unreachable
	return false;
}
7809 PUGI_IMPL_NS_END
7810
7811 // Allocator used for AST and evaluation stacks
7812 PUGI_IMPL_NS_BEGIN
// Size in bytes of the data area of one XPath memory block.
// Can be overridden at build time by defining PUGIXML_MEMORY_XPATH_PAGE_SIZE; defaults to 4096.
#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
static const size_t xpath_memory_page_size = PUGIXML_MEMORY_XPATH_PAGE_SIZE;
#else
static const size_t xpath_memory_page_size = 4096;
#endif
7820
// Allocation granularity for the XPath allocator: the larger of sizeof(double) and sizeof(void*).
static const uintptr_t xpath_memory_block_alignment = (sizeof(void*) < sizeof(double)) ? sizeof(double) : sizeof(void*);
7822
// One page in the XPath allocator's singly linked list of memory blocks.
struct xpath_memory_block
{
	// next block in the chain (0 terminates the list)
	xpath_memory_block* next;
	// number of usable bytes in data; may exceed sizeof(data) for blocks that xpath_allocator
	// allocates with extra room
	size_t capacity;

	union
	{
		// storage handed out by the allocator
		char data[xpath_memory_page_size];
		// never read in the visible code; presumably present to force double alignment of data
		double alignment;
	};
};
7834
7835 struct xpath_allocator
7836 {
7837 xpath_memory_block* _root;
7838 size_t _root_size;
7839 bool* _error;
7840
7841 xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
7842 {
7843 }
7844
7845 void* allocate(size_t size)
7846 {
7847 // round size up to block alignment boundary
7848 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7849
7850 if (_root_size + size <= _root->capacity)
7851 {
7852 void* buf = &_root->data[0] + _root_size;
7853 _root_size += size;
7854 return buf;
7855 }
7856 else
7857 {
7858 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
7859 size_t block_capacity_base = sizeof(_root->data);
7860 size_t block_capacity_req = size + block_capacity_base / 4;
7861 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
7862
7863 size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7864
7865 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
7866 if (!block)
7867 {
7868 if (_error) *_error = true;
7869 return 0;
7870 }
7871
7872 block->next = _root;
7873 block->capacity = block_capacity;
7874
7875 _root = block;
7876 _root_size = size;
7877
7878 return block->data;
7879 }
7880 }
7881
7882 void* reallocate(void* ptr, size_t old_size, size_t new_size)
7883 {
7884 // round size up to block alignment boundary
7885 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7886 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7887
7888 // we can only reallocate the last object
7889 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
7890
7891 // try to reallocate the object inplace
7892 if (ptr && _root_size - old_size + new_size <= _root->capacity)
7893 {
7894 _root_size = _root_size - old_size + new_size;
7895 return ptr;
7896 }
7897
7898 // allocate a new block
7899 void* result = allocate(new_size);
7900 if (!result) return 0;
7901
7902 // we have a new block
7903 if (ptr)
7904 {
7905 // copy old data (we only support growing)
7906 assert(new_size >= old_size);
7907 memcpy(result, ptr, old_size);
7908
7909 // free the previous page if it had no other objects
7910 assert(_root->data == result);
7911 assert(_root->next);
7912
7913 if (_root->next->data == ptr)
7914 {
7915 // deallocate the whole page, unless it was the first one
7916 xpath_memory_block* next = _root->next->next;
7917
7918 if (next)
7919 {
7920 xml_memory::deallocate(_root->next);
7921 _root->next = next;
7922 }
7923 }
7924 }
7925
7926 return result;
7927 }
7928
7929 void revert(const xpath_allocator& state)
7930 {
7931 // free all new pages
7932 xpath_memory_block* cur = _root;
7933
7934 while (cur != state._root)
7935 {
7936 xpath_memory_block* next = cur->next;
7937
7938 xml_memory::deallocate(cur);
7939
7940 cur = next;
7941 }
7942
7943 // restore state
7944 _root = state._root;
7945 _root_size = state._root_size;
7946 }
7947
7948 void release()
7949 {
7950 xpath_memory_block* cur = _root;
7951 assert(cur);
7952
7953 while (cur->next)
7954 {
7955 xpath_memory_block* next = cur->next;
7956
7957 xml_memory::deallocate(cur);
7958
7959 cur = next;
7960 }
7961 }
7962 };
7963
7964 struct xpath_allocator_capture
7965 {
7966 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7967 {
7968 }
7969
7970 ~xpath_allocator_capture()
7971 {
7972 _target->revert(_state);
7973 }
7974
7975 xpath_allocator* _target;
7976 xpath_allocator _state;
7977 };
7978
// Pair of allocator pointers passed around together.
struct xpath_stack
{
	// presumably the allocator for values that outlive the current evaluation step - confirm with callers
	xpath_allocator* result;
	// presumably the allocator for scratch data - confirm with callers
	xpath_allocator* temp;
};
7984
7985 struct xpath_stack_data
7986 {
7987 xpath_memory_block blocks[2];
7988 xpath_allocator result;
7989 xpath_allocator temp;
7990 xpath_stack stack;
7991 bool oom;
7992
7993 xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
7994 {
7995 blocks[0].next = blocks[1].next = 0;
7996 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7997
7998 stack.result = &result;
7999 stack.temp = &temp;
8000 }
8001
8002 ~xpath_stack_data()
8003 {
8004 result.release();
8005 temp.release();
8006 }
8007 };
8008 PUGI_IMPL_NS_END
8009
8010 // String class
8011 PUGI_IMPL_NS_BEGIN
8012 class xpath_string
8013 {
8014 const char_t* _buffer;
8015 bool _uses_heap;
8016 size_t _length_heap;
8017
8018 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
8019 {
8020 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
8021 if (!result) return 0;
8022
8023 memcpy(result, string, length * sizeof(char_t));
8024 result[length] = 0;
8025
8026 return result;
8027 }
8028
8029 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
8030 {
8031 }
8032
8033 public:
8034 static xpath_string from_const(const char_t* str)
8035 {
8036 return xpath_string(str, false, 0);
8037 }
8038
8039 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
8040 {
8041 assert(begin <= end && *end == 0);
8042
8043 return xpath_string(begin, true, static_cast<size_t>(end - begin));
8044 }
8045
8046 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
8047 {
8048 assert(begin <= end);
8049
8050 if (begin == end)
8051 return xpath_string();
8052
8053 size_t length = static_cast<size_t>(end - begin);
8054 const char_t* data = duplicate_string(begin, length, alloc);
8055
8056 return data ? xpath_string(data, true, length) : xpath_string();
8057 }
8058
8059 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
8060 {
8061 }
8062
8063 void append(const xpath_string& o, xpath_allocator* alloc)
8064 {
8065 // skip empty sources
8066 if (!*o._buffer) return;
8067
8068 // fast append for constant empty target and constant source
8069 if (!*_buffer && !_uses_heap && !o._uses_heap)
8070 {
8071 _buffer = o._buffer;
8072 }
8073 else
8074 {
8075 // need to make heap copy
8076 size_t target_length = length();
8077 size_t source_length = o.length();
8078 size_t result_length = target_length + source_length;
8079
8080 // allocate new buffer
8081 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
8082 if (!result) return;
8083
8084 // append first string to the new buffer in case there was no reallocation
8085 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
8086
8087 // append second string to the new buffer
8088 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
8089 result[result_length] = 0;
8090
8091 // finalize
8092 _buffer = result;
8093 _uses_heap = true;
8094 _length_heap = result_length;
8095 }
8096 }
8097
8098 const char_t* c_str() const
8099 {
8100 return _buffer;
8101 }
8102
8103 size_t length() const
8104 {
8105 return _uses_heap ? _length_heap : strlength(_buffer);
8106 }
8107
8108 char_t* data(xpath_allocator* alloc)
8109 {
8110 // make private heap copy
8111 if (!_uses_heap)
8112 {
8113 size_t length_ = strlength(_buffer);
8114 const char_t* data_ = duplicate_string(_buffer, length_, alloc);
8115
8116 if (!data_) return 0;
8117
8118 _buffer = data_;
8119 _uses_heap = true;
8120 _length_heap = length_;
8121 }
8122
8123 return const_cast<char_t*>(_buffer);
8124 }
8125
8126 bool empty() const
8127 {
8128 return *_buffer == 0;
8129 }
8130
8131 bool operator==(const xpath_string& o) const
8132 {
8133 return strequal(_buffer, o._buffer);
8134 }
8135
8136 bool operator!=(const xpath_string& o) const
8137 {
8138 return !strequal(_buffer, o._buffer);
8139 }
8140
8141 bool uses_heap() const
8142 {
8143 return _uses_heap;
8144 }
8145 };
8146 PUGI_IMPL_NS_END
8147
8148 PUGI_IMPL_NS_BEGIN
8149 PUGI_IMPL_FN bool starts_with(const char_t* string, const char_t* pattern)
8150 {
8151 while (*pattern && *string == *pattern)
8152 {
8153 string++;
8154 pattern++;
8155 }
8156
8157 return *pattern == 0;
8158 }
8159
8160 PUGI_IMPL_FN const char_t* find_char(const char_t* s, char_t c)
8161 {
8162 #ifdef PUGIXML_WCHAR_MODE
8163 return wcschr(s, c);
8164 #else
8165 return strchr(s, c);
8166 #endif
8167 }
8168
8169 PUGI_IMPL_FN const char_t* find_substring(const char_t* s, const char_t* p)
8170 {
8171 #ifdef PUGIXML_WCHAR_MODE
8172 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
8173 return (*p == 0) ? s : wcsstr(s, p);
8174 #else
8175 return strstr(s, p);
8176 #endif
8177 }
8178
8179 // Converts symbol to lower case, if it is an ASCII one
8180 PUGI_IMPL_FN char_t tolower_ascii(char_t ch)
8181 {
8182 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
8183 }
8184
8185 PUGI_IMPL_FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
8186 {
8187 if (na.attribute())
8188 return xpath_string::from_const(na.attribute().value());
8189 else
8190 {
8191 xml_node n = na.node();
8192
8193 switch (n.type())
8194 {
8195 case node_pcdata:
8196 case node_cdata:
8197 case node_comment:
8198 case node_pi:
8199 return xpath_string::from_const(n.value());
8200
8201 case node_document:
8202 case node_element:
8203 {
8204 xpath_string result;
8205
8206 // element nodes can have value if parse_embed_pcdata was used
8207 if (n.value()[0])
8208 result.append(xpath_string::from_const(n.value()), alloc);
8209
8210 xml_node cur = n.first_child();
8211
8212 while (cur && cur != n)
8213 {
8214 if (cur.type() == node_pcdata || cur.type() == node_cdata)
8215 result.append(xpath_string::from_const(cur.value()), alloc);
8216
8217 if (cur.first_child())
8218 cur = cur.first_child();
8219 else if (cur.next_sibling())
8220 cur = cur.next_sibling();
8221 else
8222 {
8223 while (!cur.next_sibling() && cur != n)
8224 cur = cur.parent();
8225
8226 if (cur != n) cur = cur.next_sibling();
8227 }
8228 }
8229
8230 return result;
8231 }
8232
8233 default:
8234 return xpath_string();
8235 }
8236 }
8237 }
8238
8239 PUGI_IMPL_FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
8240 {
8241 assert(ln->parent == rn->parent);
8242
8243 // there is no common ancestor (the shared parent is null), nodes are from different documents
8244 if (!ln->parent) return ln < rn;
8245
8246 // determine sibling order
8247 xml_node_struct* ls = ln;
8248 xml_node_struct* rs = rn;
8249
8250 while (ls && rs)
8251 {
8252 if (ls == rn) return true;
8253 if (rs == ln) return false;
8254
8255 ls = ls->next_sibling;
8256 rs = rs->next_sibling;
8257 }
8258
8259 // if rn sibling chain ended ln must be before rn
8260 return !rs;
8261 }
8262
8263 PUGI_IMPL_FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
8264 {
8265 // find common ancestor at the same depth, if any
8266 xml_node_struct* lp = ln;
8267 xml_node_struct* rp = rn;
8268
8269 while (lp && rp && lp->parent != rp->parent)
8270 {
8271 lp = lp->parent;
8272 rp = rp->parent;
8273 }
8274
8275 // parents are the same!
8276 if (lp && rp) return node_is_before_sibling(lp, rp);
8277
8278 // nodes are at different depths, need to normalize heights
8279 bool left_higher = !lp;
8280
8281 while (lp)
8282 {
8283 lp = lp->parent;
8284 ln = ln->parent;
8285 }
8286
8287 while (rp)
8288 {
8289 rp = rp->parent;
8290 rn = rn->parent;
8291 }
8292
8293 // one node is the ancestor of the other
8294 if (ln == rn) return left_higher;
8295
8296 // find common ancestor... again
8297 while (ln->parent != rn->parent)
8298 {
8299 ln = ln->parent;
8300 rn = rn->parent;
8301 }
8302
8303 return node_is_before_sibling(ln, rn);
8304 }
8305
8306 PUGI_IMPL_FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
8307 {
8308 while (node && node != parent) node = node->parent;
8309
8310 return parent && node == parent;
8311 }
8312
8313 PUGI_IMPL_FN const void* document_buffer_order(const xpath_node& xnode)
8314 {
8315 xml_node_struct* node = xnode.node().internal_object();
8316
8317 if (node)
8318 {
8319 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
8320 {
8321 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
8322 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
8323 }
8324
8325 return 0;
8326 }
8327
8328 xml_attribute_struct* attr = xnode.attribute().internal_object();
8329
8330 if (attr)
8331 {
8332 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
8333 {
8334 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
8335 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
8336 }
8337
8338 return 0;
8339 }
8340
8341 return 0;
8342 }
8343
8344 struct document_order_comparator
8345 {
8346 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8347 {
8348 // optimized document order based check
8349 const void* lo = document_buffer_order(lhs);
8350 const void* ro = document_buffer_order(rhs);
8351
8352 if (lo && ro) return lo < ro;
8353
8354 // slow comparison
8355 xml_node ln = lhs.node(), rn = rhs.node();
8356
8357 // compare attributes
8358 if (lhs.attribute() && rhs.attribute())
8359 {
8360 // shared parent
8361 if (lhs.parent() == rhs.parent())
8362 {
8363 // determine sibling order
8364 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
8365 if (a == rhs.attribute())
8366 return true;
8367
8368 return false;
8369 }
8370
8371 // compare attribute parents
8372 ln = lhs.parent();
8373 rn = rhs.parent();
8374 }
8375 else if (lhs.attribute())
8376 {
8377 // attributes go after the parent element
8378 if (lhs.parent() == rhs.node()) return false;
8379
8380 ln = lhs.parent();
8381 }
8382 else if (rhs.attribute())
8383 {
8384 // attributes go after the parent element
8385 if (rhs.parent() == lhs.node()) return true;
8386
8387 rn = rhs.parent();
8388 }
8389
8390 if (ln == rn) return false;
8391
8392 if (!ln || !rn) return ln < rn;
8393
8394 return node_is_before(ln.internal_object(), rn.internal_object());
8395 }
8396 };
8397
8398 PUGI_IMPL_FN double gen_nan()
8399 {
8400 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
8401 PUGI_IMPL_STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
8402 typedef uint32_t UI; // BCC5 workaround
8403 union { float f; UI i; } u;
8404 u.i = 0x7fc00000;
8405 return double(u.f);
8406 #else
8407 // fallback
8408 const volatile double zero = 0.0;
8409 return zero / zero;
8410 #endif
8411 }
8412
// Returns true if value is NaN, using _isnan, fpclassify or a self-comparison
// depending on what the toolchain provides.
PUGI_IMPL_FN bool is_nan(double value)
{
#if defined(PUGI_IMPL_MSVC_CRT_VERSION) || defined(__BORLANDC__)
	return !!_isnan(value);
#elif defined(fpclassify) && defined(FP_NAN)
	return fpclassify(value) == FP_NAN;
#else
	// fallback: NaN is the only value that differs from itself
	const volatile double probe = value;
	return probe != probe;
#endif
}
8425
8426 PUGI_IMPL_FN const char_t* convert_number_to_string_special(double value)
8427 {
8428 #if defined(PUGI_IMPL_MSVC_CRT_VERSION) || defined(__BORLANDC__)
8429 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
8430 if (_isnan(value)) return PUGIXML_TEXT("NaN");
8431 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8432 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
8433 switch (fpclassify(value))
8434 {
8435 case FP_NAN:
8436 return PUGIXML_TEXT("NaN");
8437
8438 case FP_INFINITE:
8439 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8440
8441 case FP_ZERO:
8442 return PUGIXML_TEXT("0");
8443
8444 default:
8445 return 0;
8446 }
8447 #else
8448 // fallback
8449 const volatile double v = value;
8450
8451 if (v == 0) return PUGIXML_TEXT("0");
8452 if (v != v) return PUGIXML_TEXT("NaN");
8453 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8454 return 0;
8455 #endif
8456 }
8457
8458 PUGI_IMPL_FN bool convert_number_to_boolean(double value)
8459 {
8460 return (value != 0 && !is_nan(value));
8461 }
8462
// Drops trailing '0' characters from [begin, end) by writing a null terminator
// right after the last character that is kept (at `begin` if all are zeros).
PUGI_IMPL_FN void truncate_zeros(char* begin, char* end)
{
	char* last = end;

	for (; last != begin && *(last - 1) == '0'; --last)
	{
	}

	*last = 0;
}
8469
8470 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
8471 #if defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400
8472 PUGI_IMPL_FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8473 {
8474 // get base values
8475 int sign, exponent;
8476 _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);
8477
8478 // truncate redundant zeros
8479 truncate_zeros(buffer, buffer + strlen(buffer));
8480
8481 // fill results
8482 *out_mantissa = buffer;
8483 *out_exponent = exponent;
8484 }
8485 #else
8486 PUGI_IMPL_FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8487 {
8488 // get a scientific notation value with IEEE DBL_DIG decimals
8489 PUGI_IMPL_SNPRINTF(buffer, "%.*e", DBL_DIG, value);
8490
8491 // get the exponent (possibly negative)
8492 char* exponent_string = strchr(buffer, 'e');
8493 assert(exponent_string);
8494
8495 int exponent = atoi(exponent_string + 1);
8496
8497 // extract mantissa string: skip sign
8498 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
8499 assert(mantissa[0] != '0' && (mantissa[1] == '.' || mantissa[1] == ','));
8500
8501 // divide mantissa by 10 to eliminate integer part
8502 mantissa[1] = mantissa[0];
8503 mantissa++;
8504 exponent++;
8505
8506 // remove extra mantissa digits and zero-terminate mantissa
8507 truncate_zeros(mantissa, exponent_string);
8508
8509 // fill results
8510 *out_mantissa = mantissa;
8511 *out_exponent = exponent;
8512 }
8513 #endif
8514
8515 PUGI_IMPL_FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8516 {
8517 // try special number conversion
8518 const char_t* special = convert_number_to_string_special(value);
8519 if (special) return xpath_string::from_const(special);
8520
8521 // get mantissa + exponent form
8522 char mantissa_buffer[32];
8523
8524 char* mantissa;
8525 int exponent;
8526 convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
8527
8528 // allocate a buffer of suitable length for the number
8529 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8530 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8531 if (!result) return xpath_string();
8532
8533 // make the number!
8534 char_t* s = result;
8535
8536 // sign
8537 if (value < 0) *s++ = '-';
8538
8539 // integer part
8540 if (exponent <= 0)
8541 {
8542 *s++ = '0';
8543 }
8544 else
8545 {
8546 while (exponent > 0)
8547 {
8548 assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
8549 *s++ = *mantissa ? *mantissa++ : '0';
8550 exponent--;
8551 }
8552 }
8553
8554 // fractional part
8555 if (*mantissa)
8556 {
8557 // decimal point
8558 *s++ = '.';
8559
8560 // extra zeroes from negative exponent
8561 while (exponent < 0)
8562 {
8563 *s++ = '0';
8564 exponent++;
8565 }
8566
8567 // extra mantissa digits
8568 while (*mantissa)
8569 {
8570 assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
8571 *s++ = *mantissa++;
8572 }
8573 }
8574
8575 // zero-terminate
8576 assert(s < result + result_size);
8577 *s = 0;
8578
8579 return xpath_string::from_heap_preallocated(result, s);
8580 }
8581
8582 PUGI_IMPL_FN bool check_string_to_number_format(const char_t* string)
8583 {
8584 // parse leading whitespace
8585 while (PUGI_IMPL_IS_CHARTYPE(*string, ct_space)) ++string;
8586
8587 // parse sign
8588 if (*string == '-') ++string;
8589
8590 if (!*string) return false;
8591
8592 // if there is no integer part, there should be a decimal part with at least one digit
8593 if (!PUGI_IMPL_IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI_IMPL_IS_CHARTYPEX(string[1], ctx_digit))) return false;
8594
8595 // parse integer part
8596 while (PUGI_IMPL_IS_CHARTYPEX(*string, ctx_digit)) ++string;
8597
8598 // parse decimal part
8599 if (*string == '.')
8600 {
8601 ++string;
8602
8603 while (PUGI_IMPL_IS_CHARTYPEX(*string, ctx_digit)) ++string;
8604 }
8605
8606 // parse trailing whitespace
8607 while (PUGI_IMPL_IS_CHARTYPE(*string, ct_space)) ++string;
8608
8609 return *string == 0;
8610 }
8611
8612 PUGI_IMPL_FN double convert_string_to_number(const char_t* string)
8613 {
8614 // check string format
8615 if (!check_string_to_number_format(string)) return gen_nan();
8616
8617 // parse string
8618 #ifdef PUGIXML_WCHAR_MODE
8619 return wcstod(string, 0);
8620 #else
8621 return strtod(string, 0);
8622 #endif
8623 }
8624
8625 PUGI_IMPL_FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8626 {
8627 size_t length = static_cast<size_t>(end - begin);
8628 char_t* scratch = buffer;
8629
8630 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8631 {
8632 // need to make dummy on-heap copy
8633 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8634 if (!scratch) return false;
8635 }
8636
8637 // copy string to zero-terminated buffer and perform conversion
8638 memcpy(scratch, begin, length * sizeof(char_t));
8639 scratch[length] = 0;
8640
8641 *out_result = convert_string_to_number(scratch);
8642
8643 // free dummy buffer
8644 if (scratch != buffer) xml_memory::deallocate(scratch);
8645
8646 return true;
8647 }
8648
8649 PUGI_IMPL_FN double round_nearest(double value)
8650 {
8651 return floor(value + 0.5);
8652 }
8653
8654 PUGI_IMPL_FN double round_nearest_nzero(double value)
8655 {
8656 // same as round_nearest, but returns -0 for [-0.5, -0]
8657 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8658 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
8659 }
8660
8661 PUGI_IMPL_FN const char_t* qualified_name(const xpath_node& node)
8662 {
8663 return node.attribute() ? node.attribute().name() : node.node().name();
8664 }
8665
8666 PUGI_IMPL_FN const char_t* local_name(const xpath_node& node)
8667 {
8668 const char_t* name = qualified_name(node);
8669 const char_t* p = find_char(name, ':');
8670
8671 return p ? p + 1 : name;
8672 }
8673
8674 struct namespace_uri_predicate
8675 {
8676 const char_t* prefix;
8677 size_t prefix_length;
8678
8679 namespace_uri_predicate(const char_t* name)
8680 {
8681 const char_t* pos = find_char(name, ':');
8682
8683 prefix = pos ? name : 0;
8684 prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8685 }
8686
8687 bool operator()(xml_attribute a) const
8688 {
8689 const char_t* name = a.name();
8690
8691 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8692
8693 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
8694 }
8695 };
8696
8697 PUGI_IMPL_FN const char_t* namespace_uri(xml_node node)
8698 {
8699 namespace_uri_predicate pred = node.name();
8700
8701 xml_node p = node;
8702
8703 while (p)
8704 {
8705 xml_attribute a = p.find_attribute(pred);
8706
8707 if (a) return a.value();
8708
8709 p = p.parent();
8710 }
8711
8712 return PUGIXML_TEXT("");
8713 }
8714
8715 PUGI_IMPL_FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8716 {
8717 namespace_uri_predicate pred = attr.name();
8718
8719 // Default namespace does not apply to attributes
8720 if (!pred.prefix) return PUGIXML_TEXT("");
8721
8722 xml_node p = parent;
8723
8724 while (p)
8725 {
8726 xml_attribute a = p.find_attribute(pred);
8727
8728 if (a) return a.value();
8729
8730 p = p.parent();
8731 }
8732
8733 return PUGIXML_TEXT("");
8734 }
8735
8736 PUGI_IMPL_FN const char_t* namespace_uri(const xpath_node& node)
8737 {
8738 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
8739 }
8740
8741 PUGI_IMPL_FN char_t* normalize_space(char_t* buffer)
8742 {
8743 char_t* write = buffer;
8744
8745 for (char_t* it = buffer; *it; )
8746 {
8747 char_t ch = *it++;
8748
8749 if (PUGI_IMPL_IS_CHARTYPE(ch, ct_space))
8750 {
8751 // replace whitespace sequence with single space
8752 while (PUGI_IMPL_IS_CHARTYPE(*it, ct_space)) it++;
8753
8754 // avoid leading spaces
8755 if (write != buffer) *write++ = ' ';
8756 }
8757 else *write++ = ch;
8758 }
8759
8760 // remove trailing space
8761 if (write != buffer && PUGI_IMPL_IS_CHARTYPE(write[-1], ct_space)) write--;
8762
8763 // zero-terminate
8764 *write = 0;
8765
8766 return write;
8767 }
8768
8769 PUGI_IMPL_FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8770 {
8771 char_t* write = buffer;
8772
8773 while (*buffer)
8774 {
8775 PUGI_IMPL_DMC_VOLATILE char_t ch = *buffer++;
8776
8777 const char_t* pos = find_char(from, ch);
8778
8779 if (!pos)
8780 *write++ = ch; // do not process
8781 else if (static_cast<size_t>(pos - from) < to_length)
8782 *write++ = to[pos - from]; // replace
8783 }
8784
8785 // zero-terminate
8786 *write = 0;
8787
8788 return write;
8789 }
8790
8791 PUGI_IMPL_FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8792 {
8793 unsigned char table[128] = {0};
8794
8795 while (*from)
8796 {
8797 unsigned int fc = static_cast<unsigned int>(*from);
8798 unsigned int tc = static_cast<unsigned int>(*to);
8799
8800 if (fc >= 128 || tc >= 128)
8801 return 0;
8802
8803 // code=128 means "skip character"
8804 if (!table[fc])
8805 table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8806
8807 from++;
8808 if (tc) to++;
8809 }
8810
8811 for (int i = 0; i < 128; ++i)
8812 if (!table[i])
8813 table[i] = static_cast<unsigned char>(i);
8814
8815 void* result = alloc->allocate(sizeof(table));
8816 if (!result) return 0;
8817
8818 memcpy(result, table, sizeof(table));
8819
8820 return static_cast<unsigned char*>(result);
8821 }
8822
8823 PUGI_IMPL_FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8824 {
8825 char_t* write = buffer;
8826
8827 while (*buffer)
8828 {
8829 char_t ch = *buffer++;
8830 unsigned int index = static_cast<unsigned int>(ch);
8831
8832 if (index < 128)
8833 {
8834 unsigned char code = table[index];
8835
8836 // code=128 means "skip character" (table size is 128 so 128 can be a special value)
8837 // this code skips these characters without extra branches
8838 *write = static_cast<char_t>(code);
8839 write += 1 - (code >> 7);
8840 }
8841 else
8842 {
8843 *write++ = ch;
8844 }
8845 }
8846
8847 // zero-terminate
8848 *write = 0;
8849
8850 return write;
8851 }
8852
8853 inline bool is_xpath_attribute(const char_t* name)
8854 {
8855 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
8856 }
8857
// boolean variable: xpath_variable tagged with xpath_type_boolean plus the stored value
struct xpath_variable_boolean: xpath_variable
{
	// the value starts out as false
	xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
	{
	}

	bool value;
	// start of the variable name; new_xpath_variable allocates extra space past the object and copies the full name here
	char_t name[1];
};
8867
// number variable: xpath_variable tagged with xpath_type_number plus the stored value
struct xpath_variable_number: xpath_variable
{
	// the value starts out as 0
	xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
	{
	}

	double value;
	// start of the variable name; new_xpath_variable allocates extra space past the object and copies the full name here
	char_t name[1];
};
8877
// string variable: xpath_variable tagged with xpath_type_string plus a string pointer released by the destructor
struct xpath_variable_string: xpath_variable
{
	// the value starts out as null
	xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
	{
	}

	// releases the string through xml_memory::deallocate if one is set
	~xpath_variable_string()
	{
		if (value) xml_memory::deallocate(value);
	}

	char_t* value;
	// start of the variable name; new_xpath_variable allocates extra space past the object and copies the full name here
	char_t name[1];
};
8892
// node set variable: xpath_variable tagged with xpath_type_node_set plus the stored node set
struct xpath_variable_node_set: xpath_variable
{
	// the value is a default-constructed xpath_node_set
	xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
	{
	}

	xpath_node_set value;
	// start of the variable name; new_xpath_variable allocates extra space past the object and copies the full name here
	char_t name[1];
};
8902
// file-local, default-constructed node set constant
static const xpath_node_set dummy_node_set;
8904
8905 PUGI_IMPL_FN PUGI_IMPL_UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
8906 {
8907 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8908 unsigned int result = 0;
8909
8910 while (*str)
8911 {
8912 result += static_cast<unsigned int>(*str++);
8913 result += result << 10;
8914 result ^= result >> 6;
8915 }
8916
8917 result += result << 3;
8918 result ^= result >> 11;
8919 result += result << 15;
8920
8921 return result;
8922 }
8923
8924 template <typename T> PUGI_IMPL_FN T* new_xpath_variable(const char_t* name)
8925 {
8926 size_t length = strlength(name);
8927 if (length == 0) return 0; // empty variable names are invalid
8928
8929 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8930 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8931 if (!memory) return 0;
8932
8933 T* result = new (memory) T();
8934
8935 memcpy(result->name, name, (length + 1) * sizeof(char_t));
8936
8937 return result;
8938 }
8939
8940 PUGI_IMPL_FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8941 {
8942 switch (type)
8943 {
8944 case xpath_type_node_set:
8945 return new_xpath_variable<xpath_variable_node_set>(name);
8946
8947 case xpath_type_number:
8948 return new_xpath_variable<xpath_variable_number>(name);
8949
8950 case xpath_type_string:
8951 return new_xpath_variable<xpath_variable_string>(name);
8952
8953 case xpath_type_boolean:
8954 return new_xpath_variable<xpath_variable_boolean>(name);
8955
8956 default:
8957 return 0;
8958 }
8959 }
8960
// destroys a variable and releases its memory through xml_memory::deallocate
template <typename T> PUGI_IMPL_FN void delete_xpath_variable(T* var)
{
	// run the destructor explicitly before the memory is released
	var->~T();
	xml_memory::deallocate(var);
}
8966
8967 PUGI_IMPL_FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8968 {
8969 switch (type)
8970 {
8971 case xpath_type_node_set:
8972 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8973 break;
8974
8975 case xpath_type_number:
8976 delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8977 break;
8978
8979 case xpath_type_string:
8980 delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8981 break;
8982
8983 case xpath_type_boolean:
8984 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8985 break;
8986
8987 default:
8988 assert(false && "Invalid variable type"); // unreachable
8989 }
8990 }
8991
8992 PUGI_IMPL_FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8993 {
8994 switch (rhs->type())
8995 {
8996 case xpath_type_node_set:
8997 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8998
8999 case xpath_type_number:
9000 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
9001
9002 case xpath_type_string:
9003 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
9004
9005 case xpath_type_boolean:
9006 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
9007
9008 default:
9009 assert(false && "Invalid variable type"); // unreachable
9010 return false;
9011 }
9012 }
9013
9014 PUGI_IMPL_FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
9015 {
9016 size_t length = static_cast<size_t>(end - begin);
9017 char_t* scratch = buffer;
9018
9019 if (length >= sizeof(buffer) / sizeof(buffer[0]))
9020 {
9021 // need to make dummy on-heap copy
9022 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
9023 if (!scratch) return false;
9024 }
9025
9026 // copy string to zero-terminated buffer and perform lookup
9027 memcpy(scratch, begin, length * sizeof(char_t));
9028 scratch[length] = 0;
9029
9030 *out_result = set->get(scratch);
9031
9032 // free dummy buffer
9033 if (scratch != buffer) xml_memory::deallocate(scratch);
9034
9035 return true;
9036 }
9037 PUGI_IMPL_NS_END
9038
9039 // Internal node set class
9040 PUGI_IMPL_NS_BEGIN
9041 PUGI_IMPL_FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
9042 {
9043 if (end - begin < 2)
9044 return xpath_node_set::type_sorted;
9045
9046 document_order_comparator cmp;
9047
9048 bool first = cmp(begin[0], begin[1]);
9049
9050 for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
9051 if (cmp(it[0], it[1]) != first)
9052 return xpath_node_set::type_unsorted;
9053
9054 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
9055 }
9056
9057 PUGI_IMPL_FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
9058 {
9059 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
9060
9061 if (type == xpath_node_set::type_unsorted)
9062 {
9063 xpath_node_set::type_t sorted = xpath_get_order(begin, end);
9064
9065 if (sorted == xpath_node_set::type_unsorted)
9066 {
9067 sort(begin, end, document_order_comparator());
9068
9069 type = xpath_node_set::type_sorted;
9070 }
9071 else
9072 type = sorted;
9073 }
9074
9075 if (type != order) reverse(begin, end);
9076
9077 return order;
9078 }
9079
9080 PUGI_IMPL_FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
9081 {
9082 if (begin == end) return xpath_node();
9083
9084 switch (type)
9085 {
9086 case xpath_node_set::type_sorted:
9087 return *begin;
9088
9089 case xpath_node_set::type_sorted_reverse:
9090 return *(end - 1);
9091
9092 case xpath_node_set::type_unsorted:
9093 return *min_element(begin, end, document_order_comparator());
9094
9095 default:
9096 assert(false && "Invalid node set type"); // unreachable
9097 return xpath_node();
9098 }
9099 }
9100
9101 class xpath_node_set_raw
9102 {
9103 xpath_node_set::type_t _type;
9104
9105 xpath_node* _begin;
9106 xpath_node* _end;
9107 xpath_node* _eos;
9108
9109 public:
9110 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
9111 {
9112 }
9113
9114 xpath_node* begin() const
9115 {
9116 return _begin;
9117 }
9118
9119 xpath_node* end() const
9120 {
9121 return _end;
9122 }
9123
9124 bool empty() const
9125 {
9126 return _begin == _end;
9127 }
9128
9129 size_t size() const
9130 {
9131 return static_cast<size_t>(_end - _begin);
9132 }
9133
9134 xpath_node first() const
9135 {
9136 return xpath_first(_begin, _end, _type);
9137 }
9138
9139 void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
9140
9141 void push_back(const xpath_node& node, xpath_allocator* alloc)
9142 {
9143 if (_end != _eos)
9144 *_end++ = node;
9145 else
9146 push_back_grow(node, alloc);
9147 }
9148
9149 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
9150 {
9151 if (begin_ == end_) return;
9152
9153 size_t size_ = static_cast<size_t>(_end - _begin);
9154 size_t capacity = static_cast<size_t>(_eos - _begin);
9155 size_t count = static_cast<size_t>(end_ - begin_);
9156
9157 if (size_ + count > capacity)
9158 {
9159 // reallocate the old array or allocate a new one
9160 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
9161 if (!data) return;
9162
9163 // finalize
9164 _begin = data;
9165 _end = data + size_;
9166 _eos = data + size_ + count;
9167 }
9168
9169 memcpy(_end, begin_, count * sizeof(xpath_node));
9170 _end += count;
9171 }
9172
9173 void sort_do()
9174 {
9175 _type = xpath_sort(_begin, _end, _type, false);
9176 }
9177
9178 void truncate(xpath_node* pos)
9179 {
9180 assert(_begin <= pos && pos <= _end);
9181
9182 _end = pos;
9183 }
9184
9185 void remove_duplicates(xpath_allocator* alloc)
9186 {
9187 if (_type == xpath_node_set::type_unsorted && _end - _begin > 2)
9188 {
9189 xpath_allocator_capture cr(alloc);
9190
9191 size_t size_ = static_cast<size_t>(_end - _begin);
9192
9193 size_t hash_size = 1;
9194 while (hash_size < size_ + size_ / 2) hash_size *= 2;
9195
9196 const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**)));
9197 if (!hash_data) return;
9198
9199 memset(hash_data, 0, hash_size * sizeof(const void**));
9200
9201 xpath_node* write = _begin;
9202
9203 for (xpath_node* it = _begin; it != _end; ++it)
9204 {
9205 const void* attr = it->attribute().internal_object();
9206 const void* node = it->node().internal_object();
9207 const void* key = attr ? attr : node;
9208
9209 if (key && hash_insert(hash_data, hash_size, key))
9210 {
9211 *write++ = *it;
9212 }
9213 }
9214
9215 _end = write;
9216 }
9217 else
9218 {
9219 _end = unique(_begin, _end);
9220 }
9221 }
9222
9223 xpath_node_set::type_t type() const
9224 {
9225 return _type;
9226 }
9227
9228 void set_type(xpath_node_set::type_t value)
9229 {
9230 _type = value;
9231 }
9232 };
9233
9234 PUGI_IMPL_FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
9235 {
9236 size_t capacity = static_cast<size_t>(_eos - _begin);
9237
9238 // get new capacity (1.5x rule)
9239 size_t new_capacity = capacity + capacity / 2 + 1;
9240
9241 // reallocate the old array or allocate a new one
9242 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
9243 if (!data) return;
9244
9245 // finalize
9246 _begin = data;
9247 _end = data + capacity;
9248 _eos = data + new_capacity;
9249
9250 // push
9251 *_end++ = node;
9252 }
9253 PUGI_IMPL_NS_END
9254
9255 PUGI_IMPL_NS_BEGIN
// evaluation context handed to expression evaluation: a node plus its position and the size of the set it belongs to
// (apply_predicate_boolean/apply_predicate_number pass positions that start at 1)
struct xpath_context
{
	xpath_node n;
	size_t position, size;

	xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
	{
	}
};
9265
// lexeme kinds produced by xpath_lexer
enum lexeme_t
{
	lex_none = 0, // input that does not form a valid lexeme
	lex_equal, // =
	lex_not_equal, // !=
	lex_less, // <
	lex_greater, // >
	lex_less_or_equal, // <=
	lex_greater_or_equal, // >=
	lex_plus, // +
	lex_minus, // -
	lex_multiply, // *
	lex_union, // |
	lex_var_ref, // $name or $prefix:name; contents hold the name without '$'
	lex_open_brace, // (
	lex_close_brace, // )
	lex_quoted_string, // '...' or "..."; contents hold the text between the quotes
	lex_number, // digits with an optional fractional part, or '.' followed by digits
	lex_slash, // /
	lex_double_slash, // //
	lex_open_square_brace, // [
	lex_close_square_brace, // ]
	lex_string, // name, prefix:name or prefix:*
	lex_comma, // ,
	lex_axis_attribute, // @
	lex_dot, // .
	lex_double_dot, // ..
	lex_double_colon, // ::
	lex_eof // end of the query string
};
9296
9297 struct xpath_lexer_string
9298 {
9299 const char_t* begin;
9300 const char_t* end;
9301
9302 xpath_lexer_string(): begin(0), end(0)
9303 {
9304 }
9305
9306 bool operator==(const char_t* other) const
9307 {
9308 size_t length = static_cast<size_t>(end - begin);
9309
9310 return strequalrange(other, begin, length);
9311 }
9312 };
9313
9314 class xpath_lexer
9315 {
9316 const char_t* _cur;
9317 const char_t* _cur_lexeme_pos;
9318 xpath_lexer_string _cur_lexeme_contents;
9319
9320 lexeme_t _cur_lexeme;
9321
9322 public:
9323 explicit xpath_lexer(const char_t* query): _cur(query)
9324 {
9325 next();
9326 }
9327
9328 const char_t* state() const
9329 {
9330 return _cur;
9331 }
9332
9333 void next()
9334 {
9335 const char_t* cur = _cur;
9336
9337 while (PUGI_IMPL_IS_CHARTYPE(*cur, ct_space)) ++cur;
9338
9339 // save lexeme position for error reporting
9340 _cur_lexeme_pos = cur;
9341
9342 switch (*cur)
9343 {
9344 case 0:
9345 _cur_lexeme = lex_eof;
9346 break;
9347
9348 case '>':
9349 if (*(cur+1) == '=')
9350 {
9351 cur += 2;
9352 _cur_lexeme = lex_greater_or_equal;
9353 }
9354 else
9355 {
9356 cur += 1;
9357 _cur_lexeme = lex_greater;
9358 }
9359 break;
9360
9361 case '<':
9362 if (*(cur+1) == '=')
9363 {
9364 cur += 2;
9365 _cur_lexeme = lex_less_or_equal;
9366 }
9367 else
9368 {
9369 cur += 1;
9370 _cur_lexeme = lex_less;
9371 }
9372 break;
9373
9374 case '!':
9375 if (*(cur+1) == '=')
9376 {
9377 cur += 2;
9378 _cur_lexeme = lex_not_equal;
9379 }
9380 else
9381 {
9382 _cur_lexeme = lex_none;
9383 }
9384 break;
9385
9386 case '=':
9387 cur += 1;
9388 _cur_lexeme = lex_equal;
9389
9390 break;
9391
9392 case '+':
9393 cur += 1;
9394 _cur_lexeme = lex_plus;
9395
9396 break;
9397
9398 case '-':
9399 cur += 1;
9400 _cur_lexeme = lex_minus;
9401
9402 break;
9403
9404 case '*':
9405 cur += 1;
9406 _cur_lexeme = lex_multiply;
9407
9408 break;
9409
9410 case '|':
9411 cur += 1;
9412 _cur_lexeme = lex_union;
9413
9414 break;
9415
9416 case '$':
9417 cur += 1;
9418
9419 if (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_start_symbol))
9420 {
9421 _cur_lexeme_contents.begin = cur;
9422
9423 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9424
9425 if (cur[0] == ':' && PUGI_IMPL_IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
9426 {
9427 cur++; // :
9428
9429 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9430 }
9431
9432 _cur_lexeme_contents.end = cur;
9433
9434 _cur_lexeme = lex_var_ref;
9435 }
9436 else
9437 {
9438 _cur_lexeme = lex_none;
9439 }
9440
9441 break;
9442
9443 case '(':
9444 cur += 1;
9445 _cur_lexeme = lex_open_brace;
9446
9447 break;
9448
9449 case ')':
9450 cur += 1;
9451 _cur_lexeme = lex_close_brace;
9452
9453 break;
9454
9455 case '[':
9456 cur += 1;
9457 _cur_lexeme = lex_open_square_brace;
9458
9459 break;
9460
9461 case ']':
9462 cur += 1;
9463 _cur_lexeme = lex_close_square_brace;
9464
9465 break;
9466
9467 case ',':
9468 cur += 1;
9469 _cur_lexeme = lex_comma;
9470
9471 break;
9472
9473 case '/':
9474 if (*(cur+1) == '/')
9475 {
9476 cur += 2;
9477 _cur_lexeme = lex_double_slash;
9478 }
9479 else
9480 {
9481 cur += 1;
9482 _cur_lexeme = lex_slash;
9483 }
9484 break;
9485
9486 case '.':
9487 if (*(cur+1) == '.')
9488 {
9489 cur += 2;
9490 _cur_lexeme = lex_double_dot;
9491 }
9492 else if (PUGI_IMPL_IS_CHARTYPEX(*(cur+1), ctx_digit))
9493 {
9494 _cur_lexeme_contents.begin = cur; // .
9495
9496 ++cur;
9497
9498 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9499
9500 _cur_lexeme_contents.end = cur;
9501
9502 _cur_lexeme = lex_number;
9503 }
9504 else
9505 {
9506 cur += 1;
9507 _cur_lexeme = lex_dot;
9508 }
9509 break;
9510
9511 case '@':
9512 cur += 1;
9513 _cur_lexeme = lex_axis_attribute;
9514
9515 break;
9516
9517 case '"':
9518 case '\'':
9519 {
9520 char_t terminator = *cur;
9521
9522 ++cur;
9523
9524 _cur_lexeme_contents.begin = cur;
9525 while (*cur && *cur != terminator) cur++;
9526 _cur_lexeme_contents.end = cur;
9527
9528 if (!*cur)
9529 _cur_lexeme = lex_none;
9530 else
9531 {
9532 cur += 1;
9533 _cur_lexeme = lex_quoted_string;
9534 }
9535
9536 break;
9537 }
9538
9539 case ':':
9540 if (*(cur+1) == ':')
9541 {
9542 cur += 2;
9543 _cur_lexeme = lex_double_colon;
9544 }
9545 else
9546 {
9547 _cur_lexeme = lex_none;
9548 }
9549 break;
9550
9551 default:
9552 if (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_digit))
9553 {
9554 _cur_lexeme_contents.begin = cur;
9555
9556 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9557
9558 if (*cur == '.')
9559 {
9560 cur++;
9561
9562 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9563 }
9564
9565 _cur_lexeme_contents.end = cur;
9566
9567 _cur_lexeme = lex_number;
9568 }
9569 else if (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_start_symbol))
9570 {
9571 _cur_lexeme_contents.begin = cur;
9572
9573 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9574
9575 if (cur[0] == ':')
9576 {
9577 if (cur[1] == '*') // namespace test ncname:*
9578 {
9579 cur += 2; // :*
9580 }
9581 else if (PUGI_IMPL_IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
9582 {
9583 cur++; // :
9584
9585 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9586 }
9587 }
9588
9589 _cur_lexeme_contents.end = cur;
9590
9591 _cur_lexeme = lex_string;
9592 }
9593 else
9594 {
9595 _cur_lexeme = lex_none;
9596 }
9597 }
9598
9599 _cur = cur;
9600 }
9601
9602 lexeme_t current() const
9603 {
9604 return _cur_lexeme;
9605 }
9606
9607 const char_t* current_pos() const
9608 {
9609 return _cur_lexeme_pos;
9610 }
9611
9612 const xpath_lexer_string& contents() const
9613 {
9614 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
9615
9616 return _cur_lexeme_contents;
9617 }
9618 };
9619
// kinds of XPath AST nodes
enum ast_type_t
{
	ast_unknown,
	ast_op_or,						// left or right
	ast_op_and,						// left and right
	ast_op_equal,					// left = right
	ast_op_not_equal,				// left != right
	ast_op_less,					// left < right
	ast_op_greater,					// left > right
	ast_op_less_or_equal,			// left <= right
	ast_op_greater_or_equal,		// left >= right
	ast_op_add,						// left + right
	ast_op_subtract,				// left - right
	ast_op_multiply,				// left * right
	ast_op_divide,					// left div right
	ast_op_mod,						// left mod right
	ast_op_negate,					// - left
	ast_op_union,					// left | right
	ast_predicate,					// apply predicate to set; next points to next predicate
	ast_filter,						// select * from left where right
	ast_string_constant,			// string constant
	ast_number_constant,			// number constant
	ast_variable,					// variable
	ast_func_last,					// last()
	ast_func_position,				// position()
	ast_func_count,					// count(left)
	ast_func_id,					// id(left)
	ast_func_local_name_0,			// local-name()
	ast_func_local_name_1,			// local-name(left)
	ast_func_namespace_uri_0,		// namespace-uri()
	ast_func_namespace_uri_1,		// namespace-uri(left)
	ast_func_name_0,				// name()
	ast_func_name_1,				// name(left)
	ast_func_string_0,				// string()
	ast_func_string_1,				// string(left)
	ast_func_concat,				// concat(left, right, siblings)
	ast_func_starts_with,			// starts-with(left, right)
	ast_func_contains,				// contains(left, right)
	ast_func_substring_before,		// substring-before(left, right)
	ast_func_substring_after,		// substring-after(left, right)
	ast_func_substring_2,			// substring(left, right)
	ast_func_substring_3,			// substring(left, right, third)
	ast_func_string_length_0,		// string-length()
	ast_func_string_length_1,		// string-length(left)
	ast_func_normalize_space_0,		// normalize-space()
	ast_func_normalize_space_1,		// normalize-space(left)
	ast_func_translate,				// translate(left, right, third)
	ast_func_boolean,				// boolean(left)
	ast_func_not,					// not(left)
	ast_func_true,					// true()
	ast_func_false,					// false()
	ast_func_lang,					// lang(left)
	ast_func_number_0,				// number()
	ast_func_number_1,				// number(left)
	ast_func_sum,					// sum(left)
	ast_func_floor,					// floor(left)
	ast_func_ceiling,				// ceiling(left)
	ast_func_round,					// round(left)
	ast_step,						// process set left with step
	ast_step_root,					// select root node

	ast_opt_translate_table,		// translate(left, right, third) where right/third are constants
	ast_opt_compare_attribute		// @name = 'string'
};
9684
// axis of an ast_step node (xpath_ast_node keeps it in _axis)
// NOTE(review): the enumerators appear to mirror the XPath 1.0 axis names one to one - confirm against the parser
enum axis_t
{
	axis_ancestor,
	axis_ancestor_or_self,
	axis_attribute,
	axis_child,
	axis_descendant,
	axis_descendant_or_self,
	axis_following,
	axis_following_sibling,
	axis_namespace,
	axis_parent,
	axis_preceding,
	axis_preceding_sibling,
	axis_self
};
9701
// kind of node test; xpath_ast_node documents its nodetest data as "node name/namespace/node type/pi target"
// NOTE(review): the per-enumerator meaning is presumably given by the names - confirm against the step evaluation code
enum nodetest_t
{
	nodetest_none,
	nodetest_name,
	nodetest_type_node,
	nodetest_type_comment,
	nodetest_type_pi,
	nodetest_type_text,
	nodetest_pi,
	nodetest_all,
	nodetest_all_in_namespace
};
9714
// predicate classification
// NOTE(review): presumably selects between the apply_predicate_* strategies - the code that assigns and reads these values is not visible here, confirm
enum predicate_t
{
	predicate_default,
	predicate_posinv,
	predicate_constant,
	predicate_constant_one
};
9722
// evaluation mode passed to eval_node_set; eval_once uses it together with the set order to pick between full and early-out evaluation
// NOTE(review): presumably all = every node is needed, any = any single node suffices, first = only the first node in document order is needed - confirm
enum nodeset_eval_t
{
	nodeset_eval_all,
	nodeset_eval_any,
	nodeset_eval_first
};
9729
// wraps an axis_t value in a type: axis_to_type<N>::axis is N
template <axis_t N> struct axis_to_type
{
	static const axis_t axis;
};

// out-of-class definition of the static member
template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9736
9737 class xpath_ast_node
9738 {
9739 private:
9740 // node type
9741 char _type;
9742 char _rettype;
9743
9744 // for ast_step
9745 char _axis;
9746
9747 // for ast_step/ast_predicate/ast_filter
9748 char _test;
9749
9750 // tree node structure
9751 xpath_ast_node* _left;
9752 xpath_ast_node* _right;
9753 xpath_ast_node* _next;
9754
9755 union
9756 {
9757 // value for ast_string_constant
9758 const char_t* string;
9759 // value for ast_number_constant
9760 double number;
9761 // variable for ast_variable
9762 xpath_variable* variable;
9763 // node test for ast_step (node name/namespace/node type/pi target)
9764 const char_t* nodetest;
9765 // table for ast_opt_translate_table
9766 const unsigned char* table;
9767 } _data;
9768
9769 xpath_ast_node(const xpath_ast_node&);
9770 xpath_ast_node& operator=(const xpath_ast_node&);
9771
9772 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9773 {
9774 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9775
9776 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9777 {
9778 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
9779 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9780 else if (lt == xpath_type_number || rt == xpath_type_number)
9781 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9782 else if (lt == xpath_type_string || rt == xpath_type_string)
9783 {
9784 xpath_allocator_capture cr(stack.result);
9785
9786 xpath_string ls = lhs->eval_string(c, stack);
9787 xpath_string rs = rhs->eval_string(c, stack);
9788
9789 return comp(ls, rs);
9790 }
9791 }
9792 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9793 {
9794 xpath_allocator_capture cr(stack.result);
9795
9796 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9797 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9798
9799 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9800 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9801 {
9802 xpath_allocator_capture cri(stack.result);
9803
9804 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
9805 return true;
9806 }
9807
9808 return false;
9809 }
9810 else
9811 {
9812 if (lt == xpath_type_node_set)
9813 {
9814 swap(lhs, rhs);
9815 swap(lt, rt);
9816 }
9817
9818 if (lt == xpath_type_boolean)
9819 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9820 else if (lt == xpath_type_number)
9821 {
9822 xpath_allocator_capture cr(stack.result);
9823
9824 double l = lhs->eval_number(c, stack);
9825 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9826
9827 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9828 {
9829 xpath_allocator_capture cri(stack.result);
9830
9831 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9832 return true;
9833 }
9834
9835 return false;
9836 }
9837 else if (lt == xpath_type_string)
9838 {
9839 xpath_allocator_capture cr(stack.result);
9840
9841 xpath_string l = lhs->eval_string(c, stack);
9842 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9843
9844 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9845 {
9846 xpath_allocator_capture cri(stack.result);
9847
9848 if (comp(l, string_value(*ri, stack.result)))
9849 return true;
9850 }
9851
9852 return false;
9853 }
9854 }
9855
9856 assert(false && "Wrong types"); // unreachable
9857 return false;
9858 }
9859
9860 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9861 {
9862 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
9863 }
9864
9865 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9866 {
9867 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9868
9869 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9870 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9871 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9872 {
9873 xpath_allocator_capture cr(stack.result);
9874
9875 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9876 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9877
9878 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9879 {
9880 xpath_allocator_capture cri(stack.result);
9881
9882 double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9883
9884 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9885 {
9886 xpath_allocator_capture crii(stack.result);
9887
9888 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9889 return true;
9890 }
9891 }
9892
9893 return false;
9894 }
9895 else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9896 {
9897 xpath_allocator_capture cr(stack.result);
9898
9899 double l = lhs->eval_number(c, stack);
9900 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9901
9902 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9903 {
9904 xpath_allocator_capture cri(stack.result);
9905
9906 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9907 return true;
9908 }
9909
9910 return false;
9911 }
9912 else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9913 {
9914 xpath_allocator_capture cr(stack.result);
9915
9916 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9917 double r = rhs->eval_number(c, stack);
9918
9919 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9920 {
9921 xpath_allocator_capture cri(stack.result);
9922
9923 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
9924 return true;
9925 }
9926
9927 return false;
9928 }
9929 else
9930 {
9931 assert(false && "Wrong types"); // unreachable
9932 return false;
9933 }
9934 }
9935
9936 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9937 {
9938 assert(ns.size() >= first);
9939 assert(expr->rettype() != xpath_type_number);
9940
9941 size_t i = 1;
9942 size_t size = ns.size() - first;
9943
9944 xpath_node* last = ns.begin() + first;
9945
9946 // remove_if... or well, sort of
9947 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9948 {
9949 xpath_context c(*it, i, size);
9950
9951 if (expr->eval_boolean(c, stack))
9952 {
9953 *last++ = *it;
9954
9955 if (once) break;
9956 }
9957 }
9958
9959 ns.truncate(last);
9960 }
9961
9962 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9963 {
9964 assert(ns.size() >= first);
9965 assert(expr->rettype() == xpath_type_number);
9966
9967 size_t i = 1;
9968 size_t size = ns.size() - first;
9969
9970 xpath_node* last = ns.begin() + first;
9971
9972 // remove_if... or well, sort of
9973 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9974 {
9975 xpath_context c(*it, i, size);
9976
9977 if (expr->eval_number(c, stack) == static_cast<double>(i))
9978 {
9979 *last++ = *it;
9980
9981 if (once) break;
9982 }
9983 }
9984
9985 ns.truncate(last);
9986 }
9987
9988 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9989 {
9990 assert(ns.size() >= first);
9991 assert(expr->rettype() == xpath_type_number);
9992
9993 size_t size = ns.size() - first;
9994
9995 xpath_node* last = ns.begin() + first;
9996
9997 xpath_node cn;
9998 xpath_context c(cn, 1, size);
9999
10000 double er = expr->eval_number(c, stack);
10001
10002 if (er >= 1.0 && er <= static_cast<double>(size))
10003 {
10004 size_t eri = static_cast<size_t>(er);
10005
10006 if (er == static_cast<double>(eri))
10007 {
10008 xpath_node r = last[eri - 1];
10009
10010 *last++ = r;
10011 }
10012 }
10013
10014 ns.truncate(last);
10015 }
10016
10017 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
10018 {
10019 if (ns.size() == first) return;
10020
10021 assert(_type == ast_filter || _type == ast_predicate);
10022
10023 if (_test == predicate_constant || _test == predicate_constant_one)
10024 apply_predicate_number_const(ns, first, _right, stack);
10025 else if (_right->rettype() == xpath_type_number)
10026 apply_predicate_number(ns, first, _right, stack, once);
10027 else
10028 apply_predicate_boolean(ns, first, _right, stack, once);
10029 }
10030
10031 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
10032 {
10033 if (ns.size() == first) return;
10034
10035 bool last_once = eval_once(ns.type(), eval);
10036
10037 for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
10038 pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
10039 }
10040
10041 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
10042 {
10043 assert(a);
10044
10045 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
10046
10047 switch (_test)
10048 {
10049 case nodetest_name:
10050 if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
10051 {
10052 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
10053 return true;
10054 }
10055 break;
10056
10057 case nodetest_type_node:
10058 case nodetest_all:
10059 if (is_xpath_attribute(name))
10060 {
10061 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
10062 return true;
10063 }
10064 break;
10065
10066 case nodetest_all_in_namespace:
10067 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
10068 {
10069 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
10070 return true;
10071 }
10072 break;
10073
10074 default:
10075 ;
10076 }
10077
10078 return false;
10079 }
10080
10081 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
10082 {
10083 assert(n);
10084
10085 xml_node_type type = PUGI_IMPL_NODETYPE(n);
10086
10087 switch (_test)
10088 {
10089 case nodetest_name:
10090 if (type == node_element && n->name && strequal(n->name, _data.nodetest))
10091 {
10092 ns.push_back(xml_node(n), alloc);
10093 return true;
10094 }
10095 break;
10096
10097 case nodetest_type_node:
10098 ns.push_back(xml_node(n), alloc);
10099 return true;
10100
10101 case nodetest_type_comment:
10102 if (type == node_comment)
10103 {
10104 ns.push_back(xml_node(n), alloc);
10105 return true;
10106 }
10107 break;
10108
10109 case nodetest_type_text:
10110 if (type == node_pcdata || type == node_cdata)
10111 {
10112 ns.push_back(xml_node(n), alloc);
10113 return true;
10114 }
10115 break;
10116
10117 case nodetest_type_pi:
10118 if (type == node_pi)
10119 {
10120 ns.push_back(xml_node(n), alloc);
10121 return true;
10122 }
10123 break;
10124
10125 case nodetest_pi:
10126 if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
10127 {
10128 ns.push_back(xml_node(n), alloc);
10129 return true;
10130 }
10131 break;
10132
10133 case nodetest_all:
10134 if (type == node_element)
10135 {
10136 ns.push_back(xml_node(n), alloc);
10137 return true;
10138 }
10139 break;
10140
10141 case nodetest_all_in_namespace:
10142 if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
10143 {
10144 ns.push_back(xml_node(n), alloc);
10145 return true;
10146 }
10147 break;
10148
10149 default:
10150 assert(false && "Unknown axis"); // unreachable
10151 }
10152
10153 return false;
10154 }
10155
// Walks from node n along the axis selected at compile time by T::axis and offers every visited node
// (or attribute, for the attribute axis) to step_push, which appends the ones passing this step's node test to ns.
// When `once` is true the walk returns right after the first node that step_push accepts.
// Note on `step_push(...) & once`: this is a bitwise AND of two bools, so step_push is always called
// and the early return happens only when both the push succeeded and `once` is set.
10156 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
10157 {
10158 const axis_t axis = T::axis;
10159
10160 switch (axis)
10161 {
10162 case axis_attribute:
10163 {
// attributes of n, in list order
10164 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
10165 if (step_push(ns, a, n, alloc) & once)
10166 return;
10167
10168 break;
10169 }
10170
10171 case axis_child:
10172 {
// direct children of n, first to last
10173 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
10174 if (step_push(ns, c, alloc) & once)
10175 return;
10176
10177 break;
10178 }
10179
10180 case axis_descendant:
10181 case axis_descendant_or_self:
10182 {
10183 if (axis == axis_descendant_or_self)
10184 if (step_push(ns, n, alloc) & once)
10185 return;
10186
10187 xml_node_struct* cur = n->first_child;
10188
// iterative depth-first walk of n's subtree: go down to the first child when there is one,
// otherwise climb until a node with a next sibling is found; climbing back to n ends the walk
10189 while (cur)
10190 {
10191 if (step_push(ns, cur, alloc) & once)
10192 return;
10193
10194 if (cur->first_child)
10195 cur = cur->first_child;
10196 else
10197 {
10198 while (!cur->next_sibling)
10199 {
10200 cur = cur->parent;
10201
10202 if (cur == n) return;
10203 }
10204
10205 cur = cur->next_sibling;
10206 }
10207 }
10208
10209 break;
10210 }
10211
10212 case axis_following_sibling:
10213 {
10214 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
10215 if (step_push(ns, c, alloc) & once)
10216 return;
10217
10218 break;
10219 }
10220
10221 case axis_preceding_sibling:
10222 {
// walks backwards through prev_sibling_c and stops at a node whose next_sibling is null
// NOTE(review): presumably prev_sibling_c is a cyclic link (the first sibling points at the last one),
// which is what makes `c->next_sibling` a valid end-of-walk test - confirm against xml_node_struct
10223 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
10224 if (step_push(ns, c, alloc) & once)
10225 return;
10226
10227 break;
10228 }
10229
10230 case axis_following:
10231 {
10232 xml_node_struct* cur = n;
10233
10234 // exit from this node so that we don't include descendants
10235 while (!cur->next_sibling)
10236 {
10237 cur = cur->parent;
10238
10239 if (!cur) return;
10240 }
10241
10242 cur = cur->next_sibling;
10243
// from here on: same depth-first walk as for descendants, but it only ends when the climb runs out of parents
10244 while (cur)
10245 {
10246 if (step_push(ns, cur, alloc) & once)
10247 return;
10248
10249 if (cur->first_child)
10250 cur = cur->first_child;
10251 else
10252 {
10253 while (!cur->next_sibling)
10254 {
10255 cur = cur->parent;
10256
10257 if (!cur) return;
10258 }
10259
10260 cur = cur->next_sibling;
10261 }
10262 }
10263
10264 break;
10265 }
10266
10267 case axis_preceding:
10268 {
10269 xml_node_struct* cur = n;
10270
10271 // exit from this node so that we don't include descendants
10272 while (!cur->prev_sibling_c->next_sibling)
10273 {
10274 cur = cur->parent;
10275
10276 if (!cur) return;
10277 }
10278
10279 cur = cur->prev_sibling_c;
10280
// backwards walk: descend through first_child->prev_sibling_c before pushing, so a node with children
// is pushed only while climbing back up through it, and then only if it is not an ancestor of n
10281 while (cur)
10282 {
10283 if (cur->first_child)
10284 cur = cur->first_child->prev_sibling_c;
10285 else
10286 {
10287 // leaf node, can't be ancestor
10288 if (step_push(ns, cur, alloc) & once)
10289 return;
10290
10291 while (!cur->prev_sibling_c->next_sibling)
10292 {
10293 cur = cur->parent;
10294
10295 if (!cur) return;
10296
10297 if (!node_is_ancestor(cur, n))
10298 if (step_push(ns, cur, alloc) & once)
10299 return;
10300 }
10301
10302 cur = cur->prev_sibling_c;
10303 }
10304 }
10305
10306 break;
10307 }
10308
10309 case axis_ancestor:
10310 case axis_ancestor_or_self:
10311 {
10312 if (axis == axis_ancestor_or_self)
10313 if (step_push(ns, n, alloc) & once)
10314 return;
10315
10316 xml_node_struct* cur = n->parent;
10317
// follow the parent chain upwards until it ends
10318 while (cur)
10319 {
10320 if (step_push(ns, cur, alloc) & once)
10321 return;
10322
10323 cur = cur->parent;
10324 }
10325
10326 break;
10327 }
10328
10329 case axis_self:
10330 {
// a single candidate, so `once` does not matter here
10331 step_push(ns, n, alloc);
10332
10333 break;
10334 }
10335
10336 case axis_parent:
10337 {
// a single candidate (if n has a parent at all), so `once` does not matter here
10338 if (n->parent)
10339 step_push(ns, n->parent, alloc);
10340
10341 break;
10342 }
10343
10344 default:
10345 assert(false && "Unimplemented axis"); // unreachable
10346 }
10347 }
10348
// Attribute-context counterpart of the node step_fill: walks from attribute a (whose owning node is p)
// along the axis selected by T::axis and offers visited nodes to step_push.
// Only the axes listed in the switch are handled; any other axis hits the assert in the default branch.
// When `once` is true the walk returns right after the first node that step_push accepts
// (`step_push(...) & once` is a bitwise AND of two bools, so step_push is always called).
10349 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
10350 {
10351 const axis_t axis = T::axis;
10352
10353 switch (axis)
10354 {
10355 case axis_ancestor:
10356 case axis_ancestor_or_self:
10357 {
10358 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
10359 if (step_push(ns, a, p, alloc) & once)
10360 return;
10361
// the ancestors of the attribute are its owning node p and everything up p's parent chain
10362 xml_node_struct* cur = p;
10363
10364 while (cur)
10365 {
10366 if (step_push(ns, cur, alloc) & once)
10367 return;
10368
10369 cur = cur->parent;
10370 }
10371
10372 break;
10373 }
10374
10375 case axis_descendant_or_self:
10376 case axis_self:
10377 {
// only the attribute itself is a candidate on these axes
10378 if (_test == nodetest_type_node) // reject attributes based on principal node type test
10379 step_push(ns, a, p, alloc);
10380
10381 break;
10382 }
10383
10384 case axis_following:
10385 {
10386 xml_node_struct* cur = p;
10387
// depth-first walk that starts at p but advances before pushing: p itself is never pushed,
// while p's descendants and the nodes that come after p's subtree are
10388 while (cur)
10389 {
10390 if (cur->first_child)
10391 cur = cur->first_child;
10392 else
10393 {
10394 while (!cur->next_sibling)
10395 {
10396 cur = cur->parent;
10397
10398 if (!cur) return;
10399 }
10400
10401 cur = cur->next_sibling;
10402 }
10403
10404 if (step_push(ns, cur, alloc) & once)
10405 return;
10406 }
10407
10408 break;
10409 }
10410
10411 case axis_parent:
10412 {
// the parent of the attribute is its owning node
10413 step_push(ns, p, alloc);
10414
10415 break;
10416 }
10417
10418 case axis_preceding:
10419 {
10420 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
10421 step_fill(ns, p, alloc, once, v);
10422 break;
10423 }
10424
10425 default:
10426 assert(false && "Unimplemented axis"); // unreachable
10427 }
10428 }
10429
10430 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
10431 {
10432 const axis_t axis = T::axis;
10433 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
10434
10435 if (xn.node())
10436 step_fill(ns, xn.node().internal_object(), alloc, once, v);
10437 else if (axis_has_attributes && xn.attribute() && xn.parent())
10438 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
10439 }
10440
10441 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
10442 {
10443 const axis_t axis = T::axis;
10444 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
10445 const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
10446
10447 bool once =
10448 (axis == axis_attribute && _test == nodetest_name) ||
10449 (!_right && eval_once(axis_type, eval)) ||
10450 // coverity[mixed_enums]
10451 (_right && !_right->_next && _right->_test == predicate_constant_one);
10452
10453 xpath_node_set_raw ns;
10454 ns.set_type(axis_type);
10455
10456 if (_left)
10457 {
10458 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
10459
10460 // self axis preserves the original order
10461 if (axis == axis_self) ns.set_type(s.type());
10462
10463 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
10464 {
10465 size_t size = ns.size();
10466
10467 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
10468 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
10469
10470 step_fill(ns, *it, stack.result, once, v);
10471 if (_right) apply_predicates(ns, size, stack, eval);
10472 }
10473 }
10474 else
10475 {
10476 step_fill(ns, c.n, stack.result, once, v);
10477 if (_right) apply_predicates(ns, 0, stack, eval);
10478 }
10479
10480 // child, attribute and self axes always generate unique set of nodes
10481 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
10482 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
10483 ns.remove_duplicates(stack.temp);
10484
10485 return ns;
10486 }
10487
10488 public:
10489 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
10490 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10491 {
10492 assert(type == ast_string_constant);
10493 _data.string = value;
10494 }
10495
10496 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
10497 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10498 {
10499 assert(type == ast_number_constant);
10500 _data.number = value;
10501 }
10502
10503 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
10504 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10505 {
10506 assert(type == ast_variable);
10507 _data.variable = value;
10508 }
10509
10510 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
10511 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
10512 {
10513 }
10514
10515 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
10516 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
10517 {
10518 assert(type == ast_step);
10519 _data.nodetest = contents;
10520 }
10521
10522 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
10523 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
10524 {
10525 assert(type == ast_filter || type == ast_predicate);
10526 }
10527
10528 void set_next(xpath_ast_node* value)
10529 {
10530 _next = value;
10531 }
10532
10533 void set_right(xpath_ast_node* value)
10534 {
10535 _right = value;
10536 }
10537
// Evaluates this AST node in context c and returns the result as a boolean.
// The first switch handles node types that produce a boolean directly. Anything not handled there
// (including a variable whose type is not boolean) reaches the second switch, which evaluates the
// node according to _rettype and converts the number, string or node set to a boolean.
10538 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
10539 {
10540 switch (_type)
10541 {
10542 case ast_op_or:
10543 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
10544
10545 case ast_op_and:
10546 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
10547
10548 case ast_op_equal:
10549 return compare_eq(_left, _right, c, stack, equal_to());
10550
10551 case ast_op_not_equal:
10552 return compare_eq(_left, _right, c, stack, not_equal_to());
10553
10554 case ast_op_less:
10555 return compare_rel(_left, _right, c, stack, less());
10556
// a > b is evaluated as b < a: the operands are swapped and less() is reused
10557 case ast_op_greater:
10558 return compare_rel(_right, _left, c, stack, less());
10559
10560 case ast_op_less_or_equal:
10561 return compare_rel(_left, _right, c, stack, less_equal());
10562
// a >= b is evaluated as b <= a, again by swapping the operands
10563 case ast_op_greater_or_equal:
10564 return compare_rel(_right, _left, c, stack, less_equal());
10565
10566 case ast_func_starts_with:
10567 {
10568 xpath_allocator_capture cr(stack.result);
10569
10570 xpath_string lr = _left->eval_string(c, stack);
10571 xpath_string rr = _right->eval_string(c, stack);
10572
10573 return starts_with(lr.c_str(), rr.c_str());
10574 }
10575
10576 case ast_func_contains:
10577 {
10578 xpath_allocator_capture cr(stack.result);
10579
10580 xpath_string lr = _left->eval_string(c, stack);
10581 xpath_string rr = _right->eval_string(c, stack);
10582
10583 return find_substring(lr.c_str(), rr.c_str()) != 0;
10584 }
10585
10586 case ast_func_boolean:
10587 return _left->eval_boolean(c, stack);
10588
10589 case ast_func_not:
10590 return !_left->eval_boolean(c, stack);
10591
10592 case ast_func_true:
10593 return true;
10594
10595 case ast_func_false:
10596 return false;
10597
10598 case ast_func_lang:
10599 {
// an attribute context never matches
10600 if (c.n.attribute()) return false;
10601
10602 xpath_allocator_capture cr(stack.result);
10603
10604 xpath_string lang = _left->eval_string(c, stack);
10605
// search the context node and then its ancestors; the first node carrying xml:lang decides the result
10606 for (xml_node n = c.n.node(); n; n = n.parent())
10607 {
10608 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
10609
10610 if (a)
10611 {
10612 const char_t* value = a.value();
10613
10614 // strnicmp / strncasecmp is not portable
10615 for (const char_t* lit = lang.c_str(); *lit; ++lit)
10616 {
10617 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
10618 ++value;
10619 }
10620
// the requested language matched as a prefix; accept only if the attribute value ends here or continues with '-'
10621 return *value == 0 || *value == '-';
10622 }
10623 }
10624
10625 return false;
10626 }
10627
10628 case ast_opt_compare_attribute:
10629 {
// the right operand is either a string constant or a variable read through get_string()
10630 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
10631
// the attribute is looked up on the context node by the name stored in the left operand
10632 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
10633
10634 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
10635 }
10636
10637 case ast_variable:
10638 {
10639 assert(_rettype == _data.variable->type());
10640
10641 if (_rettype == xpath_type_boolean)
10642 return _data.variable->get_boolean();
10643
10644 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
10645 break;
10646 }
10647
10648 default:
10649 ;
10650 }
10651
10652 // none of the ast types that return the value directly matched, we need to perform type conversion
10653 switch (_rettype)
10654 {
10655 case xpath_type_number:
10656 return convert_number_to_boolean(eval_number(c, stack));
10657
10658 case xpath_type_string:
10659 {
10660 xpath_allocator_capture cr(stack.result);
10661
// a string converts to true when it is not empty
10662 return !eval_string(c, stack).empty();
10663 }
10664
10665 case xpath_type_node_set:
10666 {
10667 xpath_allocator_capture cr(stack.result);
10668
// a node set converts to true when it is not empty; nodeset_eval_any is requested because only emptiness is checked
10669 return !eval_node_set(c, stack, nodeset_eval_any).empty();
10670 }
10671
10672 default:
10673 assert(false && "Wrong expression for return type boolean"); // unreachable
10674 return false;
10675 }
10676 }
10677
// Evaluates this AST node in context c and returns the result as a double.
// The first switch handles node types that produce a number directly. Anything not handled there
// (including a variable whose type is not number) reaches the second switch, which evaluates the
// node according to _rettype and converts the boolean, string or node set to a number.
10678 double eval_number(const xpath_context& c, const xpath_stack& stack)
10679 {
10680 switch (_type)
10681 {
10682 case ast_op_add:
10683 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
10684
10685 case ast_op_subtract:
10686 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
10687
10688 case ast_op_multiply:
10689 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
10690
10691 case ast_op_divide:
10692 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
10693
10694 case ast_op_mod:
10695 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
10696
10697 case ast_op_negate:
10698 return -_left->eval_number(c, stack);
10699
10700 case ast_number_constant:
10701 return _data.number;
10702
// last() and position() are read straight from the evaluation context
10703 case ast_func_last:
10704 return static_cast<double>(c.size);
10705
10706 case ast_func_position:
10707 return static_cast<double>(c.position);
10708
10709 case ast_func_count:
10710 {
10711 xpath_allocator_capture cr(stack.result);
10712
10713 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
10714 }
10715
// string-length() without an argument measures the string value of the context node
10716 case ast_func_string_length_0:
10717 {
10718 xpath_allocator_capture cr(stack.result);
10719
10720 return static_cast<double>(string_value(c.n, stack.result).length());
10721 }
10722
10723 case ast_func_string_length_1:
10724 {
10725 xpath_allocator_capture cr(stack.result);
10726
10727 return static_cast<double>(_left->eval_string(c, stack).length());
10728 }
10729
// number() without an argument converts the string value of the context node
10730 case ast_func_number_0:
10731 {
10732 xpath_allocator_capture cr(stack.result);
10733
10734 return convert_string_to_number(string_value(c.n, stack.result).c_str());
10735 }
10736
10737 case ast_func_number_1:
10738 return _left->eval_number(c, stack);
10739
10740 case ast_func_sum:
10741 {
10742 xpath_allocator_capture cr(stack.result);
10743
10744 double r = 0;
10745
10746 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
10747
// add up the numeric value of every node's string value; a nested capture is opened per node
10748 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
10749 {
10750 xpath_allocator_capture cri(stack.result);
10751
10752 r += convert_string_to_number(string_value(*it, stack.result).c_str());
10753 }
10754
10755 return r;
10756 }
10757
10758 case ast_func_floor:
10759 {
10760 double r = _left->eval_number(c, stack);
10761
// r == r is false only for NaN, which is returned unchanged instead of being passed to floor()
10762 return r == r ? floor(r) : r;
10763 }
10764
10765 case ast_func_ceiling:
10766 {
10767 double r = _left->eval_number(c, stack);
10768
// r == r is false only for NaN, which is returned unchanged instead of being passed to ceil()
10769 return r == r ? ceil(r) : r;
10770 }
10771
10772 case ast_func_round:
10773 return round_nearest_nzero(_left->eval_number(c, stack));
10774
10775 case ast_variable:
10776 {
10777 assert(_rettype == _data.variable->type());
10778
10779 if (_rettype == xpath_type_number)
10780 return _data.variable->get_number();
10781
10782 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
10783 break;
10784 }
10785
10786 default:
10787 ;
10788 }
10789
10790 // none of the ast types that return the value directly matched, we need to perform type conversion
10791 switch (_rettype)
10792 {
10793 case xpath_type_boolean:
10794 return eval_boolean(c, stack) ? 1 : 0;
10795
10796 case xpath_type_string:
10797 {
10798 xpath_allocator_capture cr(stack.result);
10799
10800 return convert_string_to_number(eval_string(c, stack).c_str());
10801 }
10802
// a node set is converted by first evaluating it as a string, then parsing that string as a number
10803 case xpath_type_node_set:
10804 {
10805 xpath_allocator_capture cr(stack.result);
10806
10807 return convert_string_to_number(eval_string(c, stack).c_str());
10808 }
10809
10810 default:
10811 assert(false && "Wrong expression for return type number"); // unreachable
10812 return 0;
10813 }
10814 }
10815
10816 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10817 {
10818 assert(_type == ast_func_concat);
10819
10820 xpath_allocator_capture ct(stack.temp);
10821
10822 // count the string number
10823 size_t count = 1;
10824 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10825
10826 // allocate a buffer for temporary string objects
10827 xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10828 if (!buffer) return xpath_string();
10829
10830 // evaluate all strings to temporary stack
10831 xpath_stack swapped_stack = {stack.temp, stack.result};
10832
10833 buffer[0] = _left->eval_string(c, swapped_stack);
10834
10835 size_t pos = 1;
10836 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10837 assert(pos == count);
10838
10839 // get total length
10840 size_t length = 0;
10841 for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10842
10843 // create final string
10844 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
10845 if (!result) return xpath_string();
10846
10847 char_t* ri = result;
10848
10849 for (size_t j = 0; j < count; ++j)
10850 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
10851 *ri++ = *bi;
10852
10853 *ri = 0;
10854
10855 return xpath_string::from_heap_preallocated(result, ri);
10856 }
10857
// Evaluates this AST node in context c and returns the result as an xpath_string.
// The first switch handles node types that produce a string directly. Anything not handled there
// (including a variable whose type is not string) reaches the second switch, which evaluates the
// node according to _rettype and converts the boolean, number or node set to a string.
// Several branches build `swapped_stack` = {stack.temp, stack.result}: sub-expressions evaluated with it
// use the two allocators in swapped roles, while the value that is returned is built with stack.result.
10858 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10859 {
10860 switch (_type)
10861 {
10862 case ast_string_constant:
10863 return xpath_string::from_const(_data.string);
10864
// the *_0 variants of local-name()/name()/namespace-uri() work on the context node,
// the *_1 variants on the first node of the argument node set (evaluated with nodeset_eval_first)
10865 case ast_func_local_name_0:
10866 {
10867 xpath_node na = c.n;
10868
10869 return xpath_string::from_const(local_name(na));
10870 }
10871
10872 case ast_func_local_name_1:
10873 {
10874 xpath_allocator_capture cr(stack.result);
10875
10876 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10877 xpath_node na = ns.first();
10878
10879 return xpath_string::from_const(local_name(na));
10880 }
10881
10882 case ast_func_name_0:
10883 {
10884 xpath_node na = c.n;
10885
10886 return xpath_string::from_const(qualified_name(na));
10887 }
10888
10889 case ast_func_name_1:
10890 {
10891 xpath_allocator_capture cr(stack.result);
10892
10893 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10894 xpath_node na = ns.first();
10895
10896 return xpath_string::from_const(qualified_name(na));
10897 }
10898
10899 case ast_func_namespace_uri_0:
10900 {
10901 xpath_node na = c.n;
10902
10903 return xpath_string::from_const(namespace_uri(na));
10904 }
10905
10906 case ast_func_namespace_uri_1:
10907 {
10908 xpath_allocator_capture cr(stack.result);
10909
10910 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10911 xpath_node na = ns.first();
10912
10913 return xpath_string::from_const(namespace_uri(na));
10914 }
10915
10916 case ast_func_string_0:
10917 return string_value(c.n, stack.result);
10918
10919 case ast_func_string_1:
10920 return _left->eval_string(c, stack);
10921
10922 case ast_func_concat:
10923 return eval_string_concat(c, stack);
10924
10925 case ast_func_substring_before:
10926 {
10927 xpath_allocator_capture cr(stack.temp);
10928
10929 xpath_stack swapped_stack = {stack.temp, stack.result};
10930
10931 xpath_string s = _left->eval_string(c, swapped_stack);
10932 xpath_string p = _right->eval_string(c, swapped_stack);
10933
10934 const char_t* pos = find_substring(s.c_str(), p.c_str());
10935
// the result is the part of s in front of the match, copied via stack.result; no match gives an empty string
10936 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10937 }
10938
10939 case ast_func_substring_after:
10940 {
10941 xpath_allocator_capture cr(stack.temp);
10942
10943 xpath_stack swapped_stack = {stack.temp, stack.result};
10944
10945 xpath_string s = _left->eval_string(c, swapped_stack);
10946 xpath_string p = _right->eval_string(c, swapped_stack);
10947
10948 const char_t* pos = find_substring(s.c_str(), p.c_str());
10949 if (!pos) return xpath_string();
10950
// the result starts right behind the matched pattern and runs to the end of s
10951 const char_t* rbegin = pos + p.length();
10952 const char_t* rend = s.c_str() + s.length();
10953
// a heap-backed s is copied via stack.result; otherwise the tail of s is referenced directly
10954 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10955 }
10956
10957 case ast_func_substring_2:
10958 {
10959 xpath_allocator_capture cr(stack.temp);
10960
10961 xpath_stack swapped_stack = {stack.temp, stack.result};
10962
10963 xpath_string s = _left->eval_string(c, swapped_stack);
10964 size_t s_length = s.length();
10965
// `first` is the rounded 1-based start position
10966 double first = round_nearest(_right->eval_number(c, stack));
10967
10968 if (is_nan(first)) return xpath_string(); // NaN
10969 else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
10970
// start positions below 1 are clamped to the first character
10971 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10972 assert(1 <= pos && pos <= s_length + 1);
10973
10974 const char_t* rbegin = s.c_str() + (pos - 1);
10975 const char_t* rend = s.c_str() + s.length();
10976
10977 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10978 }
10979
10980 case ast_func_substring_3:
10981 {
10982 xpath_allocator_capture cr(stack.temp);
10983
10984 xpath_stack swapped_stack = {stack.temp, stack.result};
10985
10986 xpath_string s = _left->eval_string(c, swapped_stack);
10987 size_t s_length = s.length();
10988
// `first` is the rounded 1-based start position, `last` the position one past the final character (start + rounded length)
10989 double first = round_nearest(_right->eval_number(c, stack));
10990 double last = first + round_nearest(_right->_next->eval_number(c, stack));
10991
10992 if (is_nan(first) || is_nan(last)) return xpath_string();
10993 else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
10994 else if (first >= last) return xpath_string();
10995 else if (last < 1) return xpath_string();
10996
// clamp both ends into [1, s_length + 1]
10997 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10998 size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last);
10999
11000 assert(1 <= pos && pos <= end && end <= s_length + 1);
11001 const char_t* rbegin = s.c_str() + (pos - 1);
11002 const char_t* rend = s.c_str() + (end - 1);
11003
// s can be referenced directly only when the range runs to the end of s and s is not heap-backed; otherwise the range is copied
11004 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
11005 }
11006
11007 case ast_func_normalize_space_0:
11008 {
11009 xpath_string s = string_value(c.n, stack.result);
11010
// s.data() yields a writable buffer (null on failure) that normalize_space() rewrites in place, returning the new end
11011 char_t* begin = s.data(stack.result);
11012 if (!begin) return xpath_string();
11013
11014 char_t* end = normalize_space(begin);
11015
11016 return xpath_string::from_heap_preallocated(begin, end);
11017 }
11018
11019 case ast_func_normalize_space_1:
11020 {
11021 xpath_string s = _left->eval_string(c, stack);
11022
11023 char_t* begin = s.data(stack.result);
11024 if (!begin) return xpath_string();
11025
11026 char_t* end = normalize_space(begin);
11027
11028 return xpath_string::from_heap_preallocated(begin, end);
11029 }
11030
11031 case ast_func_translate:
11032 {
11033 xpath_allocator_capture cr(stack.temp);
11034
11035 xpath_stack swapped_stack = {stack.temp, stack.result};
11036
// the subject string is evaluated with the regular stack because its buffer becomes the result;
// the from/to character lists are only needed temporarily and use the swapped stack
11037 xpath_string s = _left->eval_string(c, stack);
11038 xpath_string from = _right->eval_string(c, swapped_stack);
11039 xpath_string to = _right->_next->eval_string(c, swapped_stack);
11040
11041 char_t* begin = s.data(stack.result);
11042 if (!begin) return xpath_string();
11043
11044 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
11045
11046 return xpath_string::from_heap_preallocated(begin, end);
11047 }
11048
// translate() variant that uses the table stored in _data.table instead of evaluating from/to strings
11049 case ast_opt_translate_table:
11050 {
11051 xpath_string s = _left->eval_string(c, stack);
11052
11053 char_t* begin = s.data(stack.result);
11054 if (!begin) return xpath_string();
11055
11056 char_t* end = translate_table(begin, _data.table);
11057
11058 return xpath_string::from_heap_preallocated(begin, end);
11059 }
11060
11061 case ast_variable:
11062 {
11063 assert(_rettype == _data.variable->type());
11064
11065 if (_rettype == xpath_type_string)
11066 return xpath_string::from_const(_data.variable->get_string());
11067
11068 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
11069 break;
11070 }
11071
11072 default:
11073 ;
11074 }
11075
11076 // none of the ast types that return the value directly matched, we need to perform type conversion
11077 switch (_rettype)
11078 {
11079 case xpath_type_boolean:
11080 return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
11081
11082 case xpath_type_number:
11083 return convert_number_to_string(eval_number(c, stack), stack.result);
11084
11085 case xpath_type_node_set:
11086 {
11087 xpath_allocator_capture cr(stack.temp);
11088
11089 xpath_stack swapped_stack = {stack.temp, stack.result};
11090
// the node set itself is evaluated with the swapped stack; only the string value of its first node
// (or an empty string for an empty set) is built with stack.result
11091 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
11092 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
11093 }
11094
11095 default:
11096 assert(false && "Wrong expression for return type string"); // unreachable
11097 return xpath_string();
11098 }
11099 }
11100
11101 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
11102 {
11103 switch (_type)
11104 {
11105 case ast_op_union:
11106 {
11107 xpath_allocator_capture cr(stack.temp);
11108
11109 xpath_stack swapped_stack = {stack.temp, stack.result};
11110
11111 xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval);
11112 xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval);
11113
11114 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
11115 ls.set_type(xpath_node_set::type_unsorted);
11116
11117 ls.append(rs.begin(), rs.end(), stack.result);
11118 ls.remove_duplicates(stack.temp);
11119
11120 return ls;
11121 }
11122
11123 case ast_filter:
11124 {
11125 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
11126
11127 // either expression is a number or it contains position() call; sort by document order
11128 if (_test != predicate_posinv) set.sort_do();
11129
11130 bool once = eval_once(set.type(), eval);
11131
11132 apply_predicate(set, 0, stack, once);
11133
11134 return set;
11135 }
11136
11137 case ast_func_id:
11138 return xpath_node_set_raw();
11139
11140 case ast_step:
11141 {
11142 switch (_axis)
11143 {
11144 case axis_ancestor:
11145 return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
11146
11147 case axis_ancestor_or_self:
11148 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
11149
11150 case axis_attribute:
11151 return step_do(c, stack, eval, axis_to_type<axis_attribute>());
11152
11153 case axis_child:
11154 return step_do(c, stack, eval, axis_to_type<axis_child>());
11155
11156 case axis_descendant:
11157 return step_do(c, stack, eval, axis_to_type<axis_descendant>());
11158
11159 case axis_descendant_or_self:
11160 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
11161
11162 case axis_following:
11163 return step_do(c, stack, eval, axis_to_type<axis_following>());
11164
11165 case axis_following_sibling:
11166 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
11167
11168 case axis_namespace:
11169 // namespaced axis is not supported
11170 return xpath_node_set_raw();
11171
11172 case axis_parent:
11173 return step_do(c, stack, eval, axis_to_type<axis_parent>());
11174
11175 case axis_preceding:
11176 return step_do(c, stack, eval, axis_to_type<axis_preceding>());
11177
11178 case axis_preceding_sibling:
11179 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
11180
11181 case axis_self:
11182 return step_do(c, stack, eval, axis_to_type<axis_self>());
11183
11184 default:
11185 assert(false && "Unknown axis"); // unreachable
11186 return xpath_node_set_raw();
11187 }
11188 }
11189
11190 case ast_step_root:
11191 {
11192 assert(!_right); // root step can't have any predicates
11193
11194 xpath_node_set_raw ns;
11195
11196 ns.set_type(xpath_node_set::type_sorted);
11197
11198 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
11199 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
11200
11201 return ns;
11202 }
11203
11204 case ast_variable:
11205 {
11206 assert(_rettype == _data.variable->type());
11207
11208 if (_rettype == xpath_type_node_set)
11209 {
11210 const xpath_node_set& s = _data.variable->get_node_set();
11211
11212 xpath_node_set_raw ns;
11213
11214 ns.set_type(s.type());
11215 ns.append(s.begin(), s.end(), stack.result);
11216
11217 return ns;
11218 }
11219
11220 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
11221 break;
11222 }
11223
11224 default:
11225 ;
11226 }
11227
11228 // none of the ast types that return the value directly matched, but conversions to node set are invalid
11229 assert(false && "Wrong expression for return type node set"); // unreachable
11230 return xpath_node_set_raw();
11231 }
11232
11233 void optimize(xpath_allocator* alloc)
11234 {
11235 if (_left)
11236 _left->optimize(alloc);
11237
11238 if (_right)
11239 _right->optimize(alloc);
11240
11241 if (_next)
11242 _next->optimize(alloc);
11243
11244 // coverity[var_deref_model]
11245 optimize_self(alloc);
11246 }
11247
11248 void optimize_self(xpath_allocator* alloc)
11249 {
11250 // Rewrite [position()=expr] with [expr]
11251 // Note that this step has to go before classification to recognize [position()=1]
11252 if ((_type == ast_filter || _type == ast_predicate) &&
11253 _right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
11254 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
11255 {
11256 _right = _right->_right;
11257 }
11258
11259 // Classify filter/predicate ops to perform various optimizations during evaluation
11260 if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
11261 {
11262 assert(_test == predicate_default);
11263
11264 if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
11265 _test = predicate_constant_one;
11266 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
11267 _test = predicate_constant;
11268 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
11269 _test = predicate_posinv;
11270 }
11271
11272 // Rewrite descendant-or-self::node()/child::foo with descendant::foo
11273 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
11274 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
11275 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
11276 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) &&
11277 _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
11278 is_posinv_step())
11279 {
11280 if (_axis == axis_child || _axis == axis_descendant)
11281 _axis = axis_descendant;
11282 else
11283 _axis = axis_descendant_or_self;
11284
11285 _left = _left->_left;
11286 }
11287
11288 // Use optimized lookup table implementation for translate() with constant arguments
11289 if (_type == ast_func_translate &&
11290 _right && // workaround for clang static analyzer (_right is never null for ast_func_translate)
11291 _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
11292 {
11293 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
11294
11295 if (table)
11296 {
11297 _type = ast_opt_translate_table;
11298 _data.table = table;
11299 }
11300 }
11301
11302 // Use optimized path for @attr = 'value' or @attr = $value
11303 if (_type == ast_op_equal &&
11304 _left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal)
11305 // coverity[mixed_enums]
11306 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
11307 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
11308 {
11309 _type = ast_opt_compare_attribute;
11310 }
11311 }
11312
11313 bool is_posinv_expr() const
11314 {
11315 switch (_type)
11316 {
11317 case ast_func_position:
11318 case ast_func_last:
11319 return false;
11320
11321 case ast_string_constant:
11322 case ast_number_constant:
11323 case ast_variable:
11324 return true;
11325
11326 case ast_step:
11327 case ast_step_root:
11328 return true;
11329
11330 case ast_predicate:
11331 case ast_filter:
11332 return true;
11333
11334 default:
11335 if (_left && !_left->is_posinv_expr()) return false;
11336
11337 for (xpath_ast_node* n = _right; n; n = n->_next)
11338 if (!n->is_posinv_expr()) return false;
11339
11340 return true;
11341 }
11342 }
11343
11344 bool is_posinv_step() const
11345 {
11346 assert(_type == ast_step);
11347
11348 for (xpath_ast_node* n = _right; n; n = n->_next)
11349 {
11350 assert(n->_type == ast_predicate);
11351
11352 if (n->_test != predicate_posinv)
11353 return false;
11354 }
11355
11356 return true;
11357 }
11358
11359 xpath_value_type rettype() const
11360 {
11361 return static_cast<xpath_value_type>(_rettype);
11362 }
11363 };
11364
// Upper bound for the parser's depth counter; PUGIXML_XPATH_DEPTH_LIMIT overrides the default of 1024.
#ifdef PUGIXML_XPATH_DEPTH_LIMIT
static const size_t xpath_ast_depth_limit = PUGIXML_XPATH_DEPTH_LIMIT;
#else
static const size_t xpath_ast_depth_limit = 1024;
#endif
11372
11373 struct xpath_parser
11374 {
11375 xpath_allocator* _alloc;
11376 xpath_lexer _lexer;
11377
11378 const char_t* _query;
11379 xpath_variable_set* _variables;
11380
11381 xpath_parse_result* _result;
11382
11383 char_t _scratch[32];
11384
11385 size_t _depth;
11386
11387 xpath_ast_node* error(const char* message)
11388 {
11389 _result->error = message;
11390 _result->offset = _lexer.current_pos() - _query;
11391
11392 return 0;
11393 }
11394
11395 xpath_ast_node* error_oom()
11396 {
11397 assert(_alloc->_error);
11398 *_alloc->_error = true;
11399
11400 return 0;
11401 }
11402
11403 xpath_ast_node* error_rec()
11404 {
11405 return error("Exceeded maximum allowed query depth");
11406 }
11407
11408 void* alloc_node()
11409 {
11410 return _alloc->allocate(sizeof(xpath_ast_node));
11411 }
11412
11413 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
11414 {
11415 void* memory = alloc_node();
11416 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11417 }
11418
11419 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
11420 {
11421 void* memory = alloc_node();
11422 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11423 }
11424
11425 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
11426 {
11427 void* memory = alloc_node();
11428 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11429 }
11430
11431 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
11432 {
11433 void* memory = alloc_node();
11434 return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
11435 }
11436
11437 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
11438 {
11439 void* memory = alloc_node();
11440 return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
11441 }
11442
11443 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
11444 {
11445 void* memory = alloc_node();
11446 return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
11447 }
11448
11449 const char_t* alloc_string(const xpath_lexer_string& value)
11450 {
11451 if (!value.begin)
11452 return PUGIXML_TEXT("");
11453
11454 size_t length = static_cast<size_t>(value.end - value.begin);
11455
11456 char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
11457 if (!c) return 0;
11458
11459 memcpy(c, value.begin, length * sizeof(char_t));
11460 c[length] = 0;
11461
11462 return c;
11463 }
11464
11465 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
11466 {
11467 switch (name.begin[0])
11468 {
11469 case 'b':
11470 if (name == PUGIXML_TEXT("boolean") && argc == 1)
11471 return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
11472
11473 break;
11474
11475 case 'c':
11476 if (name == PUGIXML_TEXT("count") && argc == 1)
11477 {
11478 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11479 return alloc_node(ast_func_count, xpath_type_number, args[0]);
11480 }
11481 else if (name == PUGIXML_TEXT("contains") && argc == 2)
11482 return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
11483 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
11484 return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
11485 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
11486 return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
11487
11488 break;
11489
11490 case 'f':
11491 if (name == PUGIXML_TEXT("false") && argc == 0)
11492 return alloc_node(ast_func_false, xpath_type_boolean);
11493 else if (name == PUGIXML_TEXT("floor") && argc == 1)
11494 return alloc_node(ast_func_floor, xpath_type_number, args[0]);
11495
11496 break;
11497
11498 case 'i':
11499 if (name == PUGIXML_TEXT("id") && argc == 1)
11500 return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
11501
11502 break;
11503
11504 case 'l':
11505 if (name == PUGIXML_TEXT("last") && argc == 0)
11506 return alloc_node(ast_func_last, xpath_type_number);
11507 else if (name == PUGIXML_TEXT("lang") && argc == 1)
11508 return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
11509 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11510 {
11511 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11512 return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
11513 }
11514
11515 break;
11516
11517 case 'n':
11518 if (name == PUGIXML_TEXT("name") && argc <= 1)
11519 {
11520 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11521 return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
11522 }
11523 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11524 {
11525 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11526 return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
11527 }
11528 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11529 return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11530 else if (name == PUGIXML_TEXT("not") && argc == 1)
11531 return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
11532 else if (name == PUGIXML_TEXT("number") && argc <= 1)
11533 return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11534
11535 break;
11536
11537 case 'p':
11538 if (name == PUGIXML_TEXT("position") && argc == 0)
11539 return alloc_node(ast_func_position, xpath_type_number);
11540
11541 break;
11542
11543 case 'r':
11544 if (name == PUGIXML_TEXT("round") && argc == 1)
11545 return alloc_node(ast_func_round, xpath_type_number, args[0]);
11546
11547 break;
11548
11549 case 's':
11550 if (name == PUGIXML_TEXT("string") && argc <= 1)
11551 return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11552 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11553 return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11554 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11555 return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11556 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11557 return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11558 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11559 return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
11560 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11561 return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11562 else if (name == PUGIXML_TEXT("sum") && argc == 1)
11563 {
11564 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11565 return alloc_node(ast_func_sum, xpath_type_number, args[0]);
11566 }
11567
11568 break;
11569
11570 case 't':
11571 if (name == PUGIXML_TEXT("translate") && argc == 3)
11572 return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11573 else if (name == PUGIXML_TEXT("true") && argc == 0)
11574 return alloc_node(ast_func_true, xpath_type_boolean);
11575
11576 break;
11577
11578 default:
11579 break;
11580 }
11581
11582 return error("Unrecognized function or wrong parameter count");
11583 }
11584
11585 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11586 {
11587 specified = true;
11588
11589 switch (name.begin[0])
11590 {
11591 case 'a':
11592 if (name == PUGIXML_TEXT("ancestor"))
11593 return axis_ancestor;
11594 else if (name == PUGIXML_TEXT("ancestor-or-self"))
11595 return axis_ancestor_or_self;
11596 else if (name == PUGIXML_TEXT("attribute"))
11597 return axis_attribute;
11598
11599 break;
11600
11601 case 'c':
11602 if (name == PUGIXML_TEXT("child"))
11603 return axis_child;
11604
11605 break;
11606
11607 case 'd':
11608 if (name == PUGIXML_TEXT("descendant"))
11609 return axis_descendant;
11610 else if (name == PUGIXML_TEXT("descendant-or-self"))
11611 return axis_descendant_or_self;
11612
11613 break;
11614
11615 case 'f':
11616 if (name == PUGIXML_TEXT("following"))
11617 return axis_following;
11618 else if (name == PUGIXML_TEXT("following-sibling"))
11619 return axis_following_sibling;
11620
11621 break;
11622
11623 case 'n':
11624 if (name == PUGIXML_TEXT("namespace"))
11625 return axis_namespace;
11626
11627 break;
11628
11629 case 'p':
11630 if (name == PUGIXML_TEXT("parent"))
11631 return axis_parent;
11632 else if (name == PUGIXML_TEXT("preceding"))
11633 return axis_preceding;
11634 else if (name == PUGIXML_TEXT("preceding-sibling"))
11635 return axis_preceding_sibling;
11636
11637 break;
11638
11639 case 's':
11640 if (name == PUGIXML_TEXT("self"))
11641 return axis_self;
11642
11643 break;
11644
11645 default:
11646 break;
11647 }
11648
11649 specified = false;
11650 return axis_child;
11651 }
11652
11653 nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11654 {
11655 switch (name.begin[0])
11656 {
11657 case 'c':
11658 if (name == PUGIXML_TEXT("comment"))
11659 return nodetest_type_comment;
11660
11661 break;
11662
11663 case 'n':
11664 if (name == PUGIXML_TEXT("node"))
11665 return nodetest_type_node;
11666
11667 break;
11668
11669 case 'p':
11670 if (name == PUGIXML_TEXT("processing-instruction"))
11671 return nodetest_type_pi;
11672
11673 break;
11674
11675 case 't':
11676 if (name == PUGIXML_TEXT("text"))
11677 return nodetest_type_text;
11678
11679 break;
11680
11681 default:
11682 break;
11683 }
11684
11685 return nodetest_none;
11686 }
11687
11688 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
11689 xpath_ast_node* parse_primary_expression()
11690 {
11691 switch (_lexer.current())
11692 {
11693 case lex_var_ref:
11694 {
11695 xpath_lexer_string name = _lexer.contents();
11696
11697 if (!_variables)
11698 return error("Unknown variable: variable set is not provided");
11699
11700 xpath_variable* var = 0;
11701 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11702 return error_oom();
11703
11704 if (!var)
11705 return error("Unknown variable: variable set does not contain the given name");
11706
11707 _lexer.next();
11708
11709 return alloc_node(ast_variable, var->type(), var);
11710 }
11711
11712 case lex_open_brace:
11713 {
11714 _lexer.next();
11715
11716 xpath_ast_node* n = parse_expression();
11717 if (!n) return 0;
11718
11719 if (_lexer.current() != lex_close_brace)
11720 return error("Expected ')' to match an opening '('");
11721
11722 _lexer.next();
11723
11724 return n;
11725 }
11726
11727 case lex_quoted_string:
11728 {
11729 const char_t* value = alloc_string(_lexer.contents());
11730 if (!value) return 0;
11731
11732 _lexer.next();
11733
11734 return alloc_node(ast_string_constant, xpath_type_string, value);
11735 }
11736
11737 case lex_number:
11738 {
11739 double value = 0;
11740
11741 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11742 return error_oom();
11743
11744 _lexer.next();
11745
11746 return alloc_node(ast_number_constant, xpath_type_number, value);
11747 }
11748
11749 case lex_string:
11750 {
11751 xpath_ast_node* args[2] = {0};
11752 size_t argc = 0;
11753
11754 xpath_lexer_string function = _lexer.contents();
11755 _lexer.next();
11756
11757 xpath_ast_node* last_arg = 0;
11758
11759 if (_lexer.current() != lex_open_brace)
11760 return error("Unrecognized function call");
11761 _lexer.next();
11762
11763 size_t old_depth = _depth;
11764
11765 while (_lexer.current() != lex_close_brace)
11766 {
11767 if (argc > 0)
11768 {
11769 if (_lexer.current() != lex_comma)
11770 return error("No comma between function arguments");
11771 _lexer.next();
11772 }
11773
11774 if (++_depth > xpath_ast_depth_limit)
11775 return error_rec();
11776
11777 xpath_ast_node* n = parse_expression();
11778 if (!n) return 0;
11779
11780 if (argc < 2) args[argc] = n;
11781 else last_arg->set_next(n);
11782
11783 argc++;
11784 last_arg = n;
11785 }
11786
11787 _lexer.next();
11788
11789 _depth = old_depth;
11790
11791 return parse_function(function, argc, args);
11792 }
11793
11794 default:
11795 return error("Unrecognizable primary expression");
11796 }
11797 }
11798
11799 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11800 // Predicate ::= '[' PredicateExpr ']'
11801 // PredicateExpr ::= Expr
11802 xpath_ast_node* parse_filter_expression()
11803 {
11804 xpath_ast_node* n = parse_primary_expression();
11805 if (!n) return 0;
11806
11807 size_t old_depth = _depth;
11808
11809 while (_lexer.current() == lex_open_square_brace)
11810 {
11811 _lexer.next();
11812
11813 if (++_depth > xpath_ast_depth_limit)
11814 return error_rec();
11815
11816 if (n->rettype() != xpath_type_node_set)
11817 return error("Predicate has to be applied to node set");
11818
11819 xpath_ast_node* expr = parse_expression();
11820 if (!expr) return 0;
11821
11822 n = alloc_node(ast_filter, n, expr, predicate_default);
11823 if (!n) return 0;
11824
11825 if (_lexer.current() != lex_close_square_brace)
11826 return error("Expected ']' to match an opening '['");
11827
11828 _lexer.next();
11829 }
11830
11831 _depth = old_depth;
11832
11833 return n;
11834 }
11835
11836 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11837 // AxisSpecifier ::= AxisName '::' | '@'?
11838 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11839 // NameTest ::= '*' | NCName ':' '*' | QName
11840 // AbbreviatedStep ::= '.' | '..'
11841 xpath_ast_node* parse_step(xpath_ast_node* set)
11842 {
11843 if (set && set->rettype() != xpath_type_node_set)
11844 return error("Step has to be applied to node set");
11845
11846 bool axis_specified = false;
11847 axis_t axis = axis_child; // implied child axis
11848
11849 if (_lexer.current() == lex_axis_attribute)
11850 {
11851 axis = axis_attribute;
11852 axis_specified = true;
11853
11854 _lexer.next();
11855 }
11856 else if (_lexer.current() == lex_dot)
11857 {
11858 _lexer.next();
11859
11860 if (_lexer.current() == lex_open_square_brace)
11861 return error("Predicates are not allowed after an abbreviated step");
11862
11863 return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
11864 }
11865 else if (_lexer.current() == lex_double_dot)
11866 {
11867 _lexer.next();
11868
11869 if (_lexer.current() == lex_open_square_brace)
11870 return error("Predicates are not allowed after an abbreviated step");
11871
11872 return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
11873 }
11874
11875 nodetest_t nt_type = nodetest_none;
11876 xpath_lexer_string nt_name;
11877
11878 if (_lexer.current() == lex_string)
11879 {
11880 // node name test
11881 nt_name = _lexer.contents();
11882 _lexer.next();
11883
11884 // was it an axis name?
11885 if (_lexer.current() == lex_double_colon)
11886 {
11887 // parse axis name
11888 if (axis_specified)
11889 return error("Two axis specifiers in one step");
11890
11891 axis = parse_axis_name(nt_name, axis_specified);
11892
11893 if (!axis_specified)
11894 return error("Unknown axis");
11895
11896 // read actual node test
11897 _lexer.next();
11898
11899 if (_lexer.current() == lex_multiply)
11900 {
11901 nt_type = nodetest_all;
11902 nt_name = xpath_lexer_string();
11903 _lexer.next();
11904 }
11905 else if (_lexer.current() == lex_string)
11906 {
11907 nt_name = _lexer.contents();
11908 _lexer.next();
11909 }
11910 else
11911 {
11912 return error("Unrecognized node test");
11913 }
11914 }
11915
11916 if (nt_type == nodetest_none)
11917 {
11918 // node type test or processing-instruction
11919 if (_lexer.current() == lex_open_brace)
11920 {
11921 _lexer.next();
11922
11923 if (_lexer.current() == lex_close_brace)
11924 {
11925 _lexer.next();
11926
11927 nt_type = parse_node_test_type(nt_name);
11928
11929 if (nt_type == nodetest_none)
11930 return error("Unrecognized node type");
11931
11932 nt_name = xpath_lexer_string();
11933 }
11934 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
11935 {
11936 if (_lexer.current() != lex_quoted_string)
11937 return error("Only literals are allowed as arguments to processing-instruction()");
11938
11939 nt_type = nodetest_pi;
11940 nt_name = _lexer.contents();
11941 _lexer.next();
11942
11943 if (_lexer.current() != lex_close_brace)
11944 return error("Unmatched brace near processing-instruction()");
11945 _lexer.next();
11946 }
11947 else
11948 {
11949 return error("Unmatched brace near node type test");
11950 }
11951 }
11952 // QName or NCName:*
11953 else
11954 {
11955 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
11956 {
11957 nt_name.end--; // erase *
11958
11959 nt_type = nodetest_all_in_namespace;
11960 }
11961 else
11962 {
11963 nt_type = nodetest_name;
11964 }
11965 }
11966 }
11967 }
11968 else if (_lexer.current() == lex_multiply)
11969 {
11970 nt_type = nodetest_all;
11971 _lexer.next();
11972 }
11973 else
11974 {
11975 return error("Unrecognized node test");
11976 }
11977
11978 const char_t* nt_name_copy = alloc_string(nt_name);
11979 if (!nt_name_copy) return 0;
11980
11981 xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
11982 if (!n) return 0;
11983
11984 size_t old_depth = _depth;
11985
11986 xpath_ast_node* last = 0;
11987
11988 while (_lexer.current() == lex_open_square_brace)
11989 {
11990 _lexer.next();
11991
11992 if (++_depth > xpath_ast_depth_limit)
11993 return error_rec();
11994
11995 xpath_ast_node* expr = parse_expression();
11996 if (!expr) return 0;
11997
11998 xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
11999 if (!pred) return 0;
12000
12001 if (_lexer.current() != lex_close_square_brace)
12002 return error("Expected ']' to match an opening '['");
12003 _lexer.next();
12004
12005 if (last) last->set_next(pred);
12006 else n->set_right(pred);
12007
12008 last = pred;
12009 }
12010
12011 _depth = old_depth;
12012
12013 return n;
12014 }
12015
12016 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
12017 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
12018 {
12019 xpath_ast_node* n = parse_step(set);
12020 if (!n) return 0;
12021
12022 size_t old_depth = _depth;
12023
12024 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
12025 {
12026 lexeme_t l = _lexer.current();
12027 _lexer.next();
12028
12029 if (l == lex_double_slash)
12030 {
12031 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
12032 if (!n) return 0;
12033
12034 ++_depth;
12035 }
12036
12037 if (++_depth > xpath_ast_depth_limit)
12038 return error_rec();
12039
12040 n = parse_step(n);
12041 if (!n) return 0;
12042 }
12043
12044 _depth = old_depth;
12045
12046 return n;
12047 }
12048
12049 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
12050 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
12051 xpath_ast_node* parse_location_path()
12052 {
12053 if (_lexer.current() == lex_slash)
12054 {
12055 _lexer.next();
12056
12057 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
12058 if (!n) return 0;
12059
12060 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
12061 lexeme_t l = _lexer.current();
12062
12063 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
12064 return parse_relative_location_path(n);
12065 else
12066 return n;
12067 }
12068 else if (_lexer.current() == lex_double_slash)
12069 {
12070 _lexer.next();
12071
12072 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
12073 if (!n) return 0;
12074
12075 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
12076 if (!n) return 0;
12077
12078 return parse_relative_location_path(n);
12079 }
12080
12081 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
12082 return parse_relative_location_path(0);
12083 }
12084
12085 // PathExpr ::= LocationPath
12086 // | FilterExpr
12087 // | FilterExpr '/' RelativeLocationPath
12088 // | FilterExpr '//' RelativeLocationPath
12089 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
12090 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
12091 xpath_ast_node* parse_path_or_unary_expression()
12092 {
12093 // Clarification.
12094 // PathExpr begins with either LocationPath or FilterExpr.
12095 // FilterExpr begins with PrimaryExpr
12096 // PrimaryExpr begins with '$' in case of it being a variable reference,
12097 // '(' in case of it being an expression, string literal, number constant or
12098 // function call.
12099 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
12100 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
12101 _lexer.current() == lex_string)
12102 {
12103 if (_lexer.current() == lex_string)
12104 {
12105 // This is either a function call, or not - if not, we shall proceed with location path
12106 const char_t* state = _lexer.state();
12107
12108 while (PUGI_IMPL_IS_CHARTYPE(*state, ct_space)) ++state;
12109
12110 if (*state != '(')
12111 return parse_location_path();
12112
12113 // This looks like a function call; however this still can be a node-test. Check it.
12114 if (parse_node_test_type(_lexer.contents()) != nodetest_none)
12115 return parse_location_path();
12116 }
12117
12118 xpath_ast_node* n = parse_filter_expression();
12119 if (!n) return 0;
12120
12121 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
12122 {
12123 lexeme_t l = _lexer.current();
12124 _lexer.next();
12125
12126 if (l == lex_double_slash)
12127 {
12128 if (n->rettype() != xpath_type_node_set)
12129 return error("Step has to be applied to node set");
12130
12131 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
12132 if (!n) return 0;
12133 }
12134
12135 // select from location path
12136 return parse_relative_location_path(n);
12137 }
12138
12139 return n;
12140 }
12141 else if (_lexer.current() == lex_minus)
12142 {
12143 _lexer.next();
12144
12145 // precedence 7+ - only parses union expressions
12146 xpath_ast_node* n = parse_expression(7);
12147 if (!n) return 0;
12148
12149 return alloc_node(ast_op_negate, xpath_type_number, n);
12150 }
12151 else
12152 {
12153 return parse_location_path();
12154 }
12155 }
12156
12157 struct binary_op_t
12158 {
12159 ast_type_t asttype;
12160 xpath_value_type rettype;
12161 int precedence;
12162
12163 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
12164 {
12165 }
12166
12167 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
12168 {
12169 }
12170
12171 static binary_op_t parse(xpath_lexer& lexer)
12172 {
12173 switch (lexer.current())
12174 {
12175 case lex_string:
12176 if (lexer.contents() == PUGIXML_TEXT("or"))
12177 return binary_op_t(ast_op_or, xpath_type_boolean, 1);
12178 else if (lexer.contents() == PUGIXML_TEXT("and"))
12179 return binary_op_t(ast_op_and, xpath_type_boolean, 2);
12180 else if (lexer.contents() == PUGIXML_TEXT("div"))
12181 return binary_op_t(ast_op_divide, xpath_type_number, 6);
12182 else if (lexer.contents() == PUGIXML_TEXT("mod"))
12183 return binary_op_t(ast_op_mod, xpath_type_number, 6);
12184 else
12185 return binary_op_t();
12186
12187 case lex_equal:
12188 return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
12189
12190 case lex_not_equal:
12191 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
12192
12193 case lex_less:
12194 return binary_op_t(ast_op_less, xpath_type_boolean, 4);
12195
12196 case lex_greater:
12197 return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
12198
12199 case lex_less_or_equal:
12200 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
12201
12202 case lex_greater_or_equal:
12203 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
12204
12205 case lex_plus:
12206 return binary_op_t(ast_op_add, xpath_type_number, 5);
12207
12208 case lex_minus:
12209 return binary_op_t(ast_op_subtract, xpath_type_number, 5);
12210
12211 case lex_multiply:
12212 return binary_op_t(ast_op_multiply, xpath_type_number, 6);
12213
12214 case lex_union:
12215 return binary_op_t(ast_op_union, xpath_type_node_set, 7);
12216
12217 default:
12218 return binary_op_t();
12219 }
12220 }
12221 };
12222
12223 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
12224 {
12225 binary_op_t op = binary_op_t::parse(_lexer);
12226
12227 while (op.asttype != ast_unknown && op.precedence >= limit)
12228 {
12229 _lexer.next();
12230
12231 if (++_depth > xpath_ast_depth_limit)
12232 return error_rec();
12233
12234 xpath_ast_node* rhs = parse_path_or_unary_expression();
12235 if (!rhs) return 0;
12236
12237 binary_op_t nextop = binary_op_t::parse(_lexer);
12238
12239 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
12240 {
12241 rhs = parse_expression_rec(rhs, nextop.precedence);
12242 if (!rhs) return 0;
12243
12244 nextop = binary_op_t::parse(_lexer);
12245 }
12246
12247 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
12248 return error("Union operator has to be applied to node sets");
12249
12250 lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
12251 if (!lhs) return 0;
12252
12253 op = binary_op_t::parse(_lexer);
12254 }
12255
12256 return lhs;
12257 }
12258
12259 // Expr ::= OrExpr
12260 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
12261 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
12262 // EqualityExpr ::= RelationalExpr
12263 // | EqualityExpr '=' RelationalExpr
12264 // | EqualityExpr '!=' RelationalExpr
12265 // RelationalExpr ::= AdditiveExpr
12266 // | RelationalExpr '<' AdditiveExpr
12267 // | RelationalExpr '>' AdditiveExpr
12268 // | RelationalExpr '<=' AdditiveExpr
12269 // | RelationalExpr '>=' AdditiveExpr
12270 // AdditiveExpr ::= MultiplicativeExpr
12271 // | AdditiveExpr '+' MultiplicativeExpr
12272 // | AdditiveExpr '-' MultiplicativeExpr
12273 // MultiplicativeExpr ::= UnaryExpr
12274 // | MultiplicativeExpr '*' UnaryExpr
12275 // | MultiplicativeExpr 'div' UnaryExpr
12276 // | MultiplicativeExpr 'mod' UnaryExpr
12277 xpath_ast_node* parse_expression(int limit = 0)
12278 {
12279 size_t old_depth = _depth;
12280
12281 if (++_depth > xpath_ast_depth_limit)
12282 return error_rec();
12283
12284 xpath_ast_node* n = parse_path_or_unary_expression();
12285 if (!n) return 0;
12286
12287 n = parse_expression_rec(n, limit);
12288
12289 _depth = old_depth;
12290
12291 return n;
12292 }
12293
12294 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0)
12295 {
12296 }
12297
12298 xpath_ast_node* parse()
12299 {
12300 xpath_ast_node* n = parse_expression();
12301 if (!n) return 0;
12302
12303 assert(_depth == 0);
12304
12305 // check if there are unparsed tokens left
12306 if (_lexer.current() != lex_eof)
12307 return error("Incorrect query");
12308
12309 return n;
12310 }
12311
12312 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
12313 {
12314 xpath_parser parser(query, variables, alloc, result);
12315
12316 return parser.parse();
12317 }
12318 };
12319
12320 struct xpath_query_impl
12321 {
12322 static xpath_query_impl* create()
12323 {
12324 void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
12325 if (!memory) return 0;
12326
12327 return new (memory) xpath_query_impl();
12328 }
12329
12330 static void destroy(xpath_query_impl* impl)
12331 {
12332 // free all allocated pages
12333 impl->alloc.release();
12334
12335 // free allocator memory (with the first page)
12336 xml_memory::deallocate(impl);
12337 }
12338
12339 xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
12340 {
12341 block.next = 0;
12342 block.capacity = sizeof(block.data);
12343 }
12344
12345 xpath_ast_node* root;
12346 xpath_allocator alloc;
12347 xpath_memory_block block;
12348 bool oom;
12349 };
12350
12351 PUGI_IMPL_FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
12352 {
12353 if (!impl) return 0;
12354
12355 if (impl->root->rettype() != xpath_type_node_set)
12356 {
12357 #ifdef PUGIXML_NO_EXCEPTIONS
12358 return 0;
12359 #else
12360 xpath_parse_result res;
12361 res.error = "Expression does not evaluate to node set";
12362
12363 throw xpath_exception(res);
12364 #endif
12365 }
12366
12367 return impl->root;
12368 }
12369 PUGI_IMPL_NS_END
12370
12371 namespace pugi
12372 {
12373 #ifndef PUGIXML_NO_EXCEPTIONS
12374 PUGI_IMPL_FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
12375 {
12376 assert(_result.error);
12377 }
12378
12379 PUGI_IMPL_FN const char* xpath_exception::what() const throw()
12380 {
12381 return _result.error;
12382 }
12383
12384 PUGI_IMPL_FN const xpath_parse_result& xpath_exception::result() const
12385 {
12386 return _result;
12387 }
12388 #endif
12389
12390 PUGI_IMPL_FN xpath_node::xpath_node()
12391 {
12392 }
12393
12394 PUGI_IMPL_FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
12395 {
12396 }
12397
12398 PUGI_IMPL_FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
12399 {
12400 }
12401
12402 PUGI_IMPL_FN xml_node xpath_node::node() const
12403 {
12404 return _attribute ? xml_node() : _node;
12405 }
12406
12407 PUGI_IMPL_FN xml_attribute xpath_node::attribute() const
12408 {
12409 return _attribute;
12410 }
12411
12412 PUGI_IMPL_FN xml_node xpath_node::parent() const
12413 {
12414 return _attribute ? _node : _node.parent();
12415 }
12416
12417 PUGI_IMPL_FN static void unspecified_bool_xpath_node(xpath_node***)
12418 {
12419 }
12420
12421 PUGI_IMPL_FN xpath_node::operator xpath_node::unspecified_bool_type() const
12422 {
12423 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
12424 }
12425
12426 PUGI_IMPL_FN bool xpath_node::operator!() const
12427 {
12428 return !(_node || _attribute);
12429 }
12430
12431 PUGI_IMPL_FN bool xpath_node::operator==(const xpath_node& n) const
12432 {
12433 return _node == n._node && _attribute == n._attribute;
12434 }
12435
12436 PUGI_IMPL_FN bool xpath_node::operator!=(const xpath_node& n) const
12437 {
12438 return _node != n._node || _attribute != n._attribute;
12439 }
12440
12441 #ifdef __BORLANDC__
12442 PUGI_IMPL_FN bool operator&&(const xpath_node& lhs, bool rhs)
12443 {
12444 return (bool)lhs && rhs;
12445 }
12446
12447 PUGI_IMPL_FN bool operator||(const xpath_node& lhs, bool rhs)
12448 {
12449 return (bool)lhs || rhs;
12450 }
12451 #endif
12452
12453 PUGI_IMPL_FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
12454 {
12455 assert(begin_ <= end_);
12456
12457 size_t size_ = static_cast<size_t>(end_ - begin_);
12458
12459 // use internal buffer for 0 or 1 elements, heap buffer otherwise
12460 xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
12461
12462 if (!storage)
12463 {
12464 #ifdef PUGIXML_NO_EXCEPTIONS
12465 return;
12466 #else
12467 throw std::bad_alloc();
12468 #endif
12469 }
12470
12471 // deallocate old buffer
12472 if (_begin != _storage)
12473 impl::xml_memory::deallocate(_begin);
12474
12475 // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB
12476 if (size_)
12477 memcpy(storage, begin_, size_ * sizeof(xpath_node));
12478
12479 _begin = storage;
12480 _end = storage + size_;
12481 _type = type_;
12482 }
12483
12484 #ifdef PUGIXML_HAS_MOVE
12485 PUGI_IMPL_FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
12486 {
12487 _type = rhs._type;
12488 _storage[0] = rhs._storage[0];
12489 _begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin;
12490 _end = _begin + (rhs._end - rhs._begin);
12491
12492 rhs._type = type_unsorted;
12493 rhs._begin = rhs._storage;
12494 rhs._end = rhs._storage;
12495 }
12496 #endif
12497
12498 PUGI_IMPL_FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage)
12499 {
12500 }
12501
12502 PUGI_IMPL_FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage)
12503 {
12504 _assign(begin_, end_, type_);
12505 }
12506
12507 PUGI_IMPL_FN xpath_node_set::~xpath_node_set()
12508 {
12509 if (_begin != _storage)
12510 impl::xml_memory::deallocate(_begin);
12511 }
12512
12513 PUGI_IMPL_FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage)
12514 {
12515 _assign(ns._begin, ns._end, ns._type);
12516 }
12517
12518 PUGI_IMPL_FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
12519 {
12520 if (this == &ns) return *this;
12521
12522 _assign(ns._begin, ns._end, ns._type);
12523
12524 return *this;
12525 }
12526
12527 #ifdef PUGIXML_HAS_MOVE
12528 PUGI_IMPL_FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage)
12529 {
12530 _move(rhs);
12531 }
12532
12533 PUGI_IMPL_FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
12534 {
12535 if (this == &rhs) return *this;
12536
12537 if (_begin != _storage)
12538 impl::xml_memory::deallocate(_begin);
12539
12540 _move(rhs);
12541
12542 return *this;
12543 }
12544 #endif
12545
12546 PUGI_IMPL_FN xpath_node_set::type_t xpath_node_set::type() const
12547 {
12548 return _type;
12549 }
12550
12551 PUGI_IMPL_FN size_t xpath_node_set::size() const
12552 {
12553 return _end - _begin;
12554 }
12555
12556 PUGI_IMPL_FN bool xpath_node_set::empty() const
12557 {
12558 return _begin == _end;
12559 }
12560
12561 PUGI_IMPL_FN const xpath_node& xpath_node_set::operator[](size_t index) const
12562 {
12563 assert(index < size());
12564 return _begin[index];
12565 }
12566
12567 PUGI_IMPL_FN xpath_node_set::const_iterator xpath_node_set::begin() const
12568 {
12569 return _begin;
12570 }
12571
12572 PUGI_IMPL_FN xpath_node_set::const_iterator xpath_node_set::end() const
12573 {
12574 return _end;
12575 }
12576
12577 PUGI_IMPL_FN void xpath_node_set::sort(bool reverse)
12578 {
12579 _type = impl::xpath_sort(_begin, _end, _type, reverse);
12580 }
12581
12582 PUGI_IMPL_FN xpath_node xpath_node_set::first() const
12583 {
12584 return impl::xpath_first(_begin, _end, _type);
12585 }
12586
12587 PUGI_IMPL_FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
12588 {
12589 }
12590
12591 PUGI_IMPL_FN xpath_parse_result::operator bool() const
12592 {
12593 return error == 0;
12594 }
12595
12596 PUGI_IMPL_FN const char* xpath_parse_result::description() const
12597 {
12598 return error ? error : "No error";
12599 }
12600
12601 PUGI_IMPL_FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
12602 {
12603 }
12604
12605 PUGI_IMPL_FN const char_t* xpath_variable::name() const
12606 {
12607 switch (_type)
12608 {
12609 case xpath_type_node_set:
12610 return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12611
12612 case xpath_type_number:
12613 return static_cast<const impl::xpath_variable_number*>(this)->name;
12614
12615 case xpath_type_string:
12616 return static_cast<const impl::xpath_variable_string*>(this)->name;
12617
12618 case xpath_type_boolean:
12619 return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12620
12621 default:
12622 assert(false && "Invalid variable type"); // unreachable
12623 return 0;
12624 }
12625 }
12626
12627 PUGI_IMPL_FN xpath_value_type xpath_variable::type() const
12628 {
12629 return _type;
12630 }
12631
12632 PUGI_IMPL_FN bool xpath_variable::get_boolean() const
12633 {
12634 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
12635 }
12636
12637 PUGI_IMPL_FN double xpath_variable::get_number() const
12638 {
12639 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
12640 }
12641
12642 PUGI_IMPL_FN const char_t* xpath_variable::get_string() const
12643 {
12644 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12645 return value ? value : PUGIXML_TEXT("");
12646 }
12647
12648 PUGI_IMPL_FN const xpath_node_set& xpath_variable::get_node_set() const
12649 {
12650 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
12651 }
12652
12653 PUGI_IMPL_FN bool xpath_variable::set(bool value)
12654 {
12655 if (_type != xpath_type_boolean) return false;
12656
12657 static_cast<impl::xpath_variable_boolean*>(this)->value = value;
12658 return true;
12659 }
12660
12661 PUGI_IMPL_FN bool xpath_variable::set(double value)
12662 {
12663 if (_type != xpath_type_number) return false;
12664
12665 static_cast<impl::xpath_variable_number*>(this)->value = value;
12666 return true;
12667 }
12668
12669 PUGI_IMPL_FN bool xpath_variable::set(const char_t* value)
12670 {
12671 if (_type != xpath_type_string) return false;
12672
12673 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12674
12675 // duplicate string
12676 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12677
12678 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12679 if (!copy) return false;
12680
12681 memcpy(copy, value, size);
12682
12683 // replace old string
12684 if (var->value) impl::xml_memory::deallocate(var->value);
12685 var->value = copy;
12686
12687 return true;
12688 }
12689
12690 PUGI_IMPL_FN bool xpath_variable::set(const xpath_node_set& value)
12691 {
12692 if (_type != xpath_type_node_set) return false;
12693
12694 static_cast<impl::xpath_variable_node_set*>(this)->value = value;
12695 return true;
12696 }
12697
12698 PUGI_IMPL_FN xpath_variable_set::xpath_variable_set()
12699 {
12700 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12701 _data[i] = 0;
12702 }
12703
12704 PUGI_IMPL_FN xpath_variable_set::~xpath_variable_set()
12705 {
12706 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12707 _destroy(_data[i]);
12708 }
12709
12710 PUGI_IMPL_FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12711 {
12712 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12713 _data[i] = 0;
12714
12715 _assign(rhs);
12716 }
12717
12718 PUGI_IMPL_FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12719 {
12720 if (this == &rhs) return *this;
12721
12722 _assign(rhs);
12723
12724 return *this;
12725 }
12726
12727 #ifdef PUGIXML_HAS_MOVE
12728 PUGI_IMPL_FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12729 {
12730 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12731 {
12732 _data[i] = rhs._data[i];
12733 rhs._data[i] = 0;
12734 }
12735 }
12736
12737 PUGI_IMPL_FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12738 {
12739 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12740 {
12741 _destroy(_data[i]);
12742
12743 _data[i] = rhs._data[i];
12744 rhs._data[i] = 0;
12745 }
12746
12747 return *this;
12748 }
12749 #endif
12750
12751 PUGI_IMPL_FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12752 {
12753 xpath_variable_set temp;
12754
12755 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12756 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
12757 return;
12758
12759 _swap(temp);
12760 }
12761
12762 PUGI_IMPL_FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12763 {
12764 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12765 {
12766 xpath_variable* chain = _data[i];
12767
12768 _data[i] = rhs._data[i];
12769 rhs._data[i] = chain;
12770 }
12771 }
12772
12773 PUGI_IMPL_FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12774 {
12775 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12776 size_t hash = impl::hash_string(name) % hash_size;
12777
12778 // look for existing variable
12779 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12780 if (impl::strequal(var->name(), name))
12781 return var;
12782
12783 return 0;
12784 }
12785
12786 PUGI_IMPL_FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12787 {
12788 xpath_variable* last = 0;
12789
12790 while (var)
12791 {
12792 // allocate storage for new variable
12793 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12794 if (!nvar) return false;
12795
12796 // link the variable to the result immediately to handle failures gracefully
12797 if (last)
12798 last->_next = nvar;
12799 else
12800 *out_result = nvar;
12801
12802 last = nvar;
12803
12804 // copy the value; this can fail due to out-of-memory conditions
12805 if (!impl::copy_xpath_variable(nvar, var)) return false;
12806
12807 var = var->_next;
12808 }
12809
12810 return true;
12811 }
12812
12813 PUGI_IMPL_FN void xpath_variable_set::_destroy(xpath_variable* var)
12814 {
12815 while (var)
12816 {
12817 xpath_variable* next = var->_next;
12818
12819 impl::delete_xpath_variable(var->_type, var);
12820
12821 var = next;
12822 }
12823 }
12824
12825 PUGI_IMPL_FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12826 {
12827 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12828 size_t hash = impl::hash_string(name) % hash_size;
12829
12830 // look for existing variable
12831 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12832 if (impl::strequal(var->name(), name))
12833 return var->type() == type ? var : 0;
12834
12835 // add new variable
12836 xpath_variable* result = impl::new_xpath_variable(type, name);
12837
12838 if (result)
12839 {
12840 result->_next = _data[hash];
12841
12842 _data[hash] = result;
12843 }
12844
12845 return result;
12846 }
12847
12848 PUGI_IMPL_FN bool xpath_variable_set::set(const char_t* name, bool value)
12849 {
12850 xpath_variable* var = add(name, xpath_type_boolean);
12851 return var ? var->set(value) : false;
12852 }
12853
12854 PUGI_IMPL_FN bool xpath_variable_set::set(const char_t* name, double value)
12855 {
12856 xpath_variable* var = add(name, xpath_type_number);
12857 return var ? var->set(value) : false;
12858 }
12859
12860 PUGI_IMPL_FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12861 {
12862 xpath_variable* var = add(name, xpath_type_string);
12863 return var ? var->set(value) : false;
12864 }
12865
12866 PUGI_IMPL_FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12867 {
12868 xpath_variable* var = add(name, xpath_type_node_set);
12869 return var ? var->set(value) : false;
12870 }
12871
12872 PUGI_IMPL_FN xpath_variable* xpath_variable_set::get(const char_t* name)
12873 {
12874 return _find(name);
12875 }
12876
12877 PUGI_IMPL_FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
12878 {
12879 return _find(name);
12880 }
12881
12882 PUGI_IMPL_FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12883 {
12884 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
12885
12886 if (!qimpl)
12887 {
12888 #ifdef PUGIXML_NO_EXCEPTIONS
12889 _result.error = "Out of memory";
12890 #else
12891 throw std::bad_alloc();
12892 #endif
12893 }
12894 else
12895 {
12896 using impl::auto_deleter; // MSVC7 workaround
12897 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
12898
12899 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
12900
12901 if (qimpl->root)
12902 {
12903 qimpl->root->optimize(&qimpl->alloc);
12904
12905 _impl = impl.release();
12906 _result.error = 0;
12907 }
12908 else
12909 {
12910 #ifdef PUGIXML_NO_EXCEPTIONS
12911 if (qimpl->oom) _result.error = "Out of memory";
12912 #else
12913 if (qimpl->oom) throw std::bad_alloc();
12914 throw xpath_exception(_result);
12915 #endif
12916 }
12917 }
12918 }
12919
12920 PUGI_IMPL_FN xpath_query::xpath_query(): _impl(0)
12921 {
12922 }
12923
12924 PUGI_IMPL_FN xpath_query::~xpath_query()
12925 {
12926 if (_impl)
12927 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12928 }
12929
12930 #ifdef PUGIXML_HAS_MOVE
12931 PUGI_IMPL_FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
12932 {
12933 _impl = rhs._impl;
12934 _result = rhs._result;
12935 rhs._impl = 0;
12936 rhs._result = xpath_parse_result();
12937 }
12938
12939 PUGI_IMPL_FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
12940 {
12941 if (this == &rhs) return *this;
12942
12943 if (_impl)
12944 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12945
12946 _impl = rhs._impl;
12947 _result = rhs._result;
12948 rhs._impl = 0;
12949 rhs._result = xpath_parse_result();
12950
12951 return *this;
12952 }
12953 #endif
12954
12955 PUGI_IMPL_FN xpath_value_type xpath_query::return_type() const
12956 {
12957 if (!_impl) return xpath_type_none;
12958
12959 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
12960 }
12961
12962 PUGI_IMPL_FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12963 {
12964 if (!_impl) return false;
12965
12966 impl::xpath_context c(n, 1, 1);
12967 impl::xpath_stack_data sd;
12968
12969 bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
12970
12971 if (sd.oom)
12972 {
12973 #ifdef PUGIXML_NO_EXCEPTIONS
12974 return false;
12975 #else
12976 throw std::bad_alloc();
12977 #endif
12978 }
12979
12980 return r;
12981 }
12982
12983 PUGI_IMPL_FN double xpath_query::evaluate_number(const xpath_node& n) const
12984 {
12985 if (!_impl) return impl::gen_nan();
12986
12987 impl::xpath_context c(n, 1, 1);
12988 impl::xpath_stack_data sd;
12989
12990 double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12991
12992 if (sd.oom)
12993 {
12994 #ifdef PUGIXML_NO_EXCEPTIONS
12995 return impl::gen_nan();
12996 #else
12997 throw std::bad_alloc();
12998 #endif
12999 }
13000
13001 return r;
13002 }
13003
13004 #ifndef PUGIXML_NO_STL
13005 PUGI_IMPL_FN string_t xpath_query::evaluate_string(const xpath_node& n) const
13006 {
13007 if (!_impl) return string_t();
13008
13009 impl::xpath_context c(n, 1, 1);
13010 impl::xpath_stack_data sd;
13011
13012 impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
13013
13014 if (sd.oom)
13015 {
13016 #ifdef PUGIXML_NO_EXCEPTIONS
13017 return string_t();
13018 #else
13019 throw std::bad_alloc();
13020 #endif
13021 }
13022
13023 return string_t(r.c_str(), r.length());
13024 }
13025 #endif
13026
13027 PUGI_IMPL_FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
13028 {
13029 impl::xpath_context c(n, 1, 1);
13030 impl::xpath_stack_data sd;
13031
13032 impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
13033
13034 if (sd.oom)
13035 {
13036 #ifdef PUGIXML_NO_EXCEPTIONS
13037 r = impl::xpath_string();
13038 #else
13039 throw std::bad_alloc();
13040 #endif
13041 }
13042
13043 size_t full_size = r.length() + 1;
13044
13045 if (capacity > 0)
13046 {
13047 size_t size = (full_size < capacity) ? full_size : capacity;
13048 assert(size > 0);
13049
13050 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
13051 buffer[size - 1] = 0;
13052 }
13053
13054 return full_size;
13055 }
13056
13057 PUGI_IMPL_FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
13058 {
13059 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
13060 if (!root) return xpath_node_set();
13061
13062 impl::xpath_context c(n, 1, 1);
13063 impl::xpath_stack_data sd;
13064
13065 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
13066
13067 if (sd.oom)
13068 {
13069 #ifdef PUGIXML_NO_EXCEPTIONS
13070 return xpath_node_set();
13071 #else
13072 throw std::bad_alloc();
13073 #endif
13074 }
13075
13076 return xpath_node_set(r.begin(), r.end(), r.type());
13077 }
13078
13079 PUGI_IMPL_FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
13080 {
13081 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
13082 if (!root) return xpath_node();
13083
13084 impl::xpath_context c(n, 1, 1);
13085 impl::xpath_stack_data sd;
13086
13087 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
13088
13089 if (sd.oom)
13090 {
13091 #ifdef PUGIXML_NO_EXCEPTIONS
13092 return xpath_node();
13093 #else
13094 throw std::bad_alloc();
13095 #endif
13096 }
13097
13098 return r.first();
13099 }
13100
13101 PUGI_IMPL_FN const xpath_parse_result& xpath_query::result() const
13102 {
13103 return _result;
13104 }
13105
13106 PUGI_IMPL_FN static void unspecified_bool_xpath_query(xpath_query***)
13107 {
13108 }
13109
13110 PUGI_IMPL_FN xpath_query::operator xpath_query::unspecified_bool_type() const
13111 {
13112 return _impl ? unspecified_bool_xpath_query : 0;
13113 }
13114
13115 PUGI_IMPL_FN bool xpath_query::operator!() const
13116 {
13117 return !_impl;
13118 }
13119
13120 PUGI_IMPL_FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
13121 {
13122 xpath_query q(query, variables);
13123 return q.evaluate_node(*this);
13124 }
13125
13126 PUGI_IMPL_FN xpath_node xml_node::select_node(const xpath_query& query) const
13127 {
13128 return query.evaluate_node(*this);
13129 }
13130
13131 PUGI_IMPL_FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
13132 {
13133 xpath_query q(query, variables);
13134 return q.evaluate_node_set(*this);
13135 }
13136
13137 PUGI_IMPL_FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
13138 {
13139 return query.evaluate_node_set(*this);
13140 }
13141
13142 PUGI_IMPL_FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
13143 {
13144 xpath_query q(query, variables);
13145 return q.evaluate_node(*this);
13146 }
13147
13148 PUGI_IMPL_FN xpath_node xml_node::select_single_node(const xpath_query& query) const
13149 {
13150 return query.evaluate_node(*this);
13151 }
13152 }
13153
13154 #endif
13155
13156 #ifdef __BORLANDC__
13157 # pragma option pop
13158 #endif
13159
13160 // Intel C++ does not properly keep warning state for function templates,
13161 // so popping warning state at the end of translation unit leads to warnings in the middle.
13162 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
13163 # pragma warning(pop)
13164 #endif
13165
13166 #if defined(_MSC_VER) && defined(__c2__)
13167 # pragma clang diagnostic pop
13168 #endif
13169
13170 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
13171 #undef PUGI_IMPL_NO_INLINE
13172 #undef PUGI_IMPL_UNLIKELY
13173 #undef PUGI_IMPL_STATIC_ASSERT
13174 #undef PUGI_IMPL_DMC_VOLATILE
13175 #undef PUGI_IMPL_UNSIGNED_OVERFLOW
13176 #undef PUGI_IMPL_MSVC_CRT_VERSION
13177 #undef PUGI_IMPL_SNPRINTF
13178 #undef PUGI_IMPL_NS_BEGIN
13179 #undef PUGI_IMPL_NS_END
13180 #undef PUGI_IMPL_FN
13181 #undef PUGI_IMPL_FN_NO_INLINE
13182 #undef PUGI_IMPL_GETHEADER_IMPL
13183 #undef PUGI_IMPL_GETPAGE_IMPL
13184 #undef PUGI_IMPL_GETPAGE
13185 #undef PUGI_IMPL_NODETYPE
13186 #undef PUGI_IMPL_IS_CHARTYPE_IMPL
13187 #undef PUGI_IMPL_IS_CHARTYPE
13188 #undef PUGI_IMPL_IS_CHARTYPEX
13189 #undef PUGI_IMPL_ENDSWITH
13190 #undef PUGI_IMPL_SKIPWS
13191 #undef PUGI_IMPL_OPTSET
13192 #undef PUGI_IMPL_PUSHNODE
13193 #undef PUGI_IMPL_POPNODE
13194 #undef PUGI_IMPL_SCANFOR
13195 #undef PUGI_IMPL_SCANWHILE
13196 #undef PUGI_IMPL_SCANWHILE_UNROLL
13197 #undef PUGI_IMPL_ENDSEG
13198 #undef PUGI_IMPL_THROW_ERROR
13199 #undef PUGI_IMPL_CHECK_ERROR
13200
13201 #endif
13202
13203 /**
13204 * Copyright (c) 2006-2023 Arseny Kapoulkine
13205 *
13206 * Permission is hereby granted, free of charge, to any person
13207 * obtaining a copy of this software and associated documentation
13208 * files (the "Software"), to deal in the Software without
13209 * restriction, including without limitation the rights to use,
13210 * copy, modify, merge, publish, distribute, sublicense, and/or sell
13211 * copies of the Software, and to permit persons to whom the
13212 * Software is furnished to do so, subject to the following
13213 * conditions:
13214 *
13215 * The above copyright notice and this permission notice shall be
13216 * included in all copies or substantial portions of the Software.
13217 *
13218 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13219 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
13220 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
13221 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
13222 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
13223 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
13224 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
13225 * OTHER DEALINGS IN THE SOFTWARE.
13226 */