comparison dep/pugixml/src/pugixml.cpp @ 55:d10b6c6b432e

add xml lib, we will need to use it eventually
author Paper <mrpapersonic@gmail.com>
date Tue, 26 Sep 2023 12:37:08 -0400
parents
children a45edd073f9e
comparison
equal deleted inserted replaced
54:466ac9870df9 55:d10b6c6b432e
1 /**
2 * pugixml parser - version 1.13
3 * --------------------------------------------------------
4 * Copyright (C) 2006-2022, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
5 * Report bugs and download new versions at https://pugixml.org/
6 *
7 * This library is distributed under the MIT License. See notice at the end
8 * of this file.
9 *
10 * This work is based on the pugxml parser, which is:
11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
12 */
13
14 #ifndef SOURCE_PUGIXML_CPP
15 #define SOURCE_PUGIXML_CPP
16
17 #include "pugixml.hpp"
18
19 #include <stdlib.h>
20 #include <stdio.h>
21 #include <string.h>
22 #include <assert.h>
23 #include <limits.h>
24
25 #ifdef PUGIXML_WCHAR_MODE
26 # include <wchar.h>
27 #endif
28
29 #ifndef PUGIXML_NO_XPATH
30 # include <math.h>
31 # include <float.h>
32 #endif
33
34 #ifndef PUGIXML_NO_STL
35 # include <istream>
36 # include <ostream>
37 # include <string>
38 #endif
39
40 // For placement new
41 #include <new>
42
43 #ifdef _MSC_VER
44 # pragma warning(push)
45 # pragma warning(disable: 4127) // conditional expression is constant
46 # pragma warning(disable: 4324) // structure was padded due to __declspec(align())
47 # pragma warning(disable: 4702) // unreachable code
48 # pragma warning(disable: 4996) // this function or variable may be unsafe
49 #endif
50
51 #if defined(_MSC_VER) && defined(__c2__)
52 # pragma clang diagnostic push
53 # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe
54 #endif
55
56 #ifdef __INTEL_COMPILER
57 # pragma warning(disable: 177) // function was declared but never referenced
58 # pragma warning(disable: 279) // controlling expression is constant
59 # pragma warning(disable: 1478 1786) // function was declared "deprecated"
60 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type
61 #endif
62
63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY)
64 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away
65 #endif
66
67 #ifdef __BORLANDC__
68 # pragma option push
69 # pragma warn -8008 // condition is always false
70 # pragma warn -8066 // unreachable code
71 #endif
72
73 #ifdef __SNC__
74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug
75 # pragma diag_suppress=178 // function was declared but never referenced
76 # pragma diag_suppress=237 // controlling expression is constant
77 #endif
78
79 #ifdef __TI_COMPILER_VERSION__
80 # pragma diag_suppress 179 // function was declared but never referenced
81 #endif
82
// PUGI__NO_INLINE: compiler-specific "never inline this function" marker (empty when unsupported)
#if defined(_MSC_VER) && _MSC_VER >= 1300
#   define PUGI__NO_INLINE __declspec(noinline)
#else
#   ifdef __GNUC__
#       define PUGI__NO_INLINE __attribute__((noinline))
#   else
#       define PUGI__NO_INLINE
#   endif
#endif
91
// PUGI__UNLIKELY(cond): evaluates to cond; on GCC-compatible compilers it also hints that cond is rarely true
#if !defined(__GNUC__) || defined(__c2__)
#   define PUGI__UNLIKELY(cond) (cond)
#else
#   define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0)
#endif
98
// Compile-time assertion usable at block scope: a false condition yields an array of negative size
#define PUGI__STATIC_ASSERT(cond) \
    { \
        static const char condition_failed[(cond) ? 1 : -1] = {0}; \
        (void)condition_failed[0]; \
    }
101
// Digital Mars C++ mis-handles chars loaded from memory and passed via the stack;
// PUGI__DMC_VOLATILE expands to 'volatile' there and to nothing everywhere else
#ifndef __DMC__
#   define PUGI__DMC_VOLATILE
#else
#   define PUGI__DMC_VOLATILE volatile
#endif
108
// PUGI__UNSIGNED_OVERFLOW marks functions that rely on unsigned wrap-around so the integer sanitizer skips them.
// Only clang gets the attribute: gcc8 has no_sanitize but not unsigned-integer-overflow and warns "attribute directive ignored".
#if defined(__clang__) && defined(__has_attribute)
#   if __has_attribute(no_sanitize)
#       define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow")))
#   endif
#endif

// every other configuration: the marker expands to nothing
#ifndef PUGI__UNSIGNED_OVERFLOW
#   define PUGI__UNSIGNED_OVERFLOW
#endif
119
// Borland C++ may leave ::memcpy & co. undeclared depending on header include order; pull them in from std.
// (std::memcpy cannot be used unconditionally because some compilers do not provide it at all.)
#ifdef __BORLANDC__
#   ifndef __MEM_H_USING_LIST
using std::memcpy;
using std::memmove;
using std::memset;
#   endif
#endif
126
// Some MinGW/GCC headers omit LLONG_MIN/LLONG_MAX/ULLONG_MAX from limits.h in some configurations;
// when all three are missing, rebuild them from the compiler's built-in __LONG_LONG_MAX__
#if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX)
#   define LLONG_MAX __LONG_LONG_MAX__
#   define LLONG_MIN (-LLONG_MAX - 1LL)
#   define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL)
#endif
133
// PUGI__MSVC_CRT_VERSION describes the CRT feature level rather than the compiler:
// Windows CE is pinned to the MSVC7.1 level, and the macro stays undefined for __S3E__ and non-MSVC builds
#if defined(_WIN32_WCE)
#   define PUGI__MSVC_CRT_VERSION 1310 // MSVC7.1
#elif defined(_MSC_VER) && !defined(__S3E__)
#   define PUGI__MSVC_CRT_VERSION _MSC_VER
#endif
140
// Bounded formatting wrapper. The destination must be an array of known size because the bound is
// taken with sizeof/_countof; the last-resort fallback is plain sprintf, which is not bounded at all.
#if __cplusplus >= 201103
#   define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__)
#else
#   if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
#       define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__)
#   else
#       define PUGI__SNPRINTF sprintf
#   endif
#endif
149
// Linkage of implementation functions: 'inline' in header-only mode, ordinary functions otherwise
#ifdef PUGIXML_HEADER_ONLY
#   define PUGI__FN inline
#   define PUGI__FN_NO_INLINE inline
#else
#   define PUGI__FN
#   define PUGI__FN_NO_INLINE PUGI__NO_INLINE
#endif

// Implementation details live in pugi::impl. In source mode they are additionally wrapped in an anonymous
// namespace; header-only mode keeps them named to prevent binary bloat, and MSVC6 (_MSC_VER < 1300)
// has a bug with anonymous namespaces nested inside namespaces.
#if defined(PUGIXML_HEADER_ONLY) || (defined(_MSC_VER) && _MSC_VER < 1300)
#   define PUGI__NS_BEGIN namespace pugi { namespace impl {
#   define PUGI__NS_END } }
#else
#   define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace {
#   define PUGI__NS_END } } }
#endif
167
// Fixed-width integers and uintptr_t: taken from <stdint.h> where it exists,
// otherwise declared inside namespace pugi for old MSVC (< 1600) and old Borland (< 0x561)
#if !((defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561))
#   include <stdint.h>
#else
namespace pugi
{
#   ifndef _UINTPTR_T_DEFINED
    typedef size_t uintptr_t;
#   endif

    typedef unsigned __int8 uint8_t;
    typedef unsigned __int16 uint16_t;
    typedef unsigned __int32 uint32_t;
}
#endif
183
184 // Memory allocation
185 PUGI__NS_BEGIN
186 PUGI__FN void* default_allocate(size_t size)
187 {
188 return malloc(size);
189 }
190
191 PUGI__FN void default_deallocate(void* ptr)
192 {
193 free(ptr);
194 }
195
196 template <typename T>
197 struct xml_memory_management_function_storage
198 {
199 static allocation_function allocate;
200 static deallocation_function deallocate;
201 };
202
203 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them
204 // Without a template<> we'll get multiple definitions of the same static
205 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate;
206 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate;
207
208 typedef xml_memory_management_function_storage<int> xml_memory;
209 PUGI__NS_END
210
211 // String utilities
212 PUGI__NS_BEGIN
213 // Get string length
214 PUGI__FN size_t strlength(const char_t* s)
215 {
216 assert(s);
217
218 #ifdef PUGIXML_WCHAR_MODE
219 return wcslen(s);
220 #else
221 return strlen(s);
222 #endif
223 }
224
225 // Compare two strings
226 PUGI__FN bool strequal(const char_t* src, const char_t* dst)
227 {
228 assert(src && dst);
229
230 #ifdef PUGIXML_WCHAR_MODE
231 return wcscmp(src, dst) == 0;
232 #else
233 return strcmp(src, dst) == 0;
234 #endif
235 }
236
237 // Compare lhs with [rhs_begin, rhs_end)
238 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count)
239 {
240 for (size_t i = 0; i < count; ++i)
241 if (lhs[i] != rhs[i])
242 return false;
243
244 return lhs[count] == 0;
245 }
246
247 // Get length of wide string, even if CRT lacks wide character support
248 PUGI__FN size_t strlength_wide(const wchar_t* s)
249 {
250 assert(s);
251
252 #ifdef PUGIXML_WCHAR_MODE
253 return wcslen(s);
254 #else
255 const wchar_t* end = s;
256 while (*end) end++;
257 return static_cast<size_t>(end - s);
258 #endif
259 }
260 PUGI__NS_END
261
262 // auto_ptr-like object for exception recovery
263 PUGI__NS_BEGIN
// Scope guard that pairs a pointer with a cleanup function.
// The destructor invokes the function on the pointer unless release() was called first.
template <typename T> struct auto_deleter
{
    typedef void (*D)(T*);

    T* data;
    D deleter;

    auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_) {}

    ~auto_deleter()
    {
        if (!data) return;

        deleter(data);
    }

    // Hands the pointer back to the caller and disarms the guard
    T* release()
    {
        T* const owned = data;

        data = 0;

        return owned;
    }
};
287 PUGI__NS_END
288
289 #ifdef PUGIXML_COMPACT
290 PUGI__NS_BEGIN
291 class compact_hash_table
292 {
293 public:
294 compact_hash_table(): _items(0), _capacity(0), _count(0)
295 {
296 }
297
298 void clear()
299 {
300 if (_items)
301 {
302 xml_memory::deallocate(_items);
303 _items = 0;
304 _capacity = 0;
305 _count = 0;
306 }
307 }
308
309 void* find(const void* key)
310 {
311 if (_capacity == 0) return 0;
312
313 item_t* item = get_item(key);
314 assert(item);
315 assert(item->key == key || (item->key == 0 && item->value == 0));
316
317 return item->value;
318 }
319
320 void insert(const void* key, void* value)
321 {
322 assert(_capacity != 0 && _count < _capacity - _capacity / 4);
323
324 item_t* item = get_item(key);
325 assert(item);
326
327 if (item->key == 0)
328 {
329 _count++;
330 item->key = key;
331 }
332
333 item->value = value;
334 }
335
336 bool reserve(size_t extra = 16)
337 {
338 if (_count + extra >= _capacity - _capacity / 4)
339 return rehash(_count + extra);
340
341 return true;
342 }
343
344 private:
345 struct item_t
346 {
347 const void* key;
348 void* value;
349 };
350
351 item_t* _items;
352 size_t _capacity;
353
354 size_t _count;
355
356 bool rehash(size_t count);
357
358 item_t* get_item(const void* key)
359 {
360 assert(key);
361 assert(_capacity > 0);
362
363 size_t hashmod = _capacity - 1;
364 size_t bucket = hash(key) & hashmod;
365
366 for (size_t probe = 0; probe <= hashmod; ++probe)
367 {
368 item_t& probe_item = _items[bucket];
369
370 if (probe_item.key == key || probe_item.key == 0)
371 return &probe_item;
372
373 // hash collision, quadratic probing
374 bucket = (bucket + probe + 1) & hashmod;
375 }
376
377 assert(false && "Hash table is full"); // unreachable
378 return 0;
379 }
380
381 static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key)
382 {
383 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff);
384
385 // MurmurHash3 32-bit finalizer
386 h ^= h >> 16;
387 h *= 0x85ebca6bu;
388 h ^= h >> 13;
389 h *= 0xc2b2ae35u;
390 h ^= h >> 16;
391
392 return h;
393 }
394 };
395
396 PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count)
397 {
398 size_t capacity = 32;
399 while (count >= capacity - capacity / 4)
400 capacity *= 2;
401
402 compact_hash_table rt;
403 rt._capacity = capacity;
404 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity));
405
406 if (!rt._items)
407 return false;
408
409 memset(rt._items, 0, sizeof(item_t) * capacity);
410
411 for (size_t i = 0; i < _capacity; ++i)
412 if (_items[i].key)
413 rt.insert(_items[i].key, _items[i].value);
414
415 if (_items)
416 xml_memory::deallocate(_items);
417
418 _capacity = capacity;
419 _items = rt._items;
420
421 assert(_count == rt._count);
422
423 return true;
424 }
425
426 PUGI__NS_END
427 #endif
428
429 PUGI__NS_BEGIN
// granularity, in bytes, of blocks handed out from a memory page
#ifndef PUGIXML_COMPACT
static const uintptr_t xml_memory_block_alignment = sizeof(void*);
#else
static const uintptr_t xml_memory_block_alignment = 4;
#endif

// extra metadata bits kept in the low byte of a node/attribute header
static const uintptr_t xml_memory_page_contents_shared_mask = 0x40;
static const uintptr_t xml_memory_page_name_allocated_mask = 0x20;
static const uintptr_t xml_memory_page_value_allocated_mask = 0x10;
static const uintptr_t xml_memory_page_type_mask = 0x0f;

// combined masks for string uniqueness
static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_contents_shared_mask | xml_memory_page_name_allocated_mask;
static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_contents_shared_mask | xml_memory_page_value_allocated_mask;
445
// Header <-> page helpers.
// Non-compact mode: a header word stores the byte distance from the owning page to the object in its
// upper bits (shifted left by 8) and the flag/type bits in its low byte; the page is recovered by
// subtracting that distance from the address of the header itself.
// Compact mode: the page link lives inside compact_header, so no packed header word is built.
#ifdef PUGIXML_COMPACT
    #define PUGI__GETHEADER_IMPL(object, page, flags) // unused
    #define PUGI__GETPAGE_IMPL(header) (header).get_page()
#else
    #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags))
    // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
    // the header argument is parenthesized at every use so that any lvalue expression can be passed safely
    #define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&(header)) - ((header) >> 8))))
#endif

// Page that owns node/attribute n
#define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header)
// Node type stored in the low bits of n's header
#define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask)
457
struct xml_allocator;

// Header placed at the start of every memory page; the page payload follows it directly in the same block.
// Pages form a doubly linked list through prev/next.
struct xml_memory_page
{
    // Turns raw memory into a page header with every field cleared and returns it
    static xml_memory_page* construct(void* memory)
    {
        xml_memory_page* page = static_cast<xml_memory_page*>(memory);

        page->allocator = 0;

        page->prev = 0;
        page->next = 0;

        page->busy_size = 0;
        page->freed_size = 0;

    #ifdef PUGIXML_COMPACT
        page->compact_string_base = 0;
        page->compact_shared_parent = 0;
        page->compact_page_marker = 0;
    #endif

        return page;
    }

    xml_allocator* allocator;

    xml_memory_page* prev;
    xml_memory_page* next;

    size_t busy_size;  // payload bytes handed out so far
    size_t freed_size; // payload bytes given back so far

#ifdef PUGIXML_COMPACT
    char_t* compact_string_base;   // base that compact_string offsets are measured from
    void* compact_shared_parent;   // parent pointer shared by compact_pointer_parent values on this page
    uint32_t* compact_page_marker; // most recent marker, used by compact_header to find the page
#endif
};
495
496 static const size_t xml_memory_page_size =
497 #ifdef PUGIXML_MEMORY_PAGE_SIZE
498 (PUGIXML_MEMORY_PAGE_SIZE)
499 #else
500 32768
501 #endif
502 - sizeof(xml_memory_page);
503
// Bookkeeping record stored immediately before every string returned by xml_allocator::allocate_string.
// Both fields are expressed in units of xml_memory_block_alignment, not bytes.
struct xml_memory_string_header
{
    uint16_t page_offset; // distance of this header from the first payload byte of its page (just past xml_memory_page)
    uint16_t full_size; // size of header + string rounded up to the alignment; 0 if string occupies whole page
};
509
510 struct xml_allocator
511 {
512 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size)
513 {
514 #ifdef PUGIXML_COMPACT
515 _hash = 0;
516 #endif
517 }
518
519 xml_memory_page* allocate_page(size_t data_size)
520 {
521 size_t size = sizeof(xml_memory_page) + data_size;
522
523 // allocate block with some alignment, leaving memory for worst-case padding
524 void* memory = xml_memory::allocate(size);
525 if (!memory) return 0;
526
527 // prepare page structure
528 xml_memory_page* page = xml_memory_page::construct(memory);
529 assert(page);
530
531 assert(this == _root->allocator);
532 page->allocator = this;
533
534 return page;
535 }
536
537 static void deallocate_page(xml_memory_page* page)
538 {
539 xml_memory::deallocate(page);
540 }
541
542 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page);
543
544 void* allocate_memory(size_t size, xml_memory_page*& out_page)
545 {
546 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size))
547 return allocate_memory_oob(size, out_page);
548
549 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size;
550
551 _busy_size += size;
552
553 out_page = _root;
554
555 return buf;
556 }
557
558 #ifdef PUGIXML_COMPACT
559 void* allocate_object(size_t size, xml_memory_page*& out_page)
560 {
561 void* result = allocate_memory(size + sizeof(uint32_t), out_page);
562 if (!result) return 0;
563
564 // adjust for marker
565 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker);
566
567 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment))
568 {
569 // insert new marker
570 uint32_t* marker = static_cast<uint32_t*>(result);
571
572 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page));
573 out_page->compact_page_marker = marker;
574
575 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block
576 // this will make sure deallocate_memory correctly tracks the size
577 out_page->freed_size += sizeof(uint32_t);
578
579 return marker + 1;
580 }
581 else
582 {
583 // roll back uint32_t part
584 _busy_size -= sizeof(uint32_t);
585
586 return result;
587 }
588 }
589 #else
590 void* allocate_object(size_t size, xml_memory_page*& out_page)
591 {
592 return allocate_memory(size, out_page);
593 }
594 #endif
595
596 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page)
597 {
598 if (page == _root) page->busy_size = _busy_size;
599
600 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size);
601 (void)!ptr;
602
603 page->freed_size += size;
604 assert(page->freed_size <= page->busy_size);
605
606 if (page->freed_size == page->busy_size)
607 {
608 if (page->next == 0)
609 {
610 assert(_root == page);
611
612 // top page freed, just reset sizes
613 page->busy_size = 0;
614 page->freed_size = 0;
615
616 #ifdef PUGIXML_COMPACT
617 // reset compact state to maximize efficiency
618 page->compact_string_base = 0;
619 page->compact_shared_parent = 0;
620 page->compact_page_marker = 0;
621 #endif
622
623 _busy_size = 0;
624 }
625 else
626 {
627 assert(_root != page);
628 assert(page->prev);
629
630 // remove from the list
631 page->prev->next = page->next;
632 page->next->prev = page->prev;
633
634 // deallocate
635 deallocate_page(page);
636 }
637 }
638 }
639
640 char_t* allocate_string(size_t length)
641 {
642 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment;
643
644 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset);
645
646 // allocate memory for string and header block
647 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t);
648
649 // round size up to block alignment boundary
650 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1);
651
652 xml_memory_page* page;
653 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page));
654
655 if (!header) return 0;
656
657 // setup header
658 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page);
659
660 assert(page_offset % xml_memory_block_alignment == 0);
661 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset);
662 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment);
663
664 // full_size == 0 for large strings that occupy the whole page
665 assert(full_size % xml_memory_block_alignment == 0);
666 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0));
667 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0);
668
669 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
670 // header is guaranteed a pointer-sized alignment, which should be enough for char_t
671 return static_cast<char_t*>(static_cast<void*>(header + 1));
672 }
673
674 void deallocate_string(char_t* string)
675 {
676 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings
677 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string
678
679 // get header
680 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1;
681 assert(header);
682
683 // deallocate
684 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment;
685 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset));
686
687 // if full_size == 0 then this string occupies the whole page
688 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment;
689
690 deallocate_memory(header, full_size, page);
691 }
692
693 bool reserve()
694 {
695 #ifdef PUGIXML_COMPACT
696 return _hash->reserve();
697 #else
698 return true;
699 #endif
700 }
701
702 xml_memory_page* _root;
703 size_t _busy_size;
704
705 #ifdef PUGIXML_COMPACT
706 compact_hash_table* _hash;
707 #endif
708 };
709
710 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page)
711 {
712 const size_t large_allocation_threshold = xml_memory_page_size / 4;
713
714 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size);
715 out_page = page;
716
717 if (!page) return 0;
718
719 if (size <= large_allocation_threshold)
720 {
721 _root->busy_size = _busy_size;
722
723 // insert page at the end of linked list
724 page->prev = _root;
725 _root->next = page;
726 _root = page;
727
728 _busy_size = size;
729 }
730 else
731 {
732 // insert page before the end of linked list, so that it is deleted as soon as possible
733 // the last page is not deleted even if it's empty (see deallocate_memory)
734 assert(_root->prev);
735
736 page->prev = _root->prev;
737 page->next = _root;
738
739 _root->prev->next = page;
740 _root->prev = page;
741
742 page->busy_size = size;
743 }
744
745 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page);
746 }
747 PUGI__NS_END
748
749 #ifdef PUGIXML_COMPACT
750 PUGI__NS_BEGIN
// Compact-mode pointers store distances in units of compact_alignment bytes (4 = 1 << 2);
// compact_header statically asserts that this equals xml_memory_block_alignment
static const uintptr_t compact_alignment_log2 = 2;
static const uintptr_t compact_alignment = 1 << compact_alignment_log2;
753
754 class compact_header
755 {
756 public:
757 compact_header(xml_memory_page* page, unsigned int flags)
758 {
759 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment);
760
761 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker));
762 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment);
763
764 _page = static_cast<unsigned char>(offset >> compact_alignment_log2);
765 _flags = static_cast<unsigned char>(flags);
766 }
767
768 void operator&=(uintptr_t mod)
769 {
770 _flags &= static_cast<unsigned char>(mod);
771 }
772
773 void operator|=(uintptr_t mod)
774 {
775 _flags |= static_cast<unsigned char>(mod);
776 }
777
778 uintptr_t operator&(uintptr_t mod) const
779 {
780 return _flags & mod;
781 }
782
783 xml_memory_page* get_page() const
784 {
785 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
786 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2);
787 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker));
788
789 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page)));
790 }
791
792 private:
793 unsigned char _page;
794 unsigned char _flags;
795 };
796
797 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset)
798 {
799 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset);
800
801 return header->get_page();
802 }
803
804 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object)
805 {
806 return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object));
807 }
808
809 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value)
810 {
811 compact_get_page(object, header_offset)->allocator->_hash->insert(object, value);
812 }
813
814 template <typename T, int header_offset, int start = -126> class compact_pointer
815 {
816 public:
817 compact_pointer(): _data(0)
818 {
819 }
820
821 void operator=(const compact_pointer& rhs)
822 {
823 *this = rhs + 0;
824 }
825
826 void operator=(T* value)
827 {
828 if (value)
829 {
830 // value is guaranteed to be compact-aligned; 'this' is not
831 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
832 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
833 // compensate for arithmetic shift rounding for negative values
834 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
835 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start;
836
837 if (static_cast<uintptr_t>(offset) <= 253)
838 _data = static_cast<unsigned char>(offset + 1);
839 else
840 {
841 compact_set_value<header_offset>(this, value);
842
843 _data = 255;
844 }
845 }
846 else
847 _data = 0;
848 }
849
850 operator T*() const
851 {
852 if (_data)
853 {
854 if (_data < 255)
855 {
856 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
857
858 return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment);
859 }
860 else
861 return compact_get_value<header_offset, T>(this);
862 }
863 else
864 return 0;
865 }
866
867 T* operator->() const
868 {
869 return *this;
870 }
871
872 private:
873 unsigned char _data;
874 };
875
876 template <typename T, int header_offset> class compact_pointer_parent
877 {
878 public:
879 compact_pointer_parent(): _data(0)
880 {
881 }
882
883 void operator=(const compact_pointer_parent& rhs)
884 {
885 *this = rhs + 0;
886 }
887
888 void operator=(T* value)
889 {
890 if (value)
891 {
892 // value is guaranteed to be compact-aligned; 'this' is not
893 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*)
894 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to
895 // compensate for arithmetic shift behavior for negative values
896 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this);
897 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533;
898
899 if (static_cast<uintptr_t>(offset) <= 65533)
900 {
901 _data = static_cast<unsigned short>(offset + 1);
902 }
903 else
904 {
905 xml_memory_page* page = compact_get_page(this, header_offset);
906
907 if (PUGI__UNLIKELY(page->compact_shared_parent == 0))
908 page->compact_shared_parent = value;
909
910 if (page->compact_shared_parent == value)
911 {
912 _data = 65534;
913 }
914 else
915 {
916 compact_set_value<header_offset>(this, value);
917
918 _data = 65535;
919 }
920 }
921 }
922 else
923 {
924 _data = 0;
925 }
926 }
927
928 operator T*() const
929 {
930 if (_data)
931 {
932 if (_data < 65534)
933 {
934 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1);
935
936 return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment);
937 }
938 else if (_data == 65534)
939 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent);
940 else
941 return compact_get_value<header_offset, T>(this);
942 }
943 else
944 return 0;
945 }
946
947 T* operator->() const
948 {
949 return *this;
950 }
951
952 private:
953 uint16_t _data;
954 };
955
956 template <int header_offset, int base_offset> class compact_string
957 {
958 public:
959 compact_string(): _data(0)
960 {
961 }
962
963 void operator=(const compact_string& rhs)
964 {
965 *this = rhs + 0;
966 }
967
968 void operator=(char_t* value)
969 {
970 if (value)
971 {
972 xml_memory_page* page = compact_get_page(this, header_offset);
973
974 if (PUGI__UNLIKELY(page->compact_string_base == 0))
975 page->compact_string_base = value;
976
977 ptrdiff_t offset = value - page->compact_string_base;
978
979 if (static_cast<uintptr_t>(offset) < (65535 << 7))
980 {
981 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
982 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset));
983
984 if (*base == 0)
985 {
986 *base = static_cast<uint16_t>((offset >> 7) + 1);
987 _data = static_cast<unsigned char>((offset & 127) + 1);
988 }
989 else
990 {
991 ptrdiff_t remainder = offset - ((*base - 1) << 7);
992
993 if (static_cast<uintptr_t>(remainder) <= 253)
994 {
995 _data = static_cast<unsigned char>(remainder + 1);
996 }
997 else
998 {
999 compact_set_value<header_offset>(this, value);
1000
1001 _data = 255;
1002 }
1003 }
1004 }
1005 else
1006 {
1007 compact_set_value<header_offset>(this, value);
1008
1009 _data = 255;
1010 }
1011 }
1012 else
1013 {
1014 _data = 0;
1015 }
1016 }
1017
1018 operator char_t*() const
1019 {
1020 if (_data)
1021 {
1022 if (_data < 255)
1023 {
1024 xml_memory_page* page = compact_get_page(this, header_offset);
1025
1026 // round-trip through void* to silence 'cast increases required alignment of target type' warnings
1027 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset));
1028 assert(*base);
1029
1030 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1);
1031
1032 return page->compact_string_base + offset;
1033 }
1034 else
1035 {
1036 return compact_get_value<header_offset, char_t>(this);
1037 }
1038 }
1039 else
1040 return 0;
1041 }
1042
1043 private:
1044 unsigned char _data;
1045 };
1046 PUGI__NS_END
1047 #endif
1048
1049 #ifdef PUGIXML_COMPACT
1050 namespace pugi
1051 {
    // Compact-mode attribute record; the constructor asserts that it is exactly 8 bytes.
    // Member order is part of the encoding: the header_offset (and base_offset) template arguments below
    // are each member's byte distance from 'header' (and from 'namevalue_base'), so members must not be reordered.
    struct xml_attribute_struct
    {
        xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0)
        {
            PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8);
        }

        impl::compact_header header; // bytes 0-1: page locator + flags

        uint16_t namevalue_base; // bytes 2-3: shared 16-bit base used by 'name' and 'value'

        impl::compact_string<4, 2> name; // byte 4
        impl::compact_string<5, 3> value; // byte 5

        impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; // byte 6: default start, can point backwards
        impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; // byte 7: start 0, encodes non-negative distances only
    };
1069
    // Compact-mode node record; the constructor asserts that it is exactly 12 bytes.
    // Member order is part of the encoding: the header_offset (and base_offset) template arguments below
    // are each member's byte distance from 'header' (and from 'namevalue_base'), so members must not be reordered.
    struct xml_node_struct
    {
        xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0)
        {
            PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12);
        }

        impl::compact_header header; // bytes 0-1: page locator + node type/flags

        uint16_t namevalue_base; // bytes 2-3: shared 16-bit base used by 'name' and 'value'

        impl::compact_string<4, 2> name; // byte 4
        impl::compact_string<5, 3> value; // byte 5

        impl::compact_pointer_parent<xml_node_struct, 6> parent; // bytes 6-7

        impl::compact_pointer<xml_node_struct, 8, 0> first_child; // byte 8

        impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; // byte 9: default start, can point backwards
        impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; // byte 10

        impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; // byte 11
    };
1093 }
1094 #else
1095 namespace pugi
1096 {
// Attribute record used when PUGIXML_COMPACT is not defined: a header word plus raw pointers.
struct xml_attribute_struct
{
	// Null-initializes the strings and links, then computes the header word from this object's address,
	// the owning page and flags 0 via PUGI__GETHEADER_IMPL.
	xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0)
	{
		header = PUGI__GETHEADER_IMPL(this, page, 0);
	}

	// header word; destroy_attribute tests the name/value "allocated" mask bits in it,
	// and PUGI__GETPAGE uses the record to find the owning page
	uintptr_t header;

	char_t* name;
	char_t* value;

	// previous attribute; append_attribute stores the list tail here for the first attribute
	xml_attribute_struct* prev_attribute_c;
	// next attribute; null for the last attribute
	xml_attribute_struct* next_attribute;
};
1112
// Node record used when PUGIXML_COMPACT is not defined: a header word plus raw pointers.
struct xml_node_struct
{
	// Null-initializes the strings and links, then computes the header word from this object's address,
	// the owning page and the node type via PUGI__GETHEADER_IMPL.
	xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0)
	{
		header = PUGI__GETHEADER_IMPL(this, page, type);
	}

	// header word; destroy_node tests the name/value "allocated" mask bits in it,
	// and PUGI__GETPAGE uses the record to find the owning page
	uintptr_t header;

	char_t* name;
	char_t* value;

	xml_node_struct* parent;

	// head of the child list (null when the node has no children)
	xml_node_struct* first_child;

	// previous sibling; append_node stores the list tail here for the first child
	xml_node_struct* prev_sibling_c;
	// next sibling; null for the last child
	xml_node_struct* next_sibling;

	// head of the attribute list (null when the node has no attributes)
	xml_attribute_struct* first_attribute;
};
1134 }
1135 #endif
1136
1137 PUGI__NS_BEGIN
// Singly linked list node that records one character buffer; xml_document_struct keeps the list head in extra_buffers.
// NOTE(review): presumably these are additional buffers owned by the document that must be released with it - confirm at the use sites.
struct xml_extra_buffer
{
	char_t* buffer;
	xml_extra_buffer* next;
};
1143
// Document root record: it is both the root node (type node_document) and the allocator for the tree.
// get_document relies on this by casting a page's allocator pointer back to xml_document_struct.
struct xml_document_struct: public xml_node_struct, public xml_allocator
{
	// Builds the root node and the allocator on the same page; starts with no buffer and an empty extra buffer list.
	xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0)
	{
	}

	const char_t* buffer;

	// head of the xml_extra_buffer list
	xml_extra_buffer* extra_buffers;

#ifdef PUGIXML_COMPACT
	// only present in compact mode
	compact_hash_table hash;
#endif
};
1158
1159 template <typename Object> inline xml_allocator& get_allocator(const Object* object)
1160 {
1161 assert(object);
1162
1163 return *PUGI__GETPAGE(object)->allocator;
1164 }
1165
1166 template <typename Object> inline xml_document_struct& get_document(const Object* object)
1167 {
1168 assert(object);
1169
1170 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator);
1171 }
1172 PUGI__NS_END
1173
1174 // Low-level DOM operations
1175 PUGI__NS_BEGIN
1176 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc)
1177 {
1178 xml_memory_page* page;
1179 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page);
1180 if (!memory) return 0;
1181
1182 return new (memory) xml_attribute_struct(page);
1183 }
1184
1185 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type)
1186 {
1187 xml_memory_page* page;
1188 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page);
1189 if (!memory) return 0;
1190
1191 return new (memory) xml_node_struct(page, type);
1192 }
1193
1194 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc)
1195 {
1196 if (a->header & impl::xml_memory_page_name_allocated_mask)
1197 alloc.deallocate_string(a->name);
1198
1199 if (a->header & impl::xml_memory_page_value_allocated_mask)
1200 alloc.deallocate_string(a->value);
1201
1202 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a));
1203 }
1204
1205 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc)
1206 {
1207 if (n->header & impl::xml_memory_page_name_allocated_mask)
1208 alloc.deallocate_string(n->name);
1209
1210 if (n->header & impl::xml_memory_page_value_allocated_mask)
1211 alloc.deallocate_string(n->value);
1212
1213 for (xml_attribute_struct* attr = n->first_attribute; attr; )
1214 {
1215 xml_attribute_struct* next = attr->next_attribute;
1216
1217 destroy_attribute(attr, alloc);
1218
1219 attr = next;
1220 }
1221
1222 for (xml_node_struct* child = n->first_child; child; )
1223 {
1224 xml_node_struct* next = child->next_sibling;
1225
1226 destroy_node(child, alloc);
1227
1228 child = next;
1229 }
1230
1231 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n));
1232 }
1233
1234 inline void append_node(xml_node_struct* child, xml_node_struct* node)
1235 {
1236 child->parent = node;
1237
1238 xml_node_struct* head = node->first_child;
1239
1240 if (head)
1241 {
1242 xml_node_struct* tail = head->prev_sibling_c;
1243
1244 tail->next_sibling = child;
1245 child->prev_sibling_c = tail;
1246 head->prev_sibling_c = child;
1247 }
1248 else
1249 {
1250 node->first_child = child;
1251 child->prev_sibling_c = child;
1252 }
1253 }
1254
1255 inline void prepend_node(xml_node_struct* child, xml_node_struct* node)
1256 {
1257 child->parent = node;
1258
1259 xml_node_struct* head = node->first_child;
1260
1261 if (head)
1262 {
1263 child->prev_sibling_c = head->prev_sibling_c;
1264 head->prev_sibling_c = child;
1265 }
1266 else
1267 child->prev_sibling_c = child;
1268
1269 child->next_sibling = head;
1270 node->first_child = child;
1271 }
1272
1273 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node)
1274 {
1275 xml_node_struct* parent = node->parent;
1276
1277 child->parent = parent;
1278
1279 xml_node_struct* next = node->next_sibling;
1280
1281 if (next)
1282 next->prev_sibling_c = child;
1283 else
1284 parent->first_child->prev_sibling_c = child;
1285
1286 child->next_sibling = next;
1287 child->prev_sibling_c = node;
1288
1289 node->next_sibling = child;
1290 }
1291
1292 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node)
1293 {
1294 xml_node_struct* parent = node->parent;
1295
1296 child->parent = parent;
1297
1298 xml_node_struct* prev = node->prev_sibling_c;
1299
1300 if (prev->next_sibling)
1301 prev->next_sibling = child;
1302 else
1303 parent->first_child = child;
1304
1305 child->prev_sibling_c = prev;
1306 child->next_sibling = node;
1307
1308 node->prev_sibling_c = child;
1309 }
1310
1311 inline void remove_node(xml_node_struct* node)
1312 {
1313 xml_node_struct* parent = node->parent;
1314
1315 xml_node_struct* next = node->next_sibling;
1316 xml_node_struct* prev = node->prev_sibling_c;
1317
1318 if (next)
1319 next->prev_sibling_c = prev;
1320 else
1321 parent->first_child->prev_sibling_c = prev;
1322
1323 if (prev->next_sibling)
1324 prev->next_sibling = next;
1325 else
1326 parent->first_child = next;
1327
1328 node->parent = 0;
1329 node->prev_sibling_c = 0;
1330 node->next_sibling = 0;
1331 }
1332
1333 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1334 {
1335 xml_attribute_struct* head = node->first_attribute;
1336
1337 if (head)
1338 {
1339 xml_attribute_struct* tail = head->prev_attribute_c;
1340
1341 tail->next_attribute = attr;
1342 attr->prev_attribute_c = tail;
1343 head->prev_attribute_c = attr;
1344 }
1345 else
1346 {
1347 node->first_attribute = attr;
1348 attr->prev_attribute_c = attr;
1349 }
1350 }
1351
1352 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1353 {
1354 xml_attribute_struct* head = node->first_attribute;
1355
1356 if (head)
1357 {
1358 attr->prev_attribute_c = head->prev_attribute_c;
1359 head->prev_attribute_c = attr;
1360 }
1361 else
1362 attr->prev_attribute_c = attr;
1363
1364 attr->next_attribute = head;
1365 node->first_attribute = attr;
1366 }
1367
1368 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1369 {
1370 xml_attribute_struct* next = place->next_attribute;
1371
1372 if (next)
1373 next->prev_attribute_c = attr;
1374 else
1375 node->first_attribute->prev_attribute_c = attr;
1376
1377 attr->next_attribute = next;
1378 attr->prev_attribute_c = place;
1379 place->next_attribute = attr;
1380 }
1381
1382 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node)
1383 {
1384 xml_attribute_struct* prev = place->prev_attribute_c;
1385
1386 if (prev->next_attribute)
1387 prev->next_attribute = attr;
1388 else
1389 node->first_attribute = attr;
1390
1391 attr->prev_attribute_c = prev;
1392 attr->next_attribute = place;
1393 place->prev_attribute_c = attr;
1394 }
1395
1396 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node)
1397 {
1398 xml_attribute_struct* next = attr->next_attribute;
1399 xml_attribute_struct* prev = attr->prev_attribute_c;
1400
1401 if (next)
1402 next->prev_attribute_c = prev;
1403 else
1404 node->first_attribute->prev_attribute_c = prev;
1405
1406 if (prev->next_attribute)
1407 prev->next_attribute = next;
1408 else
1409 node->first_attribute = next;
1410
1411 attr->prev_attribute_c = 0;
1412 attr->next_attribute = 0;
1413 }
1414
1415 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element)
1416 {
1417 if (!alloc.reserve()) return 0;
1418
1419 xml_node_struct* child = allocate_node(alloc, type);
1420 if (!child) return 0;
1421
1422 append_node(child, node);
1423
1424 return child;
1425 }
1426
1427 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc)
1428 {
1429 if (!alloc.reserve()) return 0;
1430
1431 xml_attribute_struct* attr = allocate_attribute(alloc);
1432 if (!attr) return 0;
1433
1434 append_attribute(attr, node);
1435
1436 return attr;
1437 }
1438 PUGI__NS_END
1439
1440 // Helper classes for code generation
1441 PUGI__NS_BEGIN
// Compile-time "false" tag: exposes value == 0.
// Used as a template argument (e.g. utf16_decoder<opt_false> selects decoding without byte swapping).
struct opt_false
{
	enum { value = 0 };
};
1446
// Compile-time "true" tag: exposes value == 1.
// Used as a template argument (e.g. utf16_decoder<opt_true> selects decoding with byte swapping).
struct opt_true
{
	enum { value = 1 };
};
1451 PUGI__NS_END
1452
1453 // Unicode utilities
1454 PUGI__NS_BEGIN
// Returns the 16-bit value with its two bytes exchanged.
inline uint16_t endian_swap(uint16_t value)
{
	const unsigned int low_byte = value & 0xff;
	const unsigned int high_byte = value >> 8;

	return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
1459
// Returns the 32-bit value with its four bytes in reverse order.
inline uint32_t endian_swap(uint32_t value)
{
	const uint32_t byte0 = value & 0xff;
	const uint32_t byte1 = value & 0xff00;
	const uint32_t byte2 = value & 0xff0000;
	const uint32_t byte3 = value >> 24;

	return (byte0 << 24) | (byte1 << 8) | (byte2 >> 8) | byte3;
}
1464
// Output traits that only count how many UTF-8 code units a code point needs.
// The running total is passed in and returned as 'result'.
struct utf8_counter
{
	typedef size_t value_type;

	// Code points below U+10000: 1, 2 or 3 bytes.
	static value_type low(value_type result, uint32_t ch)
	{
		// U+0800..U+FFFF unless one of the shorter ranges matches
		size_t units = 3;

		if (ch < 0x80)
			units = 1; // U+0000..U+007F
		else if (ch < 0x800)
			units = 2; // U+0080..U+07FF

		return result + units;
	}

	// Code points U+10000..U+10FFFF always take 4 bytes.
	static value_type high(value_type result, uint32_t)
	{
		return result + 4;
	}
};
1485
// Output traits that encode code points as UTF-8 into a byte buffer.
// Each function writes at 'result' and returns the position just past the written bytes;
// the caller must provide enough room.
struct utf8_writer
{
	typedef uint8_t* value_type;

	// Encodes a code point below U+10000 using 1, 2 or 3 bytes.
	static value_type low(value_type result, uint32_t ch)
	{
		if (ch >= 0x800)
		{
			// U+0800..U+FFFF
			*result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
			*result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
			*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
			return result;
		}

		if (ch >= 0x80)
		{
			// U+0080..U+07FF
			*result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
			*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
			return result;
		}

		// U+0000..U+007F
		*result++ = static_cast<uint8_t>(ch);
		return result;
	}

	// Encodes a code point in U+10000..U+10FFFF using 4 bytes.
	static value_type high(value_type result, uint32_t ch)
	{
		*result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
		*result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
		*result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
		*result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
		return result;
	}

	// Picks low() or high() depending on whether the code point is below U+10000.
	static value_type any(value_type result, uint32_t ch)
	{
		if (ch < 0x10000)
			return low(result, ch);

		return high(result, ch);
	}
};
1530
// Output traits that only count UTF-16 code units; the running total travels in 'result'.
struct utf16_counter
{
	typedef size_t value_type;

	// A code point below U+10000 is one code unit.
	static value_type low(value_type result, uint32_t)
	{
		return 1 + result;
	}

	// A code point at or above U+10000 is a surrogate pair, i.e. two code units.
	static value_type high(value_type result, uint32_t)
	{
		return 2 + result;
	}
};
1545
// Output traits that encode code points as UTF-16 code units into a buffer.
// Each function writes at 'result' and returns the position just past the written units.
struct utf16_writer
{
	typedef uint16_t* value_type;

	// Stores a code point below U+10000 as a single unit.
	static value_type low(value_type result, uint32_t ch)
	{
		result[0] = static_cast<uint16_t>(ch);

		return result + 1;
	}

	// Stores a code point at or above U+10000 as a surrogate pair.
	static value_type high(value_type result, uint32_t ch)
	{
		// 20-bit offset: upper ten bits go into the lead surrogate, lower ten into the trail surrogate
		const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);

		result[0] = static_cast<uint16_t>(0xD800 + (offset >> 10));
		result[1] = static_cast<uint16_t>(0xDC00 + (offset & 0x3ff));

		return result + 2;
	}

	// Picks low() or high() depending on whether the code point is below U+10000.
	static value_type any(value_type result, uint32_t ch)
	{
		if (ch < 0x10000)
			return low(result, ch);

		return high(result, ch);
	}
};
1573
// Output traits that only count UTF-32 code units: every code point is exactly one unit.
struct utf32_counter
{
	typedef size_t value_type;

	static value_type low(value_type result, uint32_t)
	{
		return 1 + result;
	}

	static value_type high(value_type result, uint32_t)
	{
		return 1 + result;
	}
};
1588
// Output traits that store code points as UTF-32 units.
// All three entry points store the code point unchanged and advance by one unit.
struct utf32_writer
{
	typedef uint32_t* value_type;

	static value_type low(value_type result, uint32_t ch)
	{
		result[0] = ch;

		return result + 1;
	}

	static value_type high(value_type result, uint32_t ch)
	{
		result[0] = ch;

		return result + 1;
	}

	static value_type any(value_type result, uint32_t ch)
	{
		result[0] = ch;

		return result + 1;
	}
};
1614
// Output traits that store code points as single Latin-1 bytes.
// Anything that does not fit into one byte is replaced with '?'.
struct latin1_writer
{
	typedef uint8_t* value_type;

	// Code points up to 255 are stored as is; larger ones become '?'.
	static value_type low(value_type result, uint32_t ch)
	{
		if (ch > 255)
			*result = '?';
		else
			*result = static_cast<uint8_t>(ch);

		return result + 1;
	}

	// Code points at or above U+10000 never fit, so the value is ignored.
	static value_type high(value_type result, uint32_t)
	{
		*result = '?';

		return result + 1;
	}
};
1635
// Decodes a UTF-8 byte sequence and feeds every decoded code point to the output traits:
// Traits::low for code points produced by 1-3 byte sequences, Traits::high for 4 byte sequences.
// A byte that does not start a well-formed sequence (bad lead byte, missing or invalid continuation
// bytes, input ending too early) is skipped on its own without producing output.
struct utf8_decoder
{
	typedef uint8_t type;

	// data/size: input bytes; result: initial traits state. Returns the final traits state.
	template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
	{
		// payload bits of a continuation byte
		const uint8_t payload_mask = 0x3f;

		while (size)
		{
			const uint8_t first = *data;

			if (first < 0x80)
			{
				// 0xxxxxxx -> U+0000..U+007F
				result = Traits::low(result, first);
				++data;
				--size;

				// ascii fast path, only taken while the read position is 4-byte aligned
				if ((reinterpret_cast<uintptr_t>(data) & 3) != 0)
					continue;

				// test four bytes at once for a set high bit
				// (round-trip through void* to silence 'cast increases required alignment of target type' warnings)
				while (size >= 4 && (*static_cast<const uint32_t*>(static_cast<const void*>(data)) & 0x80808080) == 0)
				{
					for (int i = 0; i < 4; ++i)
						result = Traits::low(result, data[i]);

					data += 4;
					size -= 4;
				}
			}
			else if (first >= 0xC0 && first < 0xE0 && size >= 2 && (data[1] & 0xc0) == 0x80)
			{
				// 110xxxxx 10xxxxxx -> U+0080..U+07FF
				result = Traits::low(result, ((first & 0x1f) << 6) | (data[1] & payload_mask));
				data += 2;
				size -= 2;
			}
			else if (first >= 0xE0 && first < 0xF0 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
			{
				// 1110xxxx 10xxxxxx 10xxxxxx -> U+0800..U+FFFF
				result = Traits::low(result, ((first & 0x0f) << 12) | ((data[1] & payload_mask) << 6) | (data[2] & payload_mask));
				data += 3;
				size -= 3;
			}
			else if (first >= 0xF0 && first < 0xF8 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
			{
				// 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx -> U+10000..U+10FFFF
				result = Traits::high(result, ((first & 0x07) << 18) | ((data[1] & payload_mask) << 12) | ((data[2] & payload_mask) << 6) | (data[3] & payload_mask));
				data += 4;
				size -= 4;
			}
			else
			{
				// 10xxxxxx, 11111xxx or a truncated/malformed sequence -> skip one byte
				++data;
				--size;
			}
		}

		return result;
	}
};
1702
1703 template <typename opt_swap> struct utf16_decoder
1704 {
1705 typedef uint16_t type;
1706
1707 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits)
1708 {
1709 while (size)
1710 {
1711 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data;
1712
1713 // U+0000..U+D7FF
1714 if (lead < 0xD800)
1715 {
1716 result = Traits::low(result, lead);
1717 data += 1;
1718 size -= 1;
1719 }
1720 // U+E000..U+FFFF
1721 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000)
1722 {
1723 result = Traits::low(result, lead);
1724 data += 1;
1725 size -= 1;
1726 }
1727 // surrogate pair lead
1728 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2)
1729 {
1730 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1];
1731
1732 if (static_cast<unsigned int>(next - 0xDC00) < 0x400)
1733 {
1734 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff));
1735 data += 2;
1736 size -= 2;
1737 }
1738 else
1739 {
1740 data += 1;
1741 size -= 1;
1742 }
1743 }
1744 else
1745 {
1746 data += 1;
1747 size -= 1;
1748 }
1749 }
1750
1751 return result;
1752 }
1753 };
1754
1755 template <typename opt_swap> struct utf32_decoder
1756 {
1757 typedef uint32_t type;
1758
1759 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits)
1760 {
1761 while (size)
1762 {
1763 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data;
1764
1765 // U+0000..U+FFFF
1766 if (lead < 0x10000)
1767 {
1768 result = Traits::low(result, lead);
1769 data += 1;
1770 size -= 1;
1771 }
1772 // U+10000..U+10FFFF
1773 else
1774 {
1775 result = Traits::high(result, lead);
1776 data += 1;
1777 size -= 1;
1778 }
1779 }
1780
1781 return result;
1782 }
1783 };
1784
// Feeds every input byte to Traits::low as a code point of the same numeric value.
struct latin1_decoder
{
	typedef uint8_t type;

	// data/size: input bytes; result: initial traits state. Returns the final traits state.
	template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
	{
		for (size_t i = 0; i < size; ++i)
			result = Traits::low(result, data[i]);

		return result;
	}
};
1801
// Maps sizeof(wchar_t) to the matching code unit type and UTF helpers.
// Only sizes 2 and 4 are specialized; any other size leaves the template undefined.
template <size_t size> struct wchar_selector;

// 2-byte wchar_t is handled as UTF-16 without byte swapping
template <> struct wchar_selector<2>
{
	typedef uint16_t type;
	typedef utf16_counter counter;
	typedef utf16_writer writer;
	typedef utf16_decoder<opt_false> decoder;
};

// 4-byte wchar_t is handled as UTF-32 without byte swapping
template <> struct wchar_selector<4>
{
	typedef uint32_t type;
	typedef utf32_counter counter;
	typedef utf32_writer writer;
	typedef utf32_decoder<opt_false> decoder;
};

// counter and writer traits for this platform's wchar_t
typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter;
typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer;
1822
1823 struct wchar_decoder
1824 {
1825 typedef wchar_t type;
1826
1827 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits)
1828 {
1829 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder;
1830
1831 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits);
1832 }
1833 };
1834
1835 #ifdef PUGIXML_WCHAR_MODE
1836 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length)
1837 {
1838 for (size_t i = 0; i < length; ++i)
1839 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i])));
1840 }
1841 #endif
1842 PUGI__NS_END
1843
1844 PUGI__NS_BEGIN
// Bit flags stored in chartype_table and tested with PUGI__IS_CHARTYPE.
// Each flag names a character class; the trailing comment lists the characters that carry it.
enum chartype_t
{
	ct_parse_pcdata = 1,	// \0, &, \r, <
	ct_parse_attr = 2,		// \0, &, \r, ', "
	ct_parse_attr_ws = 4,	// \0, &, \r, ', ", \n, tab
	ct_space = 8,			// \r, \n, space, tab
	ct_parse_cdata = 16,	// \0, ], >, \r
	ct_parse_comment = 32,	// \0, -, >, \r
	ct_symbol = 64,			// Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
	ct_start_symbol = 128	// Any symbol > 127, a-z, A-Z, _, :
};
1856
// Classification table indexed by character value: each entry is the bitwise OR of the chartype_t
// flags of that character (e.g. entry 0 is 55 = pcdata | attr | attr_ws | cdata | comment,
// entry 13 '\r' is 63, entry 32 ' ' is 8 = ct_space).
// All entries from 128 upwards are 192 = ct_symbol | ct_start_symbol.
static const unsigned char chartype_table[256] =
{
	55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
	0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31
	8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
	64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63
	0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95
	0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0, 0, 0, 0, 0,           // 112-127

	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 128+
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
};
1877
// Bit flags stored in chartypex_table and tested with PUGI__IS_CHARTYPEX.
// Each flag names a character class; the trailing comment lists the characters that carry it.
enum chartypex_t
{
	ctx_special_pcdata = 1,   // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
	ctx_special_attr = 2,     // Any symbol >= 0 and < 32, &, <, ", '
	ctx_start_symbol = 4,	  // Any symbol > 127, a-z, A-Z, _
	ctx_digit = 8,			  // 0-9
	ctx_symbol = 16			  // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
1886
// Classification table indexed by character value: each entry is the bitwise OR of the chartypex_t
// flags of that character (e.g. digits are 24 = ctx_digit | ctx_symbol, letters and '_' are
// 20 = ctx_start_symbol | ctx_symbol, tab/\n/\r are 2 = ctx_special_attr only).
// All entries from 128 upwards are 20.
static const unsigned char chartypex_table[256] =
{
	3,  3,  3,  3,  3,  3,  3,  3,     3,  2,  2,  3,  3,  2,  3,  3,     // 0-15
	3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,     // 16-31
	0,  0,  2,  0,  0,  0,  3,  2,     0,  0,  0,  0,  0, 16, 16,  0,     // 32-47
	24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  1,  0,     // 48-63

	0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 64-79
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20,    // 80-95
	0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 96-111
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,     // 112-127

	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,    // 128+
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
};
1908
// Character classification macros.
// PUGI__IS_CHARTYPE_IMPL(c, ct, table) yields the table entry for character c masked with ct,
// i.e. a non-zero value when c carries at least one of the requested flags.
// In wchar mode, characters below 128 index the table directly and every other character uses entry 128;
// otherwise the character is converted to unsigned char and used as the index.
#ifdef PUGIXML_WCHAR_MODE
	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
#else
	#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#endif

// Convenience forms bound to chartype_table (chartype_t flags) and chartypex_table (chartypex_t flags).
#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1917
1918 PUGI__FN bool is_little_endian()
1919 {
1920 unsigned int ui = 1;
1921
1922 return *reinterpret_cast<unsigned char*>(&ui) == 1;
1923 }
1924
1925 PUGI__FN xml_encoding get_wchar_encoding()
1926 {
1927 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4);
1928
1929 if (sizeof(wchar_t) == 2)
1930 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
1931 else
1932 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
1933 }
1934
// Scans an XML declaration at the start of a byte buffer for its encoding pseudo-attribute.
// data/size: the raw input bytes.
// out_encoding/out_length: receive the position and length of the encoding name (the run of
// ct_symbol characters after the opening quote).
// Returns true only when the buffer starts with "<?xml" followed by whitespace, an
// encoding = "name" / 'name' field is found before any '?', and the closing quote matches.
// Note that out_encoding and out_length are assigned before the closing quote is verified,
// so they can be modified even when the function returns false.
PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length)
{
	// Both helpers work on the local 'offset'; PUGI__SCANCHAR returns false from the enclosing
	// function when the expected character is missing or the input ends.
#define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; }
#define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; }

	// check if we have a non-empty XML declaration
	if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space)))
		return false;

	// scan XML declaration until the encoding field
	for (size_t i = 6; i + 1 < size; ++i)
	{
		// declaration can not contain ? in quoted values
		if (data[i] == '?')
			return false;

		if (data[i] == 'e' && data[i + 1] == 'n')
		{
			size_t offset = i;

			// encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed
			PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o');
			PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g');

			// S? = S?
			PUGI__SCANCHARTYPE(ct_space);
			PUGI__SCANCHAR('=');
			PUGI__SCANCHARTYPE(ct_space);

			// the only two valid delimiters are ' and "
			// (anything other than a double quote is treated as an expected single quote, so the scan below rejects other characters)
			uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\'';

			PUGI__SCANCHAR(delimiter);

			size_t start = offset;

			out_encoding = data + offset;

			// the encoding name is the longest run of ct_symbol characters
			PUGI__SCANCHARTYPE(ct_symbol);

			out_length = offset - start;

			// the name must be terminated by the same quote that opened it
			PUGI__SCANCHAR(delimiter);

			return true;
		}
	}

	return false;

#undef PUGI__SCANCHAR
#undef PUGI__SCANCHARTYPE
}
1988
1989 PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size)
1990 {
1991 // skip encoding autodetection if input buffer is too small
1992 if (size < 4) return encoding_utf8;
1993
1994 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3];
1995
1996 // look for BOM in first few bytes
1997 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be;
1998 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le;
1999 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be;
2000 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le;
2001 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8;
2002
2003 // look for <, <? or <?xm in various encodings
2004 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be;
2005 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le;
2006 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be;
2007 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le;
2008
2009 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early)
2010 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be;
2011 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le;
2012
2013 // no known BOM detected; parse declaration
2014 const uint8_t* enc = 0;
2015 size_t enc_length = 0;
2016
2017 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length))
2018 {
2019 // iso-8859-1 (case-insensitive)
2020 if (enc_length == 10
2021 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o'
2022 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9'
2023 && enc[8] == '-' && enc[9] == '1')
2024 return encoding_latin1;
2025
2026 // latin1 (case-insensitive)
2027 if (enc_length == 6
2028 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't'
2029 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n'
2030 && enc[5] == '1')
2031 return encoding_latin1;
2032 }
2033
2034 return encoding_utf8;
2035 }
2036
2037 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size)
2038 {
2039 // replace wchar encoding with utf implementation
2040 if (encoding == encoding_wchar) return get_wchar_encoding();
2041
2042 // replace utf16 encoding with utf16 with specific endianness
2043 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2044
2045 // replace utf32 encoding with utf32 with specific endianness
2046 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2047
2048 // only do autodetection if no explicit encoding is requested
2049 if (encoding != encoding_auto) return encoding;
2050
2051 // try to guess encoding (based on XML specification, Appendix F.1)
2052 const uint8_t* data = static_cast<const uint8_t*>(contents);
2053
2054 return guess_buffer_encoding(data, size);
2055 }
2056
2057 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2058 {
2059 size_t length = size / sizeof(char_t);
2060
2061 if (is_mutable)
2062 {
2063 out_buffer = static_cast<char_t*>(const_cast<void*>(contents));
2064 out_length = length;
2065 }
2066 else
2067 {
2068 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2069 if (!buffer) return false;
2070
2071 if (contents)
2072 memcpy(buffer, contents, length * sizeof(char_t));
2073 else
2074 assert(length == 0);
2075
2076 buffer[length] = 0;
2077
2078 out_buffer = buffer;
2079 out_length = length + 1;
2080 }
2081
2082 return true;
2083 }
2084
2085 #ifdef PUGIXML_WCHAR_MODE
2086 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re)
2087 {
2088 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) ||
2089 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be);
2090 }
2091
2092 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2093 {
2094 const char_t* data = static_cast<const char_t*>(contents);
2095 size_t length = size / sizeof(char_t);
2096
2097 if (is_mutable)
2098 {
2099 char_t* buffer = const_cast<char_t*>(data);
2100
2101 convert_wchar_endian_swap(buffer, data, length);
2102
2103 out_buffer = buffer;
2104 out_length = length;
2105 }
2106 else
2107 {
2108 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2109 if (!buffer) return false;
2110
2111 convert_wchar_endian_swap(buffer, data, length);
2112 buffer[length] = 0;
2113
2114 out_buffer = buffer;
2115 out_length = length + 1;
2116 }
2117
2118 return true;
2119 }
2120
2121 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2122 {
2123 const typename D::type* data = static_cast<const typename D::type*>(contents);
2124 size_t data_length = size / sizeof(typename D::type);
2125
2126 // first pass: get length in wchar_t units
2127 size_t length = D::process(data, data_length, 0, wchar_counter());
2128
2129 // allocate buffer of suitable length
2130 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2131 if (!buffer) return false;
2132
2133 // second pass: convert utf16 input to wchar_t
2134 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer);
2135 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer());
2136
2137 assert(oend == obegin + length);
2138 *oend = 0;
2139
2140 out_buffer = buffer;
2141 out_length = length + 1;
2142
2143 return true;
2144 }
2145
2146 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2147 {
2148 // get native encoding
2149 xml_encoding wchar_encoding = get_wchar_encoding();
2150
2151 // fast path: no conversion required
2152 if (encoding == wchar_encoding)
2153 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2154
2155 // only endian-swapping is required
2156 if (need_endian_swap_utf(encoding, wchar_encoding))
2157 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable);
2158
2159 // source encoding is utf8
2160 if (encoding == encoding_utf8)
2161 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder());
2162
2163 // source encoding is utf16
2164 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2165 {
2166 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2167
2168 return (native_encoding == encoding) ?
2169 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2170 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2171 }
2172
2173 // source encoding is utf32
2174 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2175 {
2176 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2177
2178 return (native_encoding == encoding) ?
2179 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2180 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2181 }
2182
2183 // source encoding is latin1
2184 if (encoding == encoding_latin1)
2185 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder());
2186
2187 assert(false && "Invalid encoding"); // unreachable
2188 return false;
2189 }
2190 #else
2191 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D)
2192 {
2193 const typename D::type* data = static_cast<const typename D::type*>(contents);
2194 size_t data_length = size / sizeof(typename D::type);
2195
2196 // first pass: get length in utf8 units
2197 size_t length = D::process(data, data_length, 0, utf8_counter());
2198
2199 // allocate buffer of suitable length
2200 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2201 if (!buffer) return false;
2202
2203 // second pass: convert utf16 input to utf8
2204 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2205 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer());
2206
2207 assert(oend == obegin + length);
2208 *oend = 0;
2209
2210 out_buffer = buffer;
2211 out_length = length + 1;
2212
2213 return true;
2214 }
2215
2216 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size)
2217 {
2218 for (size_t i = 0; i < size; ++i)
2219 if (data[i] > 127)
2220 return i;
2221
2222 return size;
2223 }
2224
2225 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable)
2226 {
2227 const uint8_t* data = static_cast<const uint8_t*>(contents);
2228 size_t data_length = size;
2229
2230 // get size of prefix that does not need utf8 conversion
2231 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length);
2232 assert(prefix_length <= data_length);
2233
2234 const uint8_t* postfix = data + prefix_length;
2235 size_t postfix_length = data_length - prefix_length;
2236
2237 // if no conversion is needed, just return the original buffer
2238 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2239
2240 // first pass: get length in utf8 units
2241 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter());
2242
2243 // allocate buffer of suitable length
2244 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
2245 if (!buffer) return false;
2246
2247 // second pass: convert latin1 input to utf8
2248 memcpy(buffer, data, prefix_length);
2249
2250 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer);
2251 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer());
2252
2253 assert(oend == obegin + length);
2254 *oend = 0;
2255
2256 out_buffer = buffer;
2257 out_length = length + 1;
2258
2259 return true;
2260 }
2261
2262 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable)
2263 {
2264 // fast path: no conversion required
2265 if (encoding == encoding_utf8)
2266 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable);
2267
2268 // source encoding is utf16
2269 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
2270 {
2271 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
2272
2273 return (native_encoding == encoding) ?
2274 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) :
2275 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>());
2276 }
2277
2278 // source encoding is utf32
2279 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
2280 {
2281 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
2282
2283 return (native_encoding == encoding) ?
2284 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) :
2285 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>());
2286 }
2287
2288 // source encoding is latin1
2289 if (encoding == encoding_latin1)
2290 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable);
2291
2292 assert(false && "Invalid encoding"); // unreachable
2293 return false;
2294 }
2295 #endif
2296
2297 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length)
2298 {
2299 // get length in utf8 characters
2300 return wchar_decoder::process(str, length, 0, utf8_counter());
2301 }
2302
2303 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length)
2304 {
2305 // convert to utf8
2306 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer);
2307 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer());
2308
2309 assert(begin + size == end);
2310 (void)!end;
2311 (void)!size;
2312 }
2313
2314 #ifndef PUGIXML_NO_STL
2315 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length)
2316 {
2317 // first pass: get length in utf8 characters
2318 size_t size = as_utf8_begin(str, length);
2319
2320 // allocate resulting string
2321 std::string result;
2322 result.resize(size);
2323
2324 // second pass: convert to utf8
2325 if (size > 0) as_utf8_end(&result[0], size, str, length);
2326
2327 return result;
2328 }
2329
2330 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size)
2331 {
2332 const uint8_t* data = reinterpret_cast<const uint8_t*>(str);
2333
2334 // first pass: get length in wchar_t units
2335 size_t length = utf8_decoder::process(data, size, 0, wchar_counter());
2336
2337 // allocate resulting string
2338 std::basic_string<wchar_t> result;
2339 result.resize(length);
2340
2341 // second pass: convert to wchar_t
2342 if (length > 0)
2343 {
2344 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]);
2345 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer());
2346
2347 assert(begin + length == end);
2348 (void)!end;
2349 }
2350
2351 return result;
2352 }
2353 #endif
2354
2355 template <typename Header>
2356 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target)
2357 {
2358 // never reuse shared memory
2359 if (header & xml_memory_page_contents_shared_mask) return false;
2360
2361 size_t target_length = strlength(target);
2362
2363 // always reuse document buffer memory if possible
2364 if ((header & header_mask) == 0) return target_length >= length;
2365
2366 // reuse heap memory if waste is not too great
2367 const size_t reuse_threshold = 32;
2368
2369 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2);
2370 }
2371
2372 template <typename String, typename Header>
2373 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length)
2374 {
2375 if (source_length == 0)
2376 {
2377 // empty string and null pointer are equivalent, so just deallocate old memory
2378 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2379
2380 if (header & header_mask) alloc->deallocate_string(dest);
2381
2382 // mark the string as not allocated
2383 dest = 0;
2384 header &= ~header_mask;
2385
2386 return true;
2387 }
2388 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest))
2389 {
2390 // we can reuse old buffer, so just copy the new data (including zero terminator)
2391 memcpy(dest, source, source_length * sizeof(char_t));
2392 dest[source_length] = 0;
2393
2394 return true;
2395 }
2396 else
2397 {
2398 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator;
2399
2400 if (!alloc->reserve()) return false;
2401
2402 // allocate new buffer
2403 char_t* buf = alloc->allocate_string(source_length + 1);
2404 if (!buf) return false;
2405
2406 // copy the string (including zero terminator)
2407 memcpy(buf, source, source_length * sizeof(char_t));
2408 buf[source_length] = 0;
2409
2410 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures)
2411 if (header & header_mask) alloc->deallocate_string(dest);
2412
2413 // the string is now allocated, so set the flag
2414 dest = buf;
2415 header |= header_mask;
2416
2417 return true;
2418 }
2419 }
2420
2421 struct gap
2422 {
2423 char_t* end;
2424 size_t size;
2425
2426 gap(): end(0), size(0)
2427 {
2428 }
2429
2430 // Push new gap, move s count bytes further (skipping the gap).
2431 // Collapse previous gap.
2432 void push(char_t*& s, size_t count)
2433 {
2434 if (end) // there was a gap already; collapse it
2435 {
2436 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...)
2437 assert(s >= end);
2438 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2439 }
2440
2441 s += count; // end of current gap
2442
2443 // "merge" two gaps
2444 end = s;
2445 size += count;
2446 }
2447
2448 // Collapse all gaps, return past-the-end pointer
2449 char_t* flush(char_t* s)
2450 {
2451 if (end)
2452 {
2453 // Move [old_gap_end, current_pos) to [old_gap_start, ...)
2454 assert(s >= end);
2455 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end));
2456
2457 return s - size;
2458 }
2459 else return s;
2460 }
2461 };
2462
// Decodes, in place, the character or entity reference that starts at s (s points at '&').
// Recognized forms: &#NNN; (decimal), &#xHHH; (hex), &amp; &apos; &gt; &lt; &quot;.
// On a match the decoded character is written at s and the rest of the reference text is
// registered with g as a gap to be removed later; the return value points just past the ';'.
// Otherwise the buffer is left untouched and the return value is the position where matching
// stopped, so the caller keeps the text as is and resumes scanning from there.
PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
{
	// stre scans ahead through the reference while s stays at the '&'
	char_t* stre = s + 1;

	switch (*stre)
	{
		case '#': // &#...
		{
			// accumulated code point; unsigned arithmetic, so overlong digit runs wrap instead of overflowing
			unsigned int ucsc = 0;

			if (stre[1] == 'x') // &#x... (hex code)
			{
				stre += 2;

				char_t ch = *stre;

				// "&#x;" has no digits: not a reference
				if (ch == ';') return stre;

				for (;;)
				{
					// the unsigned cast turns "'0' <= ch <= '9'" into a single comparison
					if (static_cast<unsigned int>(ch - '0') <= 9)
						ucsc = 16 * ucsc + (ch - '0');
					// (ch | ' ') folds ASCII upper case onto lower case, so this accepts a-f and A-F
					else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5)
						ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
					else if (ch == ';')
						break;
					else // cancel
						return stre;

					ch = *++stre;
				}

				// step over ';'
				++stre;
			}
			else // &#... (dec code)
			{
				char_t ch = *++stre;

				// "&#;" has no digits: not a reference
				if (ch == ';') return stre;

				for (;;)
				{
					if (static_cast<unsigned int>(ch - '0') <= 9)
						ucsc = 10 * ucsc + (ch - '0');
					else if (ch == ';')
						break;
					else // cancel
						return stre;

					ch = *++stre;
				}

				// step over ';'
				++stre;
			}

			// encode the code point at s in the native character type; s ends up just past what was written
		#ifdef PUGIXML_WCHAR_MODE
			s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
		#else
			s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
		#endif

			// whatever remains of the reference text, [s, stre), becomes a gap
			g.push(s, stre - s);
			return stre;
		}

		case 'a': // &a
		{
			++stre;

			if (*stre == 'm') // &am
			{
				// each ++stre runs only if the previous character matched (short-circuit &&)
				if (*++stre == 'p' && *++stre == ';') // &amp;
				{
					*s++ = '&';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
			}
			else if (*stre == 'p') // &ap
			{
				if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
				{
					*s++ = '\'';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
			}
			break;
		}

		case 'g': // &g
		{
			if (*++stre == 't' && *++stre == ';') // &gt;
			{
				*s++ = '>';
				++stre;

				g.push(s, stre - s);
				return stre;
			}
			break;
		}

		case 'l': // &l
		{
			if (*++stre == 't' && *++stre == ';') // &lt;
			{
				*s++ = '<';
				++stre;

				g.push(s, stre - s);
				return stre;
			}
			break;
		}

		case 'q': // &q
		{
			if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
			{
				*s++ = '"';
				++stre;

				g.push(s, stre - s);
				return stre;
			}
			break;
		}

		default:
			break;
	}

	// not a recognized reference: nothing was modified, continue from where matching stopped
	return stre;
}
2602
// Parser utilities
// These macros expand in place and rely on identifiers of the enclosing function:
// s (current position), ss, endch, optmsk, cursor, alloc, ch, error_offset, error_status.

// True if c equals e, or if c is the zero terminator and endch equals e
// (endch is presumably the character that the terminator replaced at the end of the buffer - TODO confirm against the caller)
#define PUGI__ENDSWITH(c, e)        ((c) == (e) || ((c) == 0 && endch == (e)))
// Advances s past characters classified as ct_space
#define PUGI__SKIPWS()              { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
// Non-zero if option bit(s) OPT are set in optmsk
#define PUGI__OPTSET(OPT)           ( optmsk & (OPT) )
// Appends a new node of TYPE under cursor and makes it the cursor; returns status_out_of_memory from the enclosing function on failure
#define PUGI__PUSHNODE(TYPE)        { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
// Moves cursor back to its parent
#define PUGI__POPNODE()             { cursor = cursor->parent; }
// Advances s until X holds or the zero terminator is reached
#define PUGI__SCANFOR(X)            { while (*s != 0 && !(X)) ++s; }
// Advances s while X holds (X itself must stop at the terminator)
#define PUGI__SCANWHILE(X)          { while (X) ++s; }
// Same as PUGI__SCANWHILE, unrolled four characters per iteration; X must test the local 'ss' instead of *s
#define PUGI__SCANWHILE_UNROLL(X)   { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
// Saves the current character in ch, overwrites it with a zero terminator and steps over it
#define PUGI__ENDSEG()              { ch = *s; *s = 0; ++s; }
// Records the error position m and status err, then returns a null pointer from the enclosing function
#define PUGI__THROW_ERROR(err, m)   return error_offset = m, error_status = err, static_cast<char_t*>(0)
// Raises err at m if s has reached the zero terminator
#define PUGI__CHECK_ERROR(err, m)   { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2615
2616 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch)
2617 {
2618 gap g;
2619
2620 while (true)
2621 {
2622 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment));
2623
2624 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2625 {
2626 *s++ = '\n'; // replace first one with 0x0a
2627
2628 if (*s == '\n') g.push(s, 1);
2629 }
2630 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here
2631 {
2632 *g.flush(s) = 0;
2633
2634 return s + (s[2] == '>' ? 3 : 2);
2635 }
2636 else if (*s == 0)
2637 {
2638 return 0;
2639 }
2640 else ++s;
2641 }
2642 }
2643
2644 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch)
2645 {
2646 gap g;
2647
2648 while (true)
2649 {
2650 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata));
2651
2652 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2653 {
2654 *s++ = '\n'; // replace first one with 0x0a
2655
2656 if (*s == '\n') g.push(s, 1);
2657 }
2658 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here
2659 {
2660 *g.flush(s) = 0;
2661
2662 return s + 1;
2663 }
2664 else if (*s == 0)
2665 {
2666 return 0;
2667 }
2668 else ++s;
2669 }
2670 }
2671
2672 typedef char_t* (*strconv_pcdata_t)(char_t*);
2673
2674 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl
2675 {
2676 static char_t* parse(char_t* s)
2677 {
2678 gap g;
2679
2680 char_t* begin = s;
2681
2682 while (true)
2683 {
2684 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata));
2685
2686 if (*s == '<') // PCDATA ends here
2687 {
2688 char_t* end = g.flush(s);
2689
2690 if (opt_trim::value)
2691 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2692 --end;
2693
2694 *end = 0;
2695
2696 return s + 1;
2697 }
2698 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair
2699 {
2700 *s++ = '\n'; // replace first one with 0x0a
2701
2702 if (*s == '\n') g.push(s, 1);
2703 }
2704 else if (opt_escape::value && *s == '&')
2705 {
2706 s = strconv_escape(s, g);
2707 }
2708 else if (*s == 0)
2709 {
2710 char_t* end = g.flush(s);
2711
2712 if (opt_trim::value)
2713 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space))
2714 --end;
2715
2716 *end = 0;
2717
2718 return s;
2719 }
2720 else ++s;
2721 }
2722 }
2723 };
2724
2725 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask)
2726 {
2727 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800);
2728
2729 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above
2730 {
2731 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse;
2732 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse;
2733 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse;
2734 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse;
2735 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse;
2736 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse;
2737 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse;
2738 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse;
2739 default: assert(false); return 0; // unreachable
2740 }
2741 }
2742
2743 typedef char_t* (*strconv_attribute_t)(char_t*, char_t);
2744
2745 template <typename opt_escape> struct strconv_attribute_impl
2746 {
2747 static char_t* parse_wnorm(char_t* s, char_t end_quote)
2748 {
2749 gap g;
2750
2751 // trim leading whitespaces
2752 if (PUGI__IS_CHARTYPE(*s, ct_space))
2753 {
2754 char_t* str = s;
2755
2756 do ++str;
2757 while (PUGI__IS_CHARTYPE(*str, ct_space));
2758
2759 g.push(s, str - s);
2760 }
2761
2762 while (true)
2763 {
2764 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));
2765
2766 if (*s == end_quote)
2767 {
2768 char_t* str = g.flush(s);
2769
2770 do *str-- = 0;
2771 while (PUGI__IS_CHARTYPE(*str, ct_space));
2772
2773 return s + 1;
2774 }
2775 else if (PUGI__IS_CHARTYPE(*s, ct_space))
2776 {
2777 *s++ = ' ';
2778
2779 if (PUGI__IS_CHARTYPE(*s, ct_space))
2780 {
2781 char_t* str = s + 1;
2782 while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;
2783
2784 g.push(s, str - s);
2785 }
2786 }
2787 else if (opt_escape::value && *s == '&')
2788 {
2789 s = strconv_escape(s, g);
2790 }
2791 else if (!*s)
2792 {
2793 return 0;
2794 }
2795 else ++s;
2796 }
2797 }
2798
2799 static char_t* parse_wconv(char_t* s, char_t end_quote)
2800 {
2801 gap g;
2802
2803 while (true)
2804 {
2805 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));
2806
2807 if (*s == end_quote)
2808 {
2809 *g.flush(s) = 0;
2810
2811 return s + 1;
2812 }
2813 else if (PUGI__IS_CHARTYPE(*s, ct_space))
2814 {
2815 if (*s == '\r')
2816 {
2817 *s++ = ' ';
2818
2819 if (*s == '\n') g.push(s, 1);
2820 }
2821 else *s++ = ' ';
2822 }
2823 else if (opt_escape::value && *s == '&')
2824 {
2825 s = strconv_escape(s, g);
2826 }
2827 else if (!*s)
2828 {
2829 return 0;
2830 }
2831 else ++s;
2832 }
2833 }
2834
2835 static char_t* parse_eol(char_t* s, char_t end_quote)
2836 {
2837 gap g;
2838
2839 while (true)
2840 {
2841 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2842
2843 if (*s == end_quote)
2844 {
2845 *g.flush(s) = 0;
2846
2847 return s + 1;
2848 }
2849 else if (*s == '\r')
2850 {
2851 *s++ = '\n';
2852
2853 if (*s == '\n') g.push(s, 1);
2854 }
2855 else if (opt_escape::value && *s == '&')
2856 {
2857 s = strconv_escape(s, g);
2858 }
2859 else if (!*s)
2860 {
2861 return 0;
2862 }
2863 else ++s;
2864 }
2865 }
2866
2867 static char_t* parse_simple(char_t* s, char_t end_quote)
2868 {
2869 gap g;
2870
2871 while (true)
2872 {
2873 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));
2874
2875 if (*s == end_quote)
2876 {
2877 *g.flush(s) = 0;
2878
2879 return s + 1;
2880 }
2881 else if (opt_escape::value && *s == '&')
2882 {
2883 s = strconv_escape(s, g);
2884 }
2885 else if (!*s)
2886 {
2887 return 0;
2888 }
2889 else ++s;
2890 }
2891 }
2892 };
2893
2894 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask)
2895 {
2896 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80);
2897
2898 switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above
2899 {
2900 case 0: return strconv_attribute_impl<opt_false>::parse_simple;
2901 case 1: return strconv_attribute_impl<opt_true>::parse_simple;
2902 case 2: return strconv_attribute_impl<opt_false>::parse_eol;
2903 case 3: return strconv_attribute_impl<opt_true>::parse_eol;
2904 case 4: return strconv_attribute_impl<opt_false>::parse_wconv;
2905 case 5: return strconv_attribute_impl<opt_true>::parse_wconv;
2906 case 6: return strconv_attribute_impl<opt_false>::parse_wconv;
2907 case 7: return strconv_attribute_impl<opt_true>::parse_wconv;
2908 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm;
2909 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm;
2910 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm;
2911 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm;
2912 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm;
2913 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm;
2914 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm;
2915 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm;
2916 default: assert(false); return 0; // unreachable
2917 }
2918 }
2919
2920 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0)
2921 {
2922 xml_parse_result result;
2923 result.status = status;
2924 result.offset = offset;
2925
2926 return result;
2927 }
2928
2929 struct xml_parser
2930 {
2931 xml_allocator* alloc;
2932 char_t* error_offset;
2933 xml_parse_status error_status;
2934
// Binds the parser to the allocator used for new nodes/attributes and starts with no error
// recorded (null error_offset, status_ok).
xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok)
{
}
2938
2939 // DOCTYPE consists of nested sections of the following possible types:
2940 // <!-- ... -->, <? ... ?>, "...", '...'
2941 // <![...]]>
2942 // <!...>
2943 // First group can not contain nested groups
2944 // Second group can contain nested groups of the same type
2945 // Third group can contain all other groups
2946 char_t* parse_doctype_primitive(char_t* s)
2947 {
2948 if (*s == '"' || *s == '\'')
2949 {
2950 // quoted string
2951 char_t ch = *s++;
2952 PUGI__SCANFOR(*s == ch);
2953 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2954
2955 s++;
2956 }
2957 else if (s[0] == '<' && s[1] == '?')
2958 {
2959 // <? ... ?>
2960 s += 2;
2961 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype
2962 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2963
2964 s += 2;
2965 }
2966 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-')
2967 {
2968 s += 4;
2969 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype
2970 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s);
2971
2972 s += 3;
2973 }
2974 else PUGI__THROW_ERROR(status_bad_doctype, s);
2975
2976 return s;
2977 }
2978
2979 char_t* parse_doctype_ignore(char_t* s)
2980 {
2981 size_t depth = 0;
2982
2983 assert(s[0] == '<' && s[1] == '!' && s[2] == '[');
2984 s += 3;
2985
2986 while (*s)
2987 {
2988 if (s[0] == '<' && s[1] == '!' && s[2] == '[')
2989 {
2990 // nested ignore section
2991 s += 3;
2992 depth++;
2993 }
2994 else if (s[0] == ']' && s[1] == ']' && s[2] == '>')
2995 {
2996 // ignore section end
2997 s += 3;
2998
2999 if (depth == 0)
3000 return s;
3001
3002 depth--;
3003 }
3004 else s++;
3005 }
3006
3007 PUGI__THROW_ERROR(status_bad_doctype, s);
3008 }
3009
3010 char_t* parse_doctype_group(char_t* s, char_t endch)
3011 {
3012 size_t depth = 0;
3013
3014 assert((s[0] == '<' || s[0] == 0) && s[1] == '!');
3015 s += 2;
3016
3017 while (*s)
3018 {
3019 if (s[0] == '<' && s[1] == '!' && s[2] != '-')
3020 {
3021 if (s[2] == '[')
3022 {
3023 // ignore
3024 s = parse_doctype_ignore(s);
3025 if (!s) return s;
3026 }
3027 else
3028 {
3029 // some control group
3030 s += 2;
3031 depth++;
3032 }
3033 }
3034 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'')
3035 {
3036 // unknown tag (forbidden), or some primitive group
3037 s = parse_doctype_primitive(s);
3038 if (!s) return s;
3039 }
3040 else if (*s == '>')
3041 {
3042 if (depth == 0)
3043 return s;
3044
3045 depth--;
3046 s++;
3047 }
3048 else s++;
3049 }
3050
3051 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s);
3052
3053 return s;
3054 }
3055
// Parses a construct that begins with "<!": a comment, a CDATA section or a DOCTYPE.
// s points at the '!'. cursor is the node new children are appended to; it is taken by value,
// so PUGI__PUSHNODE below only moves this local copy and the caller's cursor stays where it was.
// optmsk selects which constructs produce nodes (parse_comments, parse_cdata, parse_doctype)
// and whether line endings are normalized (parse_eol); skipped constructs are still scanned.
// Returns the position after the construct, or 0 with error_status/error_offset set.
char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch)
{
	// parse node contents, starting with exclamation mark
	++s;

	if (*s == '-') // '<!-...'
	{
		++s;

		if (*s == '-') // '<!--...'
		{
			++s;

			if (PUGI__OPTSET(parse_comments))
			{
				PUGI__PUSHNODE(node_comment); // Append a new node on the tree.
				cursor->value = s; // Save the offset.
			}

			if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments))
			{
				// the comment is kept and needs end-of-line conversion: strconv_comment also finds and removes the terminator
				s = strconv_comment(s, endch);

				if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value);
			}
			else
			{
				// Scan for terminating '-->'.
				PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>'));
				PUGI__CHECK_ERROR(status_bad_comment, s);

				if (PUGI__OPTSET(parse_comments))
					*s = 0; // Zero-terminate this segment at the first terminating '-'.

				s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'.
			}
		}
		else PUGI__THROW_ERROR(status_bad_comment, s);
	}
	else if (*s == '[')
	{
		// '<![CDATA[...'
		// each ++s runs only if the previous character matched (short-circuit &&)
		if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[')
		{
			++s;

			if (PUGI__OPTSET(parse_cdata))
			{
				PUGI__PUSHNODE(node_cdata); // Append a new node on the tree.
				cursor->value = s; // Save the offset.

				if (PUGI__OPTSET(parse_eol))
				{
					// returns a pointer to the second ']' of the terminator
					s = strconv_cdata(s, endch);

					if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value);
				}
				else
				{
					// Scan for terminating ']]>'.
					PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
					PUGI__CHECK_ERROR(status_bad_cdata, s);

					*s++ = 0; // Zero-terminate this segment.
				}
			}
			else // Flagged for discard, but we still have to scan for the terminator.
			{
				// Scan for terminating ']]>'.
				PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>'));
				PUGI__CHECK_ERROR(status_bad_cdata, s);

				++s;
			}

			// on every path above s now points at the second ']' of the terminator
			s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'.
		}
		else PUGI__THROW_ERROR(status_bad_cdata, s);
	}
	else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E'))
	{
		// s is two characters past the '<' ('<', '!' and the '-' test did not advance): go back to it for parse_doctype_group
		s -= 2;

		// doctype is only accepted when the cursor node has no parent
		if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s);

		// first character after the 9 characters of "<!DOCTYPE"
		char_t* mark = s + 9;

		s = parse_doctype_group(s, endch);
		if (!s) return s;

		// s is at the closing '>' or at the end of the buffer; turn a real '>' into the value terminator
		assert((*s == 0 && endch == '>') || *s == '>');
		if (*s) *s++ = 0;

		if (PUGI__OPTSET(parse_doctype))
		{
			// the stored value starts at the first non-whitespace character after "<!DOCTYPE"
			while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark;

			PUGI__PUSHNODE(node_doctype);

			cursor->value = mark;
		}
	}
	// buffer ended right after "<!": report the error for the construct that endch would have started
	else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s);
	else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s);
	else PUGI__THROW_ERROR(status_unrecognized_tag, s);

	return s;
}
3164
// Parses a construct that begins with "<?": a processing instruction or an XML declaration.
// s points at the '?'. ref_cursor is the caller's current node; it is updated on successful
// return only (error paths return before the write-back at the end).
// optmsk decides whether a node is created (parse_declaration for "<?xml", parse_pi otherwise);
// constructs that are not wanted are skipped up to their terminating "?>".
// Returns the position to continue parsing from, or 0 with error_status/error_offset set.
// For a declaration with contents, the cursor is left at the new node_declaration and the returned
// position is the start of its contents, so that the caller parses them as attributes.
char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch)
{
	// load into registers
	xml_node_struct* cursor = ref_cursor;
	char_t ch = 0;

	// parse node contents, starting with question mark
	++s;

	// read PI target
	char_t* target = s;

	if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s);

	PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol));
	PUGI__CHECK_ERROR(status_bad_pi, s);

	// determine node type; stricmp / strcasecmp is not portable
	// (| ' ' folds ASCII upper case onto lower case; the target must be exactly three characters long)
	bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s;

	if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi))
	{
		if (declaration)
		{
			// disallow non top-level declarations
			if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s);

			PUGI__PUSHNODE(node_declaration);
		}
		else
		{
			PUGI__PUSHNODE(node_pi);
		}

		cursor->name = target;

		// terminate the target name; ch receives the character that followed it
		PUGI__ENDSEG();

		// parse value/attributes
		if (ch == '?')
		{
			// empty node
			if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s);
			// step over '>' unless it is only implied by the end of the buffer
			s += (*s == '>');

			PUGI__POPNODE();
		}
		else if (PUGI__IS_CHARTYPE(ch, ct_space))
		{
			PUGI__SKIPWS();

			// scan for tag end
			char_t* value = s;

			PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
			PUGI__CHECK_ERROR(status_bad_pi, s);

			if (declaration)
			{
				// replace ending ? with / so that 'element' terminates properly
				*s = '/';

				// we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES
				s = value;
			}
			else
			{
				// store value and step over >
				cursor->value = value;

				PUGI__POPNODE();

				// overwrite the '?' with the value terminator
				PUGI__ENDSEG();

				s += (*s == '>');
			}
		}
		else PUGI__THROW_ERROR(status_bad_pi, s);
	}
	else
	{
		// node not requested: skip it without touching the buffer
		// scan for tag end
		PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>'));
		PUGI__CHECK_ERROR(status_bad_pi, s);

		s += (s[1] == '>' ? 2 : 1);
	}

	// store from registers
	ref_cursor = cursor;

	return s;
}
3258
3259 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch)
3260 {
3261 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk);
3262 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk);
3263
3264 char_t ch = 0;
3265 xml_node_struct* cursor = root;
3266 char_t* mark = s;
3267
3268 while (*s != 0)
3269 {
3270 if (*s == '<')
3271 {
3272 ++s;
3273
3274 LOC_TAG:
3275 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...'
3276 {
3277 PUGI__PUSHNODE(node_element); // Append a new node to the tree.
3278
3279 cursor->name = s;
3280
3281 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3282 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3283
3284 if (ch == '>')
3285 {
3286 // end of tag
3287 }
3288 else if (PUGI__IS_CHARTYPE(ch, ct_space))
3289 {
3290 LOC_ATTRIBUTES:
3291 while (true)
3292 {
3293 PUGI__SKIPWS(); // Eat any whitespace.
3294
3295 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #...
3296 {
3297 xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute.
3298 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s);
3299
3300 a->name = s; // Save the offset.
3301
3302 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator.
3303 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over.
3304
3305 if (PUGI__IS_CHARTYPE(ch, ct_space))
3306 {
3307 PUGI__SKIPWS(); // Eat any whitespace.
3308
3309 ch = *s;
3310 ++s;
3311 }
3312
3313 if (ch == '=') // '<... #=...'
3314 {
3315 PUGI__SKIPWS(); // Eat any whitespace.
3316
3317 if (*s == '"' || *s == '\'') // '<... #="...'
3318 {
3319 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'.
3320 ++s; // Step over the quote.
3321 a->value = s; // Save the offset.
3322
3323 s = strconv_attribute(s, ch);
3324
3325 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value);
3326
3327 // After this line the loop continues from the start;
3328 // Whitespaces, / and > are ok, symbols and EOF are wrong,
3329 // everything else will be detected
3330 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s);
3331 }
3332 else PUGI__THROW_ERROR(status_bad_attribute, s);
3333 }
3334 else PUGI__THROW_ERROR(status_bad_attribute, s);
3335 }
3336 else if (*s == '/')
3337 {
3338 ++s;
3339
3340 if (*s == '>')
3341 {
3342 PUGI__POPNODE();
3343 s++;
3344 break;
3345 }
3346 else if (*s == 0 && endch == '>')
3347 {
3348 PUGI__POPNODE();
3349 break;
3350 }
3351 else PUGI__THROW_ERROR(status_bad_start_element, s);
3352 }
3353 else if (*s == '>')
3354 {
3355 ++s;
3356
3357 break;
3358 }
3359 else if (*s == 0 && endch == '>')
3360 {
3361 break;
3362 }
3363 else PUGI__THROW_ERROR(status_bad_start_element, s);
3364 }
3365
3366 // !!!
3367 }
3368 else if (ch == '/') // '<#.../'
3369 {
3370 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s);
3371
3372 PUGI__POPNODE(); // Pop.
3373
3374 s += (*s == '>');
3375 }
3376 else if (ch == 0)
3377 {
3378 // we stepped over null terminator, backtrack & handle closing tag
3379 --s;
3380
3381 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s);
3382 }
3383 else PUGI__THROW_ERROR(status_bad_start_element, s);
3384 }
3385 else if (*s == '/')
3386 {
3387 ++s;
3388
3389 mark = s;
3390
3391 char_t* name = cursor->name;
3392 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3393
3394 while (PUGI__IS_CHARTYPE(*s, ct_symbol))
3395 {
3396 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3397 }
3398
3399 if (*name)
3400 {
3401 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s);
3402 else PUGI__THROW_ERROR(status_end_element_mismatch, mark);
3403 }
3404
3405 PUGI__POPNODE(); // Pop.
3406
3407 PUGI__SKIPWS();
3408
3409 if (*s == 0)
3410 {
3411 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3412 }
3413 else
3414 {
3415 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s);
3416 ++s;
3417 }
3418 }
3419 else if (*s == '?') // '<?...'
3420 {
3421 s = parse_question(s, cursor, optmsk, endch);
3422 if (!s) return s;
3423
3424 assert(cursor);
3425 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES;
3426 }
3427 else if (*s == '!') // '<!...'
3428 {
3429 s = parse_exclamation(s, cursor, optmsk, endch);
3430 if (!s) return s;
3431 }
3432 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s);
3433 else PUGI__THROW_ERROR(status_unrecognized_tag, s);
3434 }
3435 else
3436 {
3437 mark = s; // Save this offset while searching for a terminator.
3438
3439 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here.
3440
3441 if (*s == '<' || !*s)
3442 {
3443 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one
3444 assert(mark != s);
3445
3446 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata))
3447 {
3448 continue;
3449 }
3450 else if (PUGI__OPTSET(parse_ws_pcdata_single))
3451 {
3452 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue;
3453 }
3454 }
3455
3456 if (!PUGI__OPTSET(parse_trim_pcdata))
3457 s = mark;
3458
3459 if (cursor->parent || PUGI__OPTSET(parse_fragment))
3460 {
3461 if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value)
3462 {
3463 cursor->value = s; // Save the offset.
3464 }
3465 else
3466 {
3467 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree.
3468
3469 cursor->value = s; // Save the offset.
3470
3471 PUGI__POPNODE(); // Pop since this is a standalone.
3472 }
3473
3474 s = strconv_pcdata(s);
3475
3476 if (!*s) break;
3477 }
3478 else
3479 {
3480 PUGI__SCANFOR(*s == '<'); // '...<'
3481 if (!*s) break;
3482
3483 ++s;
3484 }
3485
3486 // We're after '<'
3487 goto LOC_TAG;
3488 }
3489 }
3490
3491 // check that last tag is closed
3492 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s);
3493
3494 return s;
3495 }
3496
3497 #ifdef PUGIXML_WCHAR_MODE
3498 static char_t* parse_skip_bom(char_t* s)
3499 {
3500 unsigned int bom = 0xfeff;
3501 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s;
3502 }
3503 #else
3504 static char_t* parse_skip_bom(char_t* s)
3505 {
3506 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s;
3507 }
3508 #endif
3509
3510 static bool has_element_node_siblings(xml_node_struct* node)
3511 {
3512 while (node)
3513 {
3514 if (PUGI__NODETYPE(node) == node_element) return true;
3515
3516 node = node->next_sibling;
3517 }
3518
3519 return false;
3520 }
3521
// Parses the `length` characters starting at `buffer` and attaches the resulting nodes below `root`.
// The buffer is modified: its last character is overwritten with 0 and the original value is handed to parse_tree as `endch`.
// Returns the parser status together with the error offset measured from `buffer` (0 when the parser recorded no offset).
// Unless parse_fragment is set, an empty buffer or a parse that added no element node is reported as status_no_document_element.
3522 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk)
3523 {
3524 // early-out for empty documents
3525 if (length == 0)
3526 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element);
3527
3528 // get last child of the root before parsing
// (remembered so that only nodes appended by this call are inspected afterwards; 0 when root has no children yet)
3529 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0;
3530
3531 // create parser on stack
3532 xml_parser parser(static_cast<xml_allocator*>(xmldoc));
3533
3534 // save last character and make buffer zero-terminated (speeds up parsing)
3535 char_t endch = buffer[length - 1];
3536 buffer[length - 1] = 0;
3537
3538 // skip BOM to make sure it does not end up as part of parse output
3539 char_t* buffer_data = parse_skip_bom(buffer);
3540
3541 // perform actual parsing
3542 parser.parse_tree(buffer_data, root, optmsk, endch);
3543
// error offsets are computed against `buffer`, not `buffer_data`, so a skipped BOM is still counted
3544 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0);
3545 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length);
3546
3547 if (result)
3548 {
3549 // since we removed last character, we have to handle the only possible false positive (stray <)
3550 if (endch == '<')
3551 return make_parse_result(status_unrecognized_tag, length - 1);
3552
3553 // check if there are any element nodes parsed
// start from the first node appended by this call: the sibling after the previous last child, or root's first child
3554 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0;
3555
3556 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed))
3557 return make_parse_result(status_no_document_element, length - 1);
3558 }
3559 else
3560 {
3561 // roll back offset if it occurs on a null terminator in the source buffer
3562 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0)
3563 result.offset--;
3564 }
3565
3566 return result;
3567 }
3568 };
3569
3570 // Output facilities
3571 PUGI__FN xml_encoding get_write_native_encoding()
3572 {
3573 #ifdef PUGIXML_WCHAR_MODE
3574 return get_wchar_encoding();
3575 #else
3576 return encoding_utf8;
3577 #endif
3578 }
3579
3580 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding)
3581 {
3582 // replace wchar encoding with utf implementation
3583 if (encoding == encoding_wchar) return get_wchar_encoding();
3584
3585 // replace utf16 encoding with utf16 with specific endianness
3586 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3587
3588 // replace utf32 encoding with utf32 with specific endianness
3589 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3590
3591 // only do autodetection if no explicit encoding is requested
3592 if (encoding != encoding_auto) return encoding;
3593
3594 // assume utf8 encoding
3595 return encoding_utf8;
3596 }
3597
3598 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T)
3599 {
3600 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3601
3602 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3603
3604 return static_cast<size_t>(end - dest) * sizeof(*dest);
3605 }
3606
3607 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap)
3608 {
3609 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type));
3610
3611 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T());
3612
3613 if (opt_swap)
3614 {
3615 for (typename T::value_type i = dest; i != end; ++i)
3616 *i = endian_swap(*i);
3617 }
3618
3619 return static_cast<size_t>(end - dest) * sizeof(*dest);
3620 }
3621
3622 #ifdef PUGIXML_WCHAR_MODE
3623 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3624 {
3625 if (length < 1) return 0;
3626
3627 // discard last character if it's the lead of a surrogate pair
3628 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length;
3629 }
3630
3631 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3632 {
3633 // only endian-swapping is required
3634 if (need_endian_swap_utf(encoding, get_wchar_encoding()))
3635 {
3636 convert_wchar_endian_swap(r_char, data, length);
3637
3638 return length * sizeof(char_t);
3639 }
3640
3641 // convert to utf8
3642 if (encoding == encoding_utf8)
3643 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer());
3644
3645 // convert to utf16
3646 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3647 {
3648 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3649
3650 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding);
3651 }
3652
3653 // convert to utf32
3654 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3655 {
3656 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3657
3658 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding);
3659 }
3660
3661 // convert to latin1
3662 if (encoding == encoding_latin1)
3663 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer());
3664
3665 assert(false && "Invalid encoding"); // unreachable
3666 return 0;
3667 }
3668 #else
3669 PUGI__FN size_t get_valid_length(const char_t* data, size_t length)
3670 {
3671 if (length < 5) return 0;
3672
3673 for (size_t i = 1; i <= 4; ++i)
3674 {
3675 uint8_t ch = static_cast<uint8_t>(data[length - i]);
3676
3677 // either a standalone character or a leading one
3678 if ((ch & 0xc0) != 0x80) return length - i;
3679 }
3680
3681 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk
3682 return length;
3683 }
3684
3685 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding)
3686 {
3687 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le)
3688 {
3689 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be;
3690
3691 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding);
3692 }
3693
3694 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le)
3695 {
3696 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be;
3697
3698 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding);
3699 }
3700
3701 if (encoding == encoding_latin1)
3702 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer());
3703
3704 assert(false && "Invalid encoding"); // unreachable
3705 return 0;
3706 }
3707 #endif
3708
// Collects char_t output in a fixed-size member buffer and forwards it to an xml_writer.
// When the target encoding equals get_write_native_encoding() the characters are written as-is;
// otherwise each flushed chunk is first converted into the `scratch` union via convert_buffer_output.
3709 class xml_buffered_writer
3710 {
// copy operations are declared before `public:` (i.e. private) and not defined in this class, so the writer is not copyable
3711 xml_buffered_writer(const xml_buffered_writer&);
3712 xml_buffered_writer& operator=(const xml_buffered_writer&);
3713
3714 public:
// Binds to writer_ and resolves user_encoding to a concrete encoding through get_write_encoding.
3715 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding))
3716 {
// the write() overloads below store up to 6 characters after a flush, so the buffer must not be tiny
3717 PUGI__STATIC_ASSERT(bufcapacity >= 8);
3718 }
3719
// Sends the buffered characters to the writer and empties the buffer.
// Always returns 0; the write() overloads assign it as their new buffer offset.
3720 size_t flush()
3721 {
3722 flush(buffer, bufsize);
3723 bufsize = 0;
3724 return 0;
3725 }
3726
// Writes `size` characters from `data` to the writer, converting them when the target encoding is not the native one.
// Does nothing for size == 0. Does not touch `buffer` or `bufsize`.
3727 void flush(const char_t* data, size_t size)
3728 {
3729 if (size == 0) return;
3730
3731 // fast path, just write data
3732 if (encoding == get_write_native_encoding())
3733 writer.write(data, size * sizeof(char_t));
3734 else
3735 {
3736 // convert chunk
3737 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding);
3738 assert(result <= sizeof(scratch));
3739
3740 // write data
3741 writer.write(scratch.data_u8, result);
3742 }
3743 }
3744
// Flushes pending output, then emits `length` characters from `data`.
// Data longer than the buffer is written straight through (native encoding) or converted in chunks;
// whatever remains (at most bufcapacity characters) is copied into the buffer for a later flush.
3745 void write_direct(const char_t* data, size_t length)
3746 {
3747 // flush the remaining buffer contents
3748 flush();
3749
3750 // handle large chunks
3751 if (length > bufcapacity)
3752 {
3753 if (encoding == get_write_native_encoding())
3754 {
3755 // fast path, can just write data chunk
3756 writer.write(data, length * sizeof(char_t));
3757 return;
3758 }
3759
3760 // need to convert in suitable chunks
3761 while (length > bufcapacity)
3762 {
3763 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer
3764 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary)
3765 size_t chunk_size = get_valid_length(data, bufcapacity);
3766 assert(chunk_size);
3767
3768 // convert chunk and write
3769 flush(data, chunk_size);
3770
3771 // iterate
3772 data += chunk_size;
3773 length -= chunk_size;
3774 }
3775
3776 // small tail is copied below
3777 bufsize = 0;
3778 }
3779
3780 memcpy(buffer + bufsize, data, length * sizeof(char_t));
3781 bufsize += length;
3782 }
3783
// Appends `length` characters from `data`: copied into the buffer when they fit in the remaining space,
// otherwise handed to write_direct.
3784 void write_buffer(const char_t* data, size_t length)
3785 {
3786 size_t offset = bufsize;
3787
3788 if (offset + length <= bufcapacity)
3789 {
3790 memcpy(buffer + offset, data, length * sizeof(char_t));
3791 bufsize = offset + length;
3792 }
3793 else
3794 {
3795 write_direct(data, length);
3796 }
3797 }
3798
// Appends the null-terminated string `data`. Characters are copied into the buffer until either the
// terminator is reached or the buffer is full; in the latter case the remainder goes through write_direct.
3799 void write_string(const char_t* data)
3800 {
3801 // write the part of the string that fits in the buffer
3802 size_t offset = bufsize;
3803
3804 while (*data && offset < bufcapacity)
3805 buffer[offset++] = *data++;
3806
3807 // write the rest
3808 if (offset < bufcapacity)
3809 {
3810 bufsize = offset;
3811 }
3812 else
3813 {
3814 // backtrack a bit if we have split the codepoint
// `length` is the number of characters just copied; `extra` is how many of them get_valid_length rejects at the tail
3815 size_t length = offset - bufsize;
3816 size_t extra = length - get_valid_length(data - length, length);
3817
// drop the rejected tail from the buffer and resend it together with the uncopied remainder
3818 bufsize = offset - extra;
3819
3820 write_direct(data - extra, strlength(data) + extra);
3821 }
3822 }
3823
// The write(d0, ...) overloads append 1 to 6 individual characters.
// Each one flushes first when fewer free slots remain than characters to store.
3824 void write(char_t d0)
3825 {
3826 size_t offset = bufsize;
3827 if (offset > bufcapacity - 1) offset = flush();
3828
3829 buffer[offset + 0] = d0;
3830 bufsize = offset + 1;
3831 }
3832
3833 void write(char_t d0, char_t d1)
3834 {
3835 size_t offset = bufsize;
3836 if (offset > bufcapacity - 2) offset = flush();
3837
3838 buffer[offset + 0] = d0;
3839 buffer[offset + 1] = d1;
3840 bufsize = offset + 2;
3841 }
3842
3843 void write(char_t d0, char_t d1, char_t d2)
3844 {
3845 size_t offset = bufsize;
3846 if (offset > bufcapacity - 3) offset = flush();
3847
3848 buffer[offset + 0] = d0;
3849 buffer[offset + 1] = d1;
3850 buffer[offset + 2] = d2;
3851 bufsize = offset + 3;
3852 }
3853
3854 void write(char_t d0, char_t d1, char_t d2, char_t d3)
3855 {
3856 size_t offset = bufsize;
3857 if (offset > bufcapacity - 4) offset = flush();
3858
3859 buffer[offset + 0] = d0;
3860 buffer[offset + 1] = d1;
3861 buffer[offset + 2] = d2;
3862 buffer[offset + 3] = d3;
3863 bufsize = offset + 4;
3864 }
3865
3866 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4)
3867 {
3868 size_t offset = bufsize;
3869 if (offset > bufcapacity - 5) offset = flush();
3870
3871 buffer[offset + 0] = d0;
3872 buffer[offset + 1] = d1;
3873 buffer[offset + 2] = d2;
3874 buffer[offset + 3] = d3;
3875 buffer[offset + 4] = d4;
3876 bufsize = offset + 5;
3877 }
3878
3879 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5)
3880 {
3881 size_t offset = bufsize;
3882 if (offset > bufcapacity - 6) offset = flush();
3883
3884 buffer[offset + 0] = d0;
3885 buffer[offset + 1] = d1;
3886 buffer[offset + 2] = d2;
3887 buffer[offset + 3] = d3;
3888 buffer[offset + 4] = d4;
3889 buffer[offset + 5] = d5;
3890 bufsize = offset + 6;
3891 }
3892
3893 // utf8 maximum expansion: x4 (-> utf32)
3894 // utf16 maximum expansion: x2 (-> utf32)
3895 // utf32 maximum expansion: x1
// bufcapacitybytes is the byte budget (PUGIXML_MEMORY_OUTPUT_STACK when defined, 10240 otherwise).
// bufcapacity is the resulting character count: each character costs one char_t in `buffer`
// plus 4 bytes in `scratch`, hence the division by sizeof(char_t) + 4.
3896 enum
3897 {
3898 bufcapacitybytes =
3899 #ifdef PUGIXML_MEMORY_OUTPUT_STACK
3900 PUGIXML_MEMORY_OUTPUT_STACK
3901 #else
3902 10240
3903 #endif
3904 ,
3905 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4)
3906 };
3907
// pending characters in native (char_t) form; the first `bufsize` entries are valid
3908 char_t buffer[bufcapacity];
3909
// conversion target used by flush(data, size); the members are views of the same storage for different unit sizes
3910 union
3911 {
3912 uint8_t data_u8[4 * bufcapacity];
3913 uint16_t data_u16[2 * bufcapacity];
3914 uint32_t data_u32[bufcapacity];
3915 char_t data_char[bufcapacity];
3916 } scratch;
3917
// destination of all output
3918 xml_writer& writer;
// number of valid characters currently held in `buffer`
3919 size_t bufsize;
// concrete target encoding, as resolved by get_write_encoding in the constructor
3920 xml_encoding encoding;
3921 };
3922
3923 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3924 {
3925 while (*s)
3926 {
3927 const char_t* prev = s;
3928
3929 // While *s is a usual symbol
3930 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type));
3931
3932 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3933
3934 switch (*s)
3935 {
3936 case 0: break;
3937 case '&':
3938 writer.write('&', 'a', 'm', 'p', ';');
3939 ++s;
3940 break;
3941 case '<':
3942 writer.write('&', 'l', 't', ';');
3943 ++s;
3944 break;
3945 case '>':
3946 writer.write('&', 'g', 't', ';');
3947 ++s;
3948 break;
3949 case '"':
3950 if (flags & format_attribute_single_quote)
3951 writer.write('"');
3952 else
3953 writer.write('&', 'q', 'u', 'o', 't', ';');
3954 ++s;
3955 break;
3956 case '\'':
3957 if (flags & format_attribute_single_quote)
3958 writer.write('&', 'a', 'p', 'o', 's', ';');
3959 else
3960 writer.write('\'');
3961 ++s;
3962 break;
3963 default: // s is not a usual symbol
3964 {
3965 unsigned int ch = static_cast<unsigned int>(*s++);
3966 assert(ch < 32);
3967
3968 if (!(flags & format_skip_control_chars))
3969 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';');
3970 }
3971 }
3972 }
3973 }
3974
3975 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags)
3976 {
3977 if (flags & format_no_escapes)
3978 writer.write_string(s);
3979 else
3980 text_output_escaped(writer, s, type, flags);
3981 }
3982
3983 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s)
3984 {
3985 do
3986 {
3987 writer.write('<', '!', '[', 'C', 'D');
3988 writer.write('A', 'T', 'A', '[');
3989
3990 const char_t* prev = s;
3991
3992 // look for ]]> sequence - we can't output it as is since it terminates CDATA
3993 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s;
3994
3995 // skip ]] if we stopped at ]]>, > will go to the next CDATA section
3996 if (*s) s += 2;
3997
3998 writer.write_buffer(prev, static_cast<size_t>(s - prev));
3999
4000 writer.write(']', ']', '>');
4001 }
4002 while (*s);
4003 }
4004
4005 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth)
4006 {
4007 switch (indent_length)
4008 {
4009 case 1:
4010 {
4011 for (unsigned int i = 0; i < depth; ++i)
4012 writer.write(indent[0]);
4013 break;
4014 }
4015
4016 case 2:
4017 {
4018 for (unsigned int i = 0; i < depth; ++i)
4019 writer.write(indent[0], indent[1]);
4020 break;
4021 }
4022
4023 case 3:
4024 {
4025 for (unsigned int i = 0; i < depth; ++i)
4026 writer.write(indent[0], indent[1], indent[2]);
4027 break;
4028 }
4029
4030 case 4:
4031 {
4032 for (unsigned int i = 0; i < depth; ++i)
4033 writer.write(indent[0], indent[1], indent[2], indent[3]);
4034 break;
4035 }
4036
4037 default:
4038 {
4039 for (unsigned int i = 0; i < depth; ++i)
4040 writer.write_buffer(indent, indent_length);
4041 }
4042 }
4043 }
4044
4045 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s)
4046 {
4047 writer.write('<', '!', '-', '-');
4048
4049 while (*s)
4050 {
4051 const char_t* prev = s;
4052
4053 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body
4054 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s;
4055
4056 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4057
4058 if (*s)
4059 {
4060 assert(*s == '-');
4061
4062 writer.write('-', ' ');
4063 ++s;
4064 }
4065 }
4066
4067 writer.write('-', '-', '>');
4068 }
4069
4070 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s)
4071 {
4072 while (*s)
4073 {
4074 const char_t* prev = s;
4075
4076 // look for ?> sequence - we can't output it since ?> terminates PI
4077 while (*s && !(s[0] == '?' && s[1] == '>')) ++s;
4078
4079 writer.write_buffer(prev, static_cast<size_t>(s - prev));
4080
4081 if (*s)
4082 {
4083 assert(s[0] == '?' && s[1] == '>');
4084
4085 writer.write('?', ' ', '>');
4086 s += 2;
4087 }
4088 }
4089 }
4090
4091 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4092 {
4093 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4094 const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"';
4095
4096 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4097 {
4098 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes)
4099 {
4100 writer.write('\n');
4101
4102 text_output_indent(writer, indent, indent_length, depth + 1);
4103 }
4104 else
4105 {
4106 writer.write(' ');
4107 }
4108
4109 writer.write_string(a->name ? a->name + 0 : default_name);
4110 writer.write('=', enquotation_char);
4111
4112 if (a->value)
4113 text_output(writer, a->value, ctx_special_attr, flags);
4114
4115 writer.write(enquotation_char);
4116 }
4117 }
4118
4119 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth)
4120 {
4121 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4122 const char_t* name = node->name ? node->name + 0 : default_name;
4123
4124 writer.write('<');
4125 writer.write_string(name);
4126
4127 if (node->first_attribute)
4128 node_output_attributes(writer, node, indent, indent_length, flags, depth);
4129
4130 // element nodes can have value if parse_embed_pcdata was used
4131 if (!node->value)
4132 {
4133 if (!node->first_child)
4134 {
4135 if (flags & format_no_empty_element_tags)
4136 {
4137 writer.write('>', '<', '/');
4138 writer.write_string(name);
4139 writer.write('>');
4140
4141 return false;
4142 }
4143 else
4144 {
4145 if ((flags & format_raw) == 0)
4146 writer.write(' ');
4147
4148 writer.write('/', '>');
4149
4150 return false;
4151 }
4152 }
4153 else
4154 {
4155 writer.write('>');
4156
4157 return true;
4158 }
4159 }
4160 else
4161 {
4162 writer.write('>');
4163
4164 text_output(writer, node->value, ctx_special_pcdata, flags);
4165
4166 if (!node->first_child)
4167 {
4168 writer.write('<', '/');
4169 writer.write_string(name);
4170 writer.write('>');
4171
4172 return false;
4173 }
4174 else
4175 {
4176 return true;
4177 }
4178 }
4179 }
4180
4181 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node)
4182 {
4183 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4184 const char_t* name = node->name ? node->name + 0 : default_name;
4185
4186 writer.write('<', '/');
4187 writer.write_string(name);
4188 writer.write('>');
4189 }
4190
4191 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags)
4192 {
4193 const char_t* default_name = PUGIXML_TEXT(":anonymous");
4194
4195 switch (PUGI__NODETYPE(node))
4196 {
4197 case node_pcdata:
4198 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags);
4199 break;
4200
4201 case node_cdata:
4202 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4203 break;
4204
4205 case node_comment:
4206 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""));
4207 break;
4208
4209 case node_pi:
4210 writer.write('<', '?');
4211 writer.write_string(node->name ? node->name + 0 : default_name);
4212
4213 if (node->value)
4214 {
4215 writer.write(' ');
4216 node_output_pi_value(writer, node->value);
4217 }
4218
4219 writer.write('?', '>');
4220 break;
4221
4222 case node_declaration:
4223 writer.write('<', '?');
4224 writer.write_string(node->name ? node->name + 0 : default_name);
4225 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0);
4226 writer.write('?', '>');
4227 break;
4228
4229 case node_doctype:
4230 writer.write('<', '!', 'D', 'O', 'C');
4231 writer.write('T', 'Y', 'P', 'E');
4232
4233 if (node->value)
4234 {
4235 writer.write(' ');
4236 writer.write_string(node->value);
4237 }
4238
4239 writer.write('>');
4240 break;
4241
4242 default:
4243 assert(false && "Invalid node type"); // unreachable
4244 }
4245 }
4246
// Bit flags describing the whitespace that node_output still has to emit before the next tag.
enum indent_flags_t
{
	indent_newline = 1 << 0, // a line break is pending
	indent_indent = 1 << 1   // indentation is pending
};
4252
// Serializes `root` and everything below it to `writer` without recursion: the walk descends through
// first_child, advances through next_sibling and climbs through parent until it is back at `root`.
// `indent` is the string written once per nesting level and `depth` is the nesting level of `root`.
// `indent_flags` records which whitespace (newline / indentation) is still owed before the next tag;
// it is cleared after text so that nothing is inserted directly next to pcdata or cdata.
4253 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth)
4254 {
// indentation is active only when some indent flag is set and format_raw is not; otherwise the length is 0
4255 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0;
4256 unsigned int indent_flags = indent_indent;
4257
4258 xml_node_struct* node = root;
4259
4260 do
4261 {
4262 assert(node);
4263
4264 // begin writing current node
4265 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata)
4266 {
4267 node_output_simple(writer, node, flags);
4268
// no newline or indentation after text
4269 indent_flags = 0;
4270 }
4271 else
4272 {
4273 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4274 writer.write('\n');
4275
4276 if ((indent_flags & indent_indent) && indent_length)
4277 text_output_indent(writer, indent, indent_length, depth);
4278
4279 if (PUGI__NODETYPE(node) == node_element)
4280 {
4281 indent_flags = indent_newline | indent_indent;
4282
// node_output_start returns true when the element has children that still need to be written
4283 if (node_output_start(writer, node, indent, indent_length, flags, depth))
4284 {
4285 // element nodes can have value if parse_embed_pcdata was used
4286 if (node->value)
4287 indent_flags = 0;
4288
4289 node = node->first_child;
4290 depth++;
4291 continue;
4292 }
4293 }
4294 else if (PUGI__NODETYPE(node) == node_document)
4295 {
// the document node writes nothing itself and does not increase depth for its children
4296 indent_flags = indent_indent;
4297
4298 if (node->first_child)
4299 {
4300 node = node->first_child;
4301 continue;
4302 }
4303 }
4304 else
4305 {
4306 node_output_simple(writer, node, flags);
4307
4308 indent_flags = indent_newline | indent_indent;
4309 }
4310 }
4311
4312 // continue to the next node
// move to the next sibling if there is one; otherwise climb, closing each element that is left, until a sibling or root is found
4313 while (node != root)
4314 {
4315 if (node->next_sibling)
4316 {
4317 node = node->next_sibling;
4318 break;
4319 }
4320
4321 node = node->parent;
4322
4323 // write closing node
4324 if (PUGI__NODETYPE(node) == node_element)
4325 {
4326 depth--;
4327
4328 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4329 writer.write('\n');
4330
4331 if ((indent_flags & indent_indent) && indent_length)
4332 text_output_indent(writer, indent, indent_length, depth);
4333
4334 node_output_end(writer, node);
4335
4336 indent_flags = indent_newline | indent_indent;
4337 }
4338 }
4339 }
4340 while (node != root);
4341
// trailing line break after the last node, unless the output ended with text or format_raw is set
4342 if ((indent_flags & indent_newline) && (flags & format_raw) == 0)
4343 writer.write('\n');
4344 }
4345
4346 PUGI__FN bool has_declaration(xml_node_struct* node)
4347 {
4348 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling)
4349 {
4350 xml_node_type type = PUGI__NODETYPE(child);
4351
4352 if (type == node_declaration) return true;
4353 if (type == node_element) return false;
4354 }
4355
4356 return false;
4357 }
4358
4359 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node)
4360 {
4361 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute)
4362 if (a == attr)
4363 return true;
4364
4365 return false;
4366 }
4367
4368 PUGI__FN bool allow_insert_attribute(xml_node_type parent)
4369 {
4370 return parent == node_element || parent == node_declaration;
4371 }
4372
4373 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child)
4374 {
4375 if (parent != node_document && parent != node_element) return false;
4376 if (child == node_document || child == node_null) return false;
4377 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false;
4378
4379 return true;
4380 }
4381
4382 PUGI__FN bool allow_move(xml_node parent, xml_node child)
4383 {
4384 // check that child can be a child of parent
4385 if (!allow_insert_child(parent.type(), child.type()))
4386 return false;
4387
4388 // check that node is not moved between documents
4389 if (parent.root() != child.root())
4390 return false;
4391
4392 // check that new parent is not in the child subtree
4393 xml_node cur = parent;
4394
4395 while (cur)
4396 {
4397 if (cur == child)
4398 return false;
4399
4400 cur = cur.parent();
4401 }
4402
4403 return true;
4404 }
4405
4406 template <typename String, typename Header>
4407 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc)
4408 {
4409 assert(!dest && (header & header_mask) == 0);
4410
4411 if (source)
4412 {
4413 if (alloc && (source_header & header_mask) == 0)
4414 {
4415 dest = source;
4416
4417 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared
4418 header |= xml_memory_page_contents_shared_mask;
4419 source_header |= xml_memory_page_contents_shared_mask;
4420 }
4421 else
4422 strcpy_insitu(dest, header, header_mask, source, strlength(source));
4423 }
4424 }
4425
4426 PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc)
4427 {
4428 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc);
4429 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc);
4430
4431 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute)
4432 {
4433 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn));
4434
4435 if (da)
4436 {
4437 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4438 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4439 }
4440 }
4441 }
4442
// Copies the contents of `sn` into `dn` and then replicates every descendant of `sn` below `dn`.
// The walk is iterative and keeps two cursors in step: `sit` in the source tree and `dit`, the
// destination node that receives copies of the children currently being visited.
// `shared_alloc` is non-null only when both nodes use the same allocator; it is forwarded to
// node_copy_contents for every copied node.
4443 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn)
4444 {
4445 xml_allocator& alloc = get_allocator(dn);
4446 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0;
4447
4448 node_copy_contents(dn, sn, shared_alloc);
4449
4450 xml_node_struct* dit = dn;
4451 xml_node_struct* sit = sn->first_child;
4452
4453 while (sit && sit != sn)
4454 {
4455 // loop invariant: dit is inside the subtree rooted at dn
4456 assert(dit);
4457
4458 // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop
4459 if (sit != dn)
4460 {
4461 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit));
4462
// when append_new_node returns null, this node and its subtree are not copied and the walk moves on
4463 if (copy)
4464 {
4465 node_copy_contents(copy, sit, shared_alloc);
4466
// descend: children of sit are appended to the copy that was just created
4467 if (sit->first_child)
4468 {
4469 dit = copy;
4470 sit = sit->first_child;
4471 continue;
4472 }
4473 }
4474 }
4475
4476 // continue to the next node
// take the next sibling if there is one; otherwise move both cursors up one level and try again
4477 do
4478 {
4479 if (sit->next_sibling)
4480 {
4481 sit = sit->next_sibling;
4482 break;
4483 }
4484
4485 sit = sit->parent;
4486 dit = dit->parent;
4487
4488 // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn
4489 assert(sit == sn || dit);
4490 }
4491 while (sit != sn);
4492 }
4493
// when the walk ended by climbing back to sn, dit has climbed one step past dn
4494 assert(!sit || dit == dn->parent);
4495 }
4496
4497 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa)
4498 {
4499 xml_allocator& alloc = get_allocator(da);
4500 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0;
4501
4502 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc);
4503 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc);
4504 }
4505
4506 inline bool is_text_node(xml_node_struct* node)
4507 {
4508 xml_node_type type = PUGI__NODETYPE(node);
4509
4510 return type == node_pcdata || type == node_cdata;
4511 }
4512
4513 // get value with conversion functions
4514 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv)
4515 {
4516 U result = 0;
4517 const char_t* s = value;
4518
4519 while (PUGI__IS_CHARTYPE(*s, ct_space))
4520 s++;
4521
4522 bool negative = (*s == '-');
4523
4524 s += (*s == '+' || *s == '-');
4525
4526 bool overflow = false;
4527
4528 if (s[0] == '0' && (s[1] | ' ') == 'x')
4529 {
4530 s += 2;
4531
4532 // since overflow detection relies on length of the sequence skip leading zeros
4533 while (*s == '0')
4534 s++;
4535
4536 const char_t* start = s;
4537
4538 for (;;)
4539 {
4540 if (static_cast<unsigned>(*s - '0') < 10)
4541 result = result * 16 + (*s - '0');
4542 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6)
4543 result = result * 16 + ((*s | ' ') - 'a' + 10);
4544 else
4545 break;
4546
4547 s++;
4548 }
4549
4550 size_t digits = static_cast<size_t>(s - start);
4551
4552 overflow = digits > sizeof(U) * 2;
4553 }
4554 else
4555 {
4556 // since overflow detection relies on length of the sequence skip leading zeros
4557 while (*s == '0')
4558 s++;
4559
4560 const char_t* start = s;
4561
4562 for (;;)
4563 {
4564 if (static_cast<unsigned>(*s - '0') < 10)
4565 result = result * 10 + (*s - '0');
4566 else
4567 break;
4568
4569 s++;
4570 }
4571
4572 size_t digits = static_cast<size_t>(s - start);
4573
4574 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2);
4575
4576 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5;
4577 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6';
4578 const size_t high_bit = sizeof(U) * 8 - 1;
4579
4580 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit)));
4581 }
4582
4583 if (negative)
4584 {
4585 // Workaround for crayc++ CC-3059: Expected no overflow in routine.
4586 #ifdef _CRAYC
4587 return (overflow || result > ~minv + 1) ? minv : ~result + 1;
4588 #else
4589 return (overflow || result > 0 - minv) ? minv : 0 - result;
4590 #endif
4591 }
4592 else
4593 return (overflow || result > maxv) ? maxv : result;
4594 }
4595
4596 PUGI__FN int get_value_int(const char_t* value)
4597 {
4598 return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX);
4599 }
4600
4601 PUGI__FN unsigned int get_value_uint(const char_t* value)
4602 {
4603 return string_to_integer<unsigned int>(value, 0, UINT_MAX);
4604 }
4605
4606 PUGI__FN double get_value_double(const char_t* value)
4607 {
4608 #ifdef PUGIXML_WCHAR_MODE
4609 return wcstod(value, 0);
4610 #else
4611 return strtod(value, 0);
4612 #endif
4613 }
4614
4615 PUGI__FN float get_value_float(const char_t* value)
4616 {
4617 #ifdef PUGIXML_WCHAR_MODE
4618 return static_cast<float>(wcstod(value, 0));
4619 #else
4620 return static_cast<float>(strtod(value, 0));
4621 #endif
4622 }
4623
4624 PUGI__FN bool get_value_bool(const char_t* value)
4625 {
4626 // only look at first char
4627 char_t first = *value;
4628
4629 // 1*, t* (true), T* (True), y* (yes), Y* (YES)
4630 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y');
4631 }
4632
4633 #ifdef PUGIXML_HAS_LONG_LONG
4634 PUGI__FN long long get_value_llong(const char_t* value)
4635 {
4636 return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX);
4637 }
4638
4639 PUGI__FN unsigned long long get_value_ullong(const char_t* value)
4640 {
4641 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX);
4642 }
4643 #endif
4644
4645 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative)
4646 {
4647 char_t* result = end - 1;
4648 U rest = negative ? 0 - value : value;
4649
4650 do
4651 {
4652 *result-- = static_cast<char_t>('0' + (rest % 10));
4653 rest /= 10;
4654 }
4655 while (rest);
4656
4657 assert(result >= begin);
4658 (void)begin;
4659
4660 *result = '-';
4661
4662 return result + !negative;
4663 }
4664
4665 // set value with conversion functions
4666 template <typename String, typename Header>
4667 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf)
4668 {
4669 #ifdef PUGIXML_WCHAR_MODE
4670 char_t wbuf[128];
4671 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0]));
4672
4673 size_t offset = 0;
4674 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset];
4675
4676 return strcpy_insitu(dest, header, header_mask, wbuf, offset);
4677 #else
4678 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf));
4679 #endif
4680 }
4681
4682 template <typename U, typename String, typename Header>
4683 PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative)
4684 {
4685 char_t buf[64];
4686 char_t* end = buf + sizeof(buf) / sizeof(buf[0]);
4687 char_t* begin = integer_to_string(buf, end, value, negative);
4688
4689 return strcpy_insitu(dest, header, header_mask, begin, end - begin);
4690 }
4691
4692 template <typename String, typename Header>
4693 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision)
4694 {
4695 char buf[128];
4696 PUGI__SNPRINTF(buf, "%.*g", precision, double(value));
4697
4698 return set_value_ascii(dest, header, header_mask, buf);
4699 }
4700
4701 template <typename String, typename Header>
4702 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision)
4703 {
4704 char buf[128];
4705 PUGI__SNPRINTF(buf, "%.*g", precision, value);
4706
4707 return set_value_ascii(dest, header, header_mask, buf);
4708 }
4709
4710 template <typename String, typename Header>
4711 PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value)
4712 {
4713 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5);
4714 }
4715
4716 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer)
4717 {
4718 // check input buffer
4719 if (!contents && size) return make_parse_result(status_io_error);
4720
4721 // get actual encoding
4722 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size);
4723
4724 // if convert_buffer below throws bad_alloc, we still need to deallocate contents if we own it
4725 auto_deleter<void> contents_guard(own ? contents : 0, xml_memory::deallocate);
4726
4727 // get private buffer
4728 char_t* buffer = 0;
4729 size_t length = 0;
4730
4731 // coverity[var_deref_model]
4732 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory);
4733
4734 // after this we either deallocate contents (below) or hold on to it via doc->buffer, so we don't need to guard it
4735 contents_guard.release();
4736
4737 // delete original buffer if we performed a conversion
4738 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents);
4739
4740 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself
4741 if (own || buffer != contents) *out_buffer = buffer;
4742
4743 // store buffer for offset_debug
4744 doc->buffer = buffer;
4745
4746 // parse
4747 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options);
4748
4749 // remember encoding
4750 res.encoding = buffer_encoding;
4751
4752 return res;
4753 }
4754
4755 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick
4756 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result)
4757 {
4758 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
4759 // there are 64-bit versions of fseek/ftell, let's use them
4760 typedef __int64 length_type;
4761
4762 _fseeki64(file, 0, SEEK_END);
4763 length_type length = _ftelli64(file);
4764 _fseeki64(file, 0, SEEK_SET);
4765 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))
4766 // there are 64-bit versions of fseek/ftell, let's use them
4767 typedef off64_t length_type;
4768
4769 fseeko64(file, 0, SEEK_END);
4770 length_type length = ftello64(file);
4771 fseeko64(file, 0, SEEK_SET);
4772 #else
4773 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway.
4774 typedef long length_type;
4775
4776 fseek(file, 0, SEEK_END);
4777 length_type length = ftell(file);
4778 fseek(file, 0, SEEK_SET);
4779 #endif
4780
4781 // check for I/O errors
4782 if (length < 0) return status_io_error;
4783
4784 // check for overflow
4785 size_t result = static_cast<size_t>(length);
4786
4787 if (static_cast<length_type>(result) != length) return status_out_of_memory;
4788
4789 // finalize
4790 out_result = result;
4791
4792 return status_ok;
4793 }
4794
4795 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size
4796 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding)
4797 {
4798 // We only need to zero-terminate if encoding conversion does not do it for us
4799 #ifdef PUGIXML_WCHAR_MODE
4800 xml_encoding wchar_encoding = get_wchar_encoding();
4801
4802 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding))
4803 {
4804 size_t length = size / sizeof(char_t);
4805
4806 static_cast<char_t*>(buffer)[length] = 0;
4807 return (length + 1) * sizeof(char_t);
4808 }
4809 #else
4810 if (encoding == encoding_utf8)
4811 {
4812 static_cast<char*>(buffer)[size] = 0;
4813 return size + 1;
4814 }
4815 #endif
4816
4817 return size;
4818 }
4819
4820 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4821 {
4822 if (!file) return make_parse_result(status_file_not_found);
4823
4824 // get file size (can result in I/O errors)
4825 size_t size = 0;
4826 xml_parse_status size_status = get_file_size(file, size);
4827 if (size_status != status_ok) return make_parse_result(size_status);
4828
4829 size_t max_suffix_size = sizeof(char_t);
4830
4831 // allocate buffer for the whole file
4832 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size));
4833 if (!contents) return make_parse_result(status_out_of_memory);
4834
4835 // read file in memory
4836 size_t read_size = fread(contents, 1, size, file);
4837
4838 if (read_size != size)
4839 {
4840 xml_memory::deallocate(contents);
4841 return make_parse_result(status_io_error);
4842 }
4843
4844 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size);
4845
4846 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer);
4847 }
4848
4849 PUGI__FN void close_file(FILE* file)
4850 {
4851 fclose(file);
4852 }
4853
4854 #ifndef PUGIXML_NO_STL
4855 template <typename T> struct xml_stream_chunk
4856 {
4857 static xml_stream_chunk* create()
4858 {
4859 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk));
4860 if (!memory) return 0;
4861
4862 return new (memory) xml_stream_chunk();
4863 }
4864
4865 static void destroy(xml_stream_chunk* chunk)
4866 {
4867 // free chunk chain
4868 while (chunk)
4869 {
4870 xml_stream_chunk* next_ = chunk->next;
4871
4872 xml_memory::deallocate(chunk);
4873
4874 chunk = next_;
4875 }
4876 }
4877
4878 xml_stream_chunk(): next(0), size(0)
4879 {
4880 }
4881
4882 xml_stream_chunk* next;
4883 size_t size;
4884
4885 T data[xml_memory_page_size / sizeof(T)];
4886 };
4887
4888 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4889 {
4890 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy);
4891
4892 // read file to a chunk list
4893 size_t total = 0;
4894 xml_stream_chunk<T>* last = 0;
4895
4896 while (!stream.eof())
4897 {
4898 // allocate new chunk
4899 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create();
4900 if (!chunk) return status_out_of_memory;
4901
4902 // append chunk to list
4903 if (last) last = last->next = chunk;
4904 else chunks.data = last = chunk;
4905
4906 // read data to chunk
4907 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T)));
4908 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T);
4909
4910 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors
4911 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4912
4913 // guard against huge files (chunk size is small enough to make this overflow check work)
4914 if (total + chunk->size < total) return status_out_of_memory;
4915 total += chunk->size;
4916 }
4917
4918 size_t max_suffix_size = sizeof(char_t);
4919
4920 // copy chunk list to a contiguous buffer
4921 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size));
4922 if (!buffer) return status_out_of_memory;
4923
4924 char* write = buffer;
4925
4926 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next)
4927 {
4928 assert(write + chunk->size <= buffer + total);
4929 memcpy(write, chunk->data, chunk->size);
4930 write += chunk->size;
4931 }
4932
4933 assert(write == buffer + total);
4934
4935 // return buffer
4936 *out_buffer = buffer;
4937 *out_size = total;
4938
4939 return status_ok;
4940 }
4941
4942 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size)
4943 {
4944 // get length of remaining data in stream
4945 typename std::basic_istream<T>::pos_type pos = stream.tellg();
4946 stream.seekg(0, std::ios::end);
4947 std::streamoff length = stream.tellg() - pos;
4948 stream.seekg(pos);
4949
4950 if (stream.fail() || pos < 0) return status_io_error;
4951
4952 // guard against huge files
4953 size_t read_length = static_cast<size_t>(length);
4954
4955 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory;
4956
4957 size_t max_suffix_size = sizeof(char_t);
4958
4959 // read stream data into memory (guard against stream exceptions with buffer holder)
4960 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate);
4961 if (!buffer.data) return status_out_of_memory;
4962
4963 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length));
4964
4965 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors
4966 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error;
4967
4968 // return buffer
4969 size_t actual_length = static_cast<size_t>(stream.gcount());
4970 assert(actual_length <= read_length);
4971
4972 *out_buffer = buffer.release();
4973 *out_size = actual_length * sizeof(T);
4974
4975 return status_ok;
4976 }
4977
4978 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer)
4979 {
4980 void* buffer = 0;
4981 size_t size = 0;
4982 xml_parse_status status = status_ok;
4983
4984 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits)
4985 if (stream.fail()) return make_parse_result(status_io_error);
4986
4987 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory)
4988 if (stream.tellg() < 0)
4989 {
4990 stream.clear(); // clear error flags that could be set by a failing tellg
4991 status = load_stream_data_noseek(stream, &buffer, &size);
4992 }
4993 else
4994 status = load_stream_data_seek(stream, &buffer, &size);
4995
4996 if (status != status_ok) return make_parse_result(status);
4997
4998 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size);
4999
5000 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer);
5001 }
5002 #endif
5003
5004 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)))
5005 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
5006 {
5007 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
5008 FILE* file = 0;
5009 return _wfopen_s(&file, path, mode) == 0 ? file : 0;
5010 #else
5011 return _wfopen(path, mode);
5012 #endif
5013 }
5014 #else
5015 PUGI__FN char* convert_path_heap(const wchar_t* str)
5016 {
5017 assert(str);
5018
5019 // first pass: get length in utf8 characters
5020 size_t length = strlength_wide(str);
5021 size_t size = as_utf8_begin(str, length);
5022
5023 // allocate resulting string
5024 char* result = static_cast<char*>(xml_memory::allocate(size + 1));
5025 if (!result) return 0;
5026
5027 // second pass: convert to utf8
5028 as_utf8_end(result, size, str, length);
5029
5030 // zero-terminate
5031 result[size] = 0;
5032
5033 return result;
5034 }
5035
5036 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode)
5037 {
5038 // there is no standard function to open wide paths, so our best bet is to try utf8 path
5039 char* path_utf8 = convert_path_heap(path);
5040 if (!path_utf8) return 0;
5041
5042 // convert mode to ASCII (we mirror _wfopen interface)
5043 char mode_ascii[4] = {0};
5044 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]);
5045
5046 // try to open the utf8 path
5047 FILE* result = fopen(path_utf8, mode_ascii);
5048
5049 // free dummy buffer
5050 xml_memory::deallocate(path_utf8);
5051
5052 return result;
5053 }
5054 #endif
5055
5056 PUGI__FN FILE* open_file(const char* path, const char* mode)
5057 {
5058 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
5059 FILE* file = 0;
5060 return fopen_s(&file, path, mode) == 0 ? file : 0;
5061 #else
5062 return fopen(path, mode);
5063 #endif
5064 }
5065
5066 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding)
5067 {
5068 if (!file) return false;
5069
5070 xml_writer_file writer(file);
5071 doc.save(writer, indent, flags, encoding);
5072
5073 return fflush(file) == 0 && ferror(file) == 0;
5074 }
5075
5076 struct name_null_sentry
5077 {
5078 xml_node_struct* node;
5079 char_t* name;
5080
5081 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name)
5082 {
5083 node->name = 0;
5084 }
5085
5086 ~name_null_sentry()
5087 {
5088 node->name = name;
5089 }
5090 };
5091 PUGI__NS_END
5092
5093 namespace pugi
5094 {
5095 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_)
5096 {
5097 }
5098
5099 PUGI__FN void xml_writer_file::write(const void* data, size_t size)
5100 {
5101 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file));
5102 (void)!result; // unfortunately we can't do proper error handling here
5103 }
5104
5105 #ifndef PUGIXML_NO_STL
5106 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0)
5107 {
5108 }
5109
5110 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream)
5111 {
5112 }
5113
5114 PUGI__FN void xml_writer_stream::write(const void* data, size_t size)
5115 {
5116 if (narrow_stream)
5117 {
5118 assert(!wide_stream);
5119 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size));
5120 }
5121 else
5122 {
5123 assert(wide_stream);
5124 assert(size % sizeof(wchar_t) == 0);
5125
5126 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t)));
5127 }
5128 }
5129 #endif
5130
5131 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0)
5132 {
5133 }
5134
5135 PUGI__FN xml_tree_walker::~xml_tree_walker()
5136 {
5137 }
5138
5139 PUGI__FN int xml_tree_walker::depth() const
5140 {
5141 return _depth;
5142 }
5143
5144 PUGI__FN bool xml_tree_walker::begin(xml_node&)
5145 {
5146 return true;
5147 }
5148
5149 PUGI__FN bool xml_tree_walker::end(xml_node&)
5150 {
5151 return true;
5152 }
5153
5154 PUGI__FN xml_attribute::xml_attribute(): _attr(0)
5155 {
5156 }
5157
5158 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr)
5159 {
5160 }
5161
5162 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***)
5163 {
5164 }
5165
5166 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const
5167 {
5168 return _attr ? unspecified_bool_xml_attribute : 0;
5169 }
5170
5171 PUGI__FN bool xml_attribute::operator!() const
5172 {
5173 return !_attr;
5174 }
5175
5176 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const
5177 {
5178 return (_attr == r._attr);
5179 }
5180
5181 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const
5182 {
5183 return (_attr != r._attr);
5184 }
5185
5186 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const
5187 {
5188 return (_attr < r._attr);
5189 }
5190
5191 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const
5192 {
5193 return (_attr > r._attr);
5194 }
5195
5196 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const
5197 {
5198 return (_attr <= r._attr);
5199 }
5200
5201 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const
5202 {
5203 return (_attr >= r._attr);
5204 }
5205
5206 PUGI__FN xml_attribute xml_attribute::next_attribute() const
5207 {
5208 if (!_attr) return xml_attribute();
5209 return xml_attribute(_attr->next_attribute);
5210 }
5211
5212 PUGI__FN xml_attribute xml_attribute::previous_attribute() const
5213 {
5214 if (!_attr) return xml_attribute();
5215 xml_attribute_struct* prev = _attr->prev_attribute_c;
5216 return prev->next_attribute ? xml_attribute(prev) : xml_attribute();
5217 }
5218
5219 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const
5220 {
5221 if (!_attr) return def;
5222 const char_t* value = _attr->value;
5223 return value ? value : def;
5224 }
5225
5226 PUGI__FN int xml_attribute::as_int(int def) const
5227 {
5228 if (!_attr) return def;
5229 const char_t* value = _attr->value;
5230 return value ? impl::get_value_int(value) : def;
5231 }
5232
5233 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const
5234 {
5235 if (!_attr) return def;
5236 const char_t* value = _attr->value;
5237 return value ? impl::get_value_uint(value) : def;
5238 }
5239
5240 PUGI__FN double xml_attribute::as_double(double def) const
5241 {
5242 if (!_attr) return def;
5243 const char_t* value = _attr->value;
5244 return value ? impl::get_value_double(value) : def;
5245 }
5246
5247 PUGI__FN float xml_attribute::as_float(float def) const
5248 {
5249 if (!_attr) return def;
5250 const char_t* value = _attr->value;
5251 return value ? impl::get_value_float(value) : def;
5252 }
5253
5254 PUGI__FN bool xml_attribute::as_bool(bool def) const
5255 {
5256 if (!_attr) return def;
5257 const char_t* value = _attr->value;
5258 return value ? impl::get_value_bool(value) : def;
5259 }
5260
5261 #ifdef PUGIXML_HAS_LONG_LONG
5262 PUGI__FN long long xml_attribute::as_llong(long long def) const
5263 {
5264 if (!_attr) return def;
5265 const char_t* value = _attr->value;
5266 return value ? impl::get_value_llong(value) : def;
5267 }
5268
5269 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const
5270 {
5271 if (!_attr) return def;
5272 const char_t* value = _attr->value;
5273 return value ? impl::get_value_ullong(value) : def;
5274 }
5275 #endif
5276
5277 PUGI__FN bool xml_attribute::empty() const
5278 {
5279 return !_attr;
5280 }
5281
5282 PUGI__FN const char_t* xml_attribute::name() const
5283 {
5284 if (!_attr) return PUGIXML_TEXT("");
5285 const char_t* name = _attr->name;
5286 return name ? name : PUGIXML_TEXT("");
5287 }
5288
5289 PUGI__FN const char_t* xml_attribute::value() const
5290 {
5291 if (!_attr) return PUGIXML_TEXT("");
5292 const char_t* value = _attr->value;
5293 return value ? value : PUGIXML_TEXT("");
5294 }
5295
5296 PUGI__FN size_t xml_attribute::hash_value() const
5297 {
5298 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct));
5299 }
5300
5301 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const
5302 {
5303 return _attr;
5304 }
5305
5306 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs)
5307 {
5308 set_value(rhs);
5309 return *this;
5310 }
5311
5312 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs)
5313 {
5314 set_value(rhs);
5315 return *this;
5316 }
5317
5318 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs)
5319 {
5320 set_value(rhs);
5321 return *this;
5322 }
5323
5324 PUGI__FN xml_attribute& xml_attribute::operator=(long rhs)
5325 {
5326 set_value(rhs);
5327 return *this;
5328 }
5329
5330 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs)
5331 {
5332 set_value(rhs);
5333 return *this;
5334 }
5335
5336 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs)
5337 {
5338 set_value(rhs);
5339 return *this;
5340 }
5341
5342 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs)
5343 {
5344 set_value(rhs);
5345 return *this;
5346 }
5347
5348 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs)
5349 {
5350 set_value(rhs);
5351 return *this;
5352 }
5353
5354 #ifdef PUGIXML_HAS_LONG_LONG
5355 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs)
5356 {
5357 set_value(rhs);
5358 return *this;
5359 }
5360
5361 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs)
5362 {
5363 set_value(rhs);
5364 return *this;
5365 }
5366 #endif
5367
5368 PUGI__FN bool xml_attribute::set_name(const char_t* rhs)
5369 {
5370 if (!_attr) return false;
5371
5372 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5373 }
5374
5375 PUGI__FN bool xml_attribute::set_value(const char_t* rhs, size_t sz)
5376 {
5377 if (!_attr) return false;
5378
5379 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, sz);
5380 }
5381
5382 PUGI__FN bool xml_attribute::set_value(const char_t* rhs)
5383 {
5384 if (!_attr) return false;
5385
5386 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5387 }
5388
5389 PUGI__FN bool xml_attribute::set_value(int rhs)
5390 {
5391 if (!_attr) return false;
5392
5393 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5394 }
5395
5396 PUGI__FN bool xml_attribute::set_value(unsigned int rhs)
5397 {
5398 if (!_attr) return false;
5399
5400 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5401 }
5402
5403 PUGI__FN bool xml_attribute::set_value(long rhs)
5404 {
5405 if (!_attr) return false;
5406
5407 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5408 }
5409
5410 PUGI__FN bool xml_attribute::set_value(unsigned long rhs)
5411 {
5412 if (!_attr) return false;
5413
5414 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5415 }
5416
5417 PUGI__FN bool xml_attribute::set_value(double rhs)
5418 {
5419 if (!_attr) return false;
5420
5421 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision);
5422 }
5423
5424 PUGI__FN bool xml_attribute::set_value(double rhs, int precision)
5425 {
5426 if (!_attr) return false;
5427
5428 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
5429 }
5430
5431 PUGI__FN bool xml_attribute::set_value(float rhs)
5432 {
5433 if (!_attr) return false;
5434
5435 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision);
5436 }
5437
5438 PUGI__FN bool xml_attribute::set_value(float rhs, int precision)
5439 {
5440 if (!_attr) return false;
5441
5442 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision);
5443 }
5444
5445 PUGI__FN bool xml_attribute::set_value(bool rhs)
5446 {
5447 if (!_attr) return false;
5448
5449 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs);
5450 }
5451
5452 #ifdef PUGIXML_HAS_LONG_LONG
5453 PUGI__FN bool xml_attribute::set_value(long long rhs)
5454 {
5455 if (!_attr) return false;
5456
5457 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0);
5458 }
5459
5460 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs)
5461 {
5462 if (!_attr) return false;
5463
5464 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false);
5465 }
5466 #endif
5467
5468 #ifdef __BORLANDC__
5469 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs)
5470 {
5471 return (bool)lhs && rhs;
5472 }
5473
5474 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs)
5475 {
5476 return (bool)lhs || rhs;
5477 }
5478 #endif
5479
5480 PUGI__FN xml_node::xml_node(): _root(0)
5481 {
5482 }
5483
5484 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p)
5485 {
5486 }
5487
5488 PUGI__FN static void unspecified_bool_xml_node(xml_node***)
5489 {
5490 }
5491
5492 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const
5493 {
5494 return _root ? unspecified_bool_xml_node : 0;
5495 }
5496
5497 PUGI__FN bool xml_node::operator!() const
5498 {
5499 return !_root;
5500 }
5501
5502 PUGI__FN xml_node::iterator xml_node::begin() const
5503 {
5504 return iterator(_root ? _root->first_child + 0 : 0, _root);
5505 }
5506
5507 PUGI__FN xml_node::iterator xml_node::end() const
5508 {
5509 return iterator(0, _root);
5510 }
5511
5512 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const
5513 {
5514 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root);
5515 }
5516
5517 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const
5518 {
5519 return attribute_iterator(0, _root);
5520 }
5521
5522 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const
5523 {
5524 return xml_object_range<xml_node_iterator>(begin(), end());
5525 }
5526
5527 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const
5528 {
5529 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_));
5530 }
5531
5532 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const
5533 {
5534 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end());
5535 }
5536
5537 PUGI__FN bool xml_node::operator==(const xml_node& r) const
5538 {
5539 return (_root == r._root);
5540 }
5541
5542 PUGI__FN bool xml_node::operator!=(const xml_node& r) const
5543 {
5544 return (_root != r._root);
5545 }
5546
5547 PUGI__FN bool xml_node::operator<(const xml_node& r) const
5548 {
5549 return (_root < r._root);
5550 }
5551
5552 PUGI__FN bool xml_node::operator>(const xml_node& r) const
5553 {
5554 return (_root > r._root);
5555 }
5556
5557 PUGI__FN bool xml_node::operator<=(const xml_node& r) const
5558 {
5559 return (_root <= r._root);
5560 }
5561
5562 PUGI__FN bool xml_node::operator>=(const xml_node& r) const
5563 {
5564 return (_root >= r._root);
5565 }
5566
5567 PUGI__FN bool xml_node::empty() const
5568 {
5569 return !_root;
5570 }
5571
5572 PUGI__FN const char_t* xml_node::name() const
5573 {
5574 if (!_root) return PUGIXML_TEXT("");
5575 const char_t* name = _root->name;
5576 return name ? name : PUGIXML_TEXT("");
5577 }
5578
5579 PUGI__FN xml_node_type xml_node::type() const
5580 {
5581 return _root ? PUGI__NODETYPE(_root) : node_null;
5582 }
5583
5584 PUGI__FN const char_t* xml_node::value() const
5585 {
5586 if (!_root) return PUGIXML_TEXT("");
5587 const char_t* value = _root->value;
5588 return value ? value : PUGIXML_TEXT("");
5589 }
5590
5591 PUGI__FN xml_node xml_node::child(const char_t* name_) const
5592 {
5593 if (!_root) return xml_node();
5594
5595 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5596 {
5597 const char_t* iname = i->name;
5598 if (iname && impl::strequal(name_, iname))
5599 return xml_node(i);
5600 }
5601
5602 return xml_node();
5603 }
5604
5605 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const
5606 {
5607 if (!_root) return xml_attribute();
5608
5609 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute)
5610 {
5611 const char_t* iname = i->name;
5612 if (iname && impl::strequal(name_, iname))
5613 return xml_attribute(i);
5614 }
5615
5616 return xml_attribute();
5617 }
5618
5619 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const
5620 {
5621 if (!_root) return xml_node();
5622
5623 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling)
5624 {
5625 const char_t* iname = i->name;
5626 if (iname && impl::strequal(name_, iname))
5627 return xml_node(i);
5628 }
5629
5630 return xml_node();
5631 }
5632
5633 PUGI__FN xml_node xml_node::next_sibling() const
5634 {
5635 return _root ? xml_node(_root->next_sibling) : xml_node();
5636 }
5637
5638 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const
5639 {
5640 if (!_root) return xml_node();
5641
5642 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c)
5643 {
5644 const char_t* iname = i->name;
5645 if (iname && impl::strequal(name_, iname))
5646 return xml_node(i);
5647 }
5648
5649 return xml_node();
5650 }
5651
5652 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const
5653 {
5654 xml_attribute_struct* hint = hint_._attr;
5655
5656 // if hint is not an attribute of node, behavior is not defined
5657 assert(!hint || (_root && impl::is_attribute_of(hint, _root)));
5658
5659 if (!_root) return xml_attribute();
5660
5661 // optimistically search from hint up until the end
5662 for (xml_attribute_struct* i = hint; i; i = i->next_attribute)
5663 {
5664 const char_t* iname = i->name;
5665 if (iname && impl::strequal(name_, iname))
5666 {
5667 // update hint to maximize efficiency of searching for consecutive attributes
5668 hint_._attr = i->next_attribute;
5669
5670 return xml_attribute(i);
5671 }
5672 }
5673
5674 // wrap around and search from the first attribute until the hint
5675 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails
5676 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute)
5677 {
5678 const char_t* jname = j->name;
5679 if (jname && impl::strequal(name_, jname))
5680 {
5681 // update hint to maximize efficiency of searching for consecutive attributes
5682 hint_._attr = j->next_attribute;
5683
5684 return xml_attribute(j);
5685 }
5686 }
5687
5688 return xml_attribute();
5689 }
5690
5691 PUGI__FN xml_node xml_node::previous_sibling() const
5692 {
5693 if (!_root) return xml_node();
5694 xml_node_struct* prev = _root->prev_sibling_c;
5695 return prev->next_sibling ? xml_node(prev) : xml_node();
5696 }
5697
5698 PUGI__FN xml_node xml_node::parent() const
5699 {
5700 return _root ? xml_node(_root->parent) : xml_node();
5701 }
5702
5703 PUGI__FN xml_node xml_node::root() const
5704 {
5705 return _root ? xml_node(&impl::get_document(_root)) : xml_node();
5706 }
5707
5708 PUGI__FN xml_text xml_node::text() const
5709 {
5710 return xml_text(_root);
5711 }
5712
5713 PUGI__FN const char_t* xml_node::child_value() const
5714 {
5715 if (!_root) return PUGIXML_TEXT("");
5716
5717 // element nodes can have value if parse_embed_pcdata was used
5718 if (PUGI__NODETYPE(_root) == node_element && _root->value)
5719 return _root->value;
5720
5721 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
5722 {
5723 const char_t* ivalue = i->value;
5724 if (impl::is_text_node(i) && ivalue)
5725 return ivalue;
5726 }
5727
5728 return PUGIXML_TEXT("");
5729 }
5730
5731 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const
5732 {
5733 return child(name_).child_value();
5734 }
5735
5736 PUGI__FN xml_attribute xml_node::first_attribute() const
5737 {
5738 if (!_root) return xml_attribute();
5739 return xml_attribute(_root->first_attribute);
5740 }
5741
5742 PUGI__FN xml_attribute xml_node::last_attribute() const
5743 {
5744 if (!_root) return xml_attribute();
5745 xml_attribute_struct* first = _root->first_attribute;
5746 return first ? xml_attribute(first->prev_attribute_c) : xml_attribute();
5747 }
5748
5749 PUGI__FN xml_node xml_node::first_child() const
5750 {
5751 if (!_root) return xml_node();
5752 return xml_node(_root->first_child);
5753 }
5754
5755 PUGI__FN xml_node xml_node::last_child() const
5756 {
5757 if (!_root) return xml_node();
5758 xml_node_struct* first = _root->first_child;
5759 return first ? xml_node(first->prev_sibling_c) : xml_node();
5760 }
5761
5762 PUGI__FN bool xml_node::set_name(const char_t* rhs)
5763 {
5764 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5765
5766 if (type_ != node_element && type_ != node_pi && type_ != node_declaration)
5767 return false;
5768
5769 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs));
5770 }
5771
5772 PUGI__FN bool xml_node::set_value(const char_t* rhs, size_t sz)
5773 {
5774 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5775
5776 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5777 return false;
5778
5779 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, sz);
5780 }
5781
5782 PUGI__FN bool xml_node::set_value(const char_t* rhs)
5783 {
5784 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null;
5785
5786 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype)
5787 return false;
5788
5789 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs));
5790 }
5791
5792 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_)
5793 {
5794 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5795
5796 impl::xml_allocator& alloc = impl::get_allocator(_root);
5797 if (!alloc.reserve()) return xml_attribute();
5798
5799 xml_attribute a(impl::allocate_attribute(alloc));
5800 if (!a) return xml_attribute();
5801
5802 impl::append_attribute(a._attr, _root);
5803
5804 a.set_name(name_);
5805
5806 return a;
5807 }
5808
5809 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_)
5810 {
5811 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5812
5813 impl::xml_allocator& alloc = impl::get_allocator(_root);
5814 if (!alloc.reserve()) return xml_attribute();
5815
5816 xml_attribute a(impl::allocate_attribute(alloc));
5817 if (!a) return xml_attribute();
5818
5819 impl::prepend_attribute(a._attr, _root);
5820
5821 a.set_name(name_);
5822
5823 return a;
5824 }
5825
5826 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr)
5827 {
5828 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5829 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5830
5831 impl::xml_allocator& alloc = impl::get_allocator(_root);
5832 if (!alloc.reserve()) return xml_attribute();
5833
5834 xml_attribute a(impl::allocate_attribute(alloc));
5835 if (!a) return xml_attribute();
5836
5837 impl::insert_attribute_after(a._attr, attr._attr, _root);
5838
5839 a.set_name(name_);
5840
5841 return a;
5842 }
5843
5844 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr)
5845 {
5846 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5847 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5848
5849 impl::xml_allocator& alloc = impl::get_allocator(_root);
5850 if (!alloc.reserve()) return xml_attribute();
5851
5852 xml_attribute a(impl::allocate_attribute(alloc));
5853 if (!a) return xml_attribute();
5854
5855 impl::insert_attribute_before(a._attr, attr._attr, _root);
5856
5857 a.set_name(name_);
5858
5859 return a;
5860 }
5861
5862 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto)
5863 {
5864 if (!proto) return xml_attribute();
5865 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5866
5867 impl::xml_allocator& alloc = impl::get_allocator(_root);
5868 if (!alloc.reserve()) return xml_attribute();
5869
5870 xml_attribute a(impl::allocate_attribute(alloc));
5871 if (!a) return xml_attribute();
5872
5873 impl::append_attribute(a._attr, _root);
5874 impl::node_copy_attribute(a._attr, proto._attr);
5875
5876 return a;
5877 }
5878
5879 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto)
5880 {
5881 if (!proto) return xml_attribute();
5882 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5883
5884 impl::xml_allocator& alloc = impl::get_allocator(_root);
5885 if (!alloc.reserve()) return xml_attribute();
5886
5887 xml_attribute a(impl::allocate_attribute(alloc));
5888 if (!a) return xml_attribute();
5889
5890 impl::prepend_attribute(a._attr, _root);
5891 impl::node_copy_attribute(a._attr, proto._attr);
5892
5893 return a;
5894 }
5895
5896 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr)
5897 {
5898 if (!proto) return xml_attribute();
5899 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5900 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5901
5902 impl::xml_allocator& alloc = impl::get_allocator(_root);
5903 if (!alloc.reserve()) return xml_attribute();
5904
5905 xml_attribute a(impl::allocate_attribute(alloc));
5906 if (!a) return xml_attribute();
5907
5908 impl::insert_attribute_after(a._attr, attr._attr, _root);
5909 impl::node_copy_attribute(a._attr, proto._attr);
5910
5911 return a;
5912 }
5913
5914 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr)
5915 {
5916 if (!proto) return xml_attribute();
5917 if (!impl::allow_insert_attribute(type())) return xml_attribute();
5918 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute();
5919
5920 impl::xml_allocator& alloc = impl::get_allocator(_root);
5921 if (!alloc.reserve()) return xml_attribute();
5922
5923 xml_attribute a(impl::allocate_attribute(alloc));
5924 if (!a) return xml_attribute();
5925
5926 impl::insert_attribute_before(a._attr, attr._attr, _root);
5927 impl::node_copy_attribute(a._attr, proto._attr);
5928
5929 return a;
5930 }
5931
5932 PUGI__FN xml_node xml_node::append_child(xml_node_type type_)
5933 {
5934 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5935
5936 impl::xml_allocator& alloc = impl::get_allocator(_root);
5937 if (!alloc.reserve()) return xml_node();
5938
5939 xml_node n(impl::allocate_node(alloc, type_));
5940 if (!n) return xml_node();
5941
5942 impl::append_node(n._root, _root);
5943
5944 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5945
5946 return n;
5947 }
5948
5949 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_)
5950 {
5951 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5952
5953 impl::xml_allocator& alloc = impl::get_allocator(_root);
5954 if (!alloc.reserve()) return xml_node();
5955
5956 xml_node n(impl::allocate_node(alloc, type_));
5957 if (!n) return xml_node();
5958
5959 impl::prepend_node(n._root, _root);
5960
5961 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5962
5963 return n;
5964 }
5965
5966 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node)
5967 {
5968 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5969 if (!node._root || node._root->parent != _root) return xml_node();
5970
5971 impl::xml_allocator& alloc = impl::get_allocator(_root);
5972 if (!alloc.reserve()) return xml_node();
5973
5974 xml_node n(impl::allocate_node(alloc, type_));
5975 if (!n) return xml_node();
5976
5977 impl::insert_node_before(n._root, node._root);
5978
5979 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5980
5981 return n;
5982 }
5983
5984 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node)
5985 {
5986 if (!impl::allow_insert_child(type(), type_)) return xml_node();
5987 if (!node._root || node._root->parent != _root) return xml_node();
5988
5989 impl::xml_allocator& alloc = impl::get_allocator(_root);
5990 if (!alloc.reserve()) return xml_node();
5991
5992 xml_node n(impl::allocate_node(alloc, type_));
5993 if (!n) return xml_node();
5994
5995 impl::insert_node_after(n._root, node._root);
5996
5997 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml"));
5998
5999 return n;
6000 }
6001
6002 PUGI__FN xml_node xml_node::append_child(const char_t* name_)
6003 {
6004 xml_node result = append_child(node_element);
6005
6006 result.set_name(name_);
6007
6008 return result;
6009 }
6010
6011 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_)
6012 {
6013 xml_node result = prepend_child(node_element);
6014
6015 result.set_name(name_);
6016
6017 return result;
6018 }
6019
6020 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node)
6021 {
6022 xml_node result = insert_child_after(node_element, node);
6023
6024 result.set_name(name_);
6025
6026 return result;
6027 }
6028
6029 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node)
6030 {
6031 xml_node result = insert_child_before(node_element, node);
6032
6033 result.set_name(name_);
6034
6035 return result;
6036 }
6037
6038 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto)
6039 {
6040 xml_node_type type_ = proto.type();
6041 if (!impl::allow_insert_child(type(), type_)) return xml_node();
6042
6043 impl::xml_allocator& alloc = impl::get_allocator(_root);
6044 if (!alloc.reserve()) return xml_node();
6045
6046 xml_node n(impl::allocate_node(alloc, type_));
6047 if (!n) return xml_node();
6048
6049 impl::append_node(n._root, _root);
6050 impl::node_copy_tree(n._root, proto._root);
6051
6052 return n;
6053 }
6054
6055 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto)
6056 {
6057 xml_node_type type_ = proto.type();
6058 if (!impl::allow_insert_child(type(), type_)) return xml_node();
6059
6060 impl::xml_allocator& alloc = impl::get_allocator(_root);
6061 if (!alloc.reserve()) return xml_node();
6062
6063 xml_node n(impl::allocate_node(alloc, type_));
6064 if (!n) return xml_node();
6065
6066 impl::prepend_node(n._root, _root);
6067 impl::node_copy_tree(n._root, proto._root);
6068
6069 return n;
6070 }
6071
6072 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node)
6073 {
6074 xml_node_type type_ = proto.type();
6075 if (!impl::allow_insert_child(type(), type_)) return xml_node();
6076 if (!node._root || node._root->parent != _root) return xml_node();
6077
6078 impl::xml_allocator& alloc = impl::get_allocator(_root);
6079 if (!alloc.reserve()) return xml_node();
6080
6081 xml_node n(impl::allocate_node(alloc, type_));
6082 if (!n) return xml_node();
6083
6084 impl::insert_node_after(n._root, node._root);
6085 impl::node_copy_tree(n._root, proto._root);
6086
6087 return n;
6088 }
6089
6090 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node)
6091 {
6092 xml_node_type type_ = proto.type();
6093 if (!impl::allow_insert_child(type(), type_)) return xml_node();
6094 if (!node._root || node._root->parent != _root) return xml_node();
6095
6096 impl::xml_allocator& alloc = impl::get_allocator(_root);
6097 if (!alloc.reserve()) return xml_node();
6098
6099 xml_node n(impl::allocate_node(alloc, type_));
6100 if (!n) return xml_node();
6101
6102 impl::insert_node_before(n._root, node._root);
6103 impl::node_copy_tree(n._root, proto._root);
6104
6105 return n;
6106 }
6107
6108 PUGI__FN xml_node xml_node::append_move(const xml_node& moved)
6109 {
6110 if (!impl::allow_move(*this, moved)) return xml_node();
6111
6112 impl::xml_allocator& alloc = impl::get_allocator(_root);
6113 if (!alloc.reserve()) return xml_node();
6114
6115 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6116 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6117
6118 impl::remove_node(moved._root);
6119 impl::append_node(moved._root, _root);
6120
6121 return moved;
6122 }
6123
6124 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved)
6125 {
6126 if (!impl::allow_move(*this, moved)) return xml_node();
6127
6128 impl::xml_allocator& alloc = impl::get_allocator(_root);
6129 if (!alloc.reserve()) return xml_node();
6130
6131 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6132 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6133
6134 impl::remove_node(moved._root);
6135 impl::prepend_node(moved._root, _root);
6136
6137 return moved;
6138 }
6139
6140 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node)
6141 {
6142 if (!impl::allow_move(*this, moved)) return xml_node();
6143 if (!node._root || node._root->parent != _root) return xml_node();
6144 if (moved._root == node._root) return xml_node();
6145
6146 impl::xml_allocator& alloc = impl::get_allocator(_root);
6147 if (!alloc.reserve()) return xml_node();
6148
6149 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6150 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6151
6152 impl::remove_node(moved._root);
6153 impl::insert_node_after(moved._root, node._root);
6154
6155 return moved;
6156 }
6157
6158 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node)
6159 {
6160 if (!impl::allow_move(*this, moved)) return xml_node();
6161 if (!node._root || node._root->parent != _root) return xml_node();
6162 if (moved._root == node._root) return xml_node();
6163
6164 impl::xml_allocator& alloc = impl::get_allocator(_root);
6165 if (!alloc.reserve()) return xml_node();
6166
6167 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers
6168 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask;
6169
6170 impl::remove_node(moved._root);
6171 impl::insert_node_before(moved._root, node._root);
6172
6173 return moved;
6174 }
6175
6176 PUGI__FN bool xml_node::remove_attribute(const char_t* name_)
6177 {
6178 return remove_attribute(attribute(name_));
6179 }
6180
6181 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a)
6182 {
6183 if (!_root || !a._attr) return false;
6184 if (!impl::is_attribute_of(a._attr, _root)) return false;
6185
6186 impl::xml_allocator& alloc = impl::get_allocator(_root);
6187 if (!alloc.reserve()) return false;
6188
6189 impl::remove_attribute(a._attr, _root);
6190 impl::destroy_attribute(a._attr, alloc);
6191
6192 return true;
6193 }
6194
6195 PUGI__FN bool xml_node::remove_attributes()
6196 {
6197 if (!_root) return false;
6198
6199 impl::xml_allocator& alloc = impl::get_allocator(_root);
6200 if (!alloc.reserve()) return false;
6201
6202 for (xml_attribute_struct* attr = _root->first_attribute; attr; )
6203 {
6204 xml_attribute_struct* next = attr->next_attribute;
6205
6206 impl::destroy_attribute(attr, alloc);
6207
6208 attr = next;
6209 }
6210
6211 _root->first_attribute = 0;
6212
6213 return true;
6214 }
6215
6216 PUGI__FN bool xml_node::remove_child(const char_t* name_)
6217 {
6218 return remove_child(child(name_));
6219 }
6220
6221 PUGI__FN bool xml_node::remove_child(const xml_node& n)
6222 {
6223 if (!_root || !n._root || n._root->parent != _root) return false;
6224
6225 impl::xml_allocator& alloc = impl::get_allocator(_root);
6226 if (!alloc.reserve()) return false;
6227
6228 impl::remove_node(n._root);
6229 impl::destroy_node(n._root, alloc);
6230
6231 return true;
6232 }
6233
6234 PUGI__FN bool xml_node::remove_children()
6235 {
6236 if (!_root) return false;
6237
6238 impl::xml_allocator& alloc = impl::get_allocator(_root);
6239 if (!alloc.reserve()) return false;
6240
6241 for (xml_node_struct* cur = _root->first_child; cur; )
6242 {
6243 xml_node_struct* next = cur->next_sibling;
6244
6245 impl::destroy_node(cur, alloc);
6246
6247 cur = next;
6248 }
6249
6250 _root->first_child = 0;
6251
6252 return true;
6253 }
6254
// Parses a buffer with this node as the parse root by forwarding to impl::load_buffer_impl.
// contents/size describe the input buffer; options and encoding are passed through unchanged.
// Returns status_append_invalid_root if this node type may not receive element children,
// status_out_of_memory if the extra-buffer record cannot be allocated, and otherwise whatever
// impl::load_buffer_impl returns.
PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
{
	// append_buffer is only valid for elements/documents
	if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root);

	// get document node
	impl::xml_document_struct* doc = &impl::get_document(_root);

	// disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense
	doc->header |= impl::xml_memory_page_contents_shared_mask;

	// get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later)
	// the request is sizeof(void*) larger than the record itself to leave room for the compact-mode alignment below
	impl::xml_memory_page* page = 0;
	impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page));
	(void)page;

	if (!extra) return impl::make_parse_result(status_out_of_memory);

#ifdef PUGIXML_COMPACT
	// align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned
	// note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account
	extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1));
#endif

	// add extra buffer to the list
	// the record is pushed at the head of doc->extra_buffers with a null buffer; &extra->buffer is handed to the loader below
	extra->buffer = 0;
	extra->next = doc->extra_buffers;
	doc->extra_buffers = extra;

	// name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level
	impl::name_null_sentry sentry(_root);

	return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer);
}
6289
6290 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const
6291 {
6292 if (!_root) return xml_node();
6293
6294 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6295 {
6296 const char_t* iname = i->name;
6297 if (iname && impl::strequal(name_, iname))
6298 {
6299 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6300 {
6301 const char_t* aname = a->name;
6302 if (aname && impl::strequal(attr_name, aname))
6303 {
6304 const char_t* avalue = a->value;
6305 if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT("")))
6306 return xml_node(i);
6307 }
6308 }
6309 }
6310 }
6311
6312 return xml_node();
6313 }
6314
6315 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const
6316 {
6317 if (!_root) return xml_node();
6318
6319 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
6320 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute)
6321 {
6322 const char_t* aname = a->name;
6323 if (aname && impl::strequal(attr_name, aname))
6324 {
6325 const char_t* avalue = a->value;
6326 if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT("")))
6327 return xml_node(i);
6328 }
6329 }
6330
6331 return xml_node();
6332 }
6333
6334 #ifndef PUGIXML_NO_STL
6335 PUGI__FN string_t xml_node::path(char_t delimiter) const
6336 {
6337 if (!_root) return string_t();
6338
6339 size_t offset = 0;
6340
6341 for (xml_node_struct* i = _root; i; i = i->parent)
6342 {
6343 const char_t* iname = i->name;
6344 offset += (i != _root);
6345 offset += iname ? impl::strlength(iname) : 0;
6346 }
6347
6348 string_t result;
6349 result.resize(offset);
6350
6351 for (xml_node_struct* j = _root; j; j = j->parent)
6352 {
6353 if (j != _root)
6354 result[--offset] = delimiter;
6355
6356 const char_t* jname = j->name;
6357 if (jname)
6358 {
6359 size_t length = impl::strlength(jname);
6360
6361 offset -= length;
6362 memcpy(&result[offset], jname, length * sizeof(char_t));
6363 }
6364 }
6365
6366 assert(offset == 0);
6367
6368 return result;
6369 }
6370 #endif
6371
6372 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const
6373 {
6374 xml_node context = path_[0] == delimiter ? root() : *this;
6375
6376 if (!context._root) return xml_node();
6377
6378 const char_t* path_segment = path_;
6379
6380 while (*path_segment == delimiter) ++path_segment;
6381
6382 const char_t* path_segment_end = path_segment;
6383
6384 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end;
6385
6386 if (path_segment == path_segment_end) return context;
6387
6388 const char_t* next_segment = path_segment_end;
6389
6390 while (*next_segment == delimiter) ++next_segment;
6391
6392 if (*path_segment == '.' && path_segment + 1 == path_segment_end)
6393 return context.first_element_by_path(next_segment, delimiter);
6394 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end)
6395 return context.parent().first_element_by_path(next_segment, delimiter);
6396 else
6397 {
6398 for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling)
6399 {
6400 const char_t* jname = j->name;
6401 if (jname && impl::strequalrange(jname, path_segment, static_cast<size_t>(path_segment_end - path_segment)))
6402 {
6403 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter);
6404
6405 if (subsearch) return subsearch;
6406 }
6407 }
6408
6409 return xml_node();
6410 }
6411 }
6412
// Walks the subtree below this node depth-first (a node is visited before its children),
// calling walker.begin once with this node, walker.for_each for every descendant, and
// walker.end once with this node. walker._depth is kept in step with the current nesting level.
// Returns false as soon as begin or for_each returns false (end is then not called);
// otherwise returns the result of walker.end.
PUGI__FN bool xml_node::traverse(xml_tree_walker& walker)
{
	// depth is -1 outside the loop; the first level of children is depth 0
	walker._depth = -1;

	xml_node arg_begin(_root);
	if (!walker.begin(arg_begin)) return false;

	xml_node_struct* cur = _root ? _root->first_child + 0 : 0;

	if (cur)
	{
		++walker._depth;

		do
		{
			xml_node arg_for_each(cur);
			if (!walker.for_each(arg_for_each))
				return false;

			if (cur->first_child)
			{
				// descend into the first child
				++walker._depth;
				cur = cur->first_child;
			}
			else if (cur->next_sibling)
				cur = cur->next_sibling;
			else
			{
				// no child and no sibling: climb until a node with a next sibling or the start node is reached
				while (!cur->next_sibling && cur != _root && cur->parent)
				{
					--walker._depth;
					cur = cur->parent;
				}

				// if the climb stopped below the start node, continue with that node's next sibling
				if (cur != _root)
					cur = cur->next_sibling;
			}
		}
		while (cur && cur != _root);
	}

	// every descent must have been matched by a climb
	assert(walker._depth == -1);

	xml_node arg_end(_root);
	return walker.end(arg_end);
}
6459
6460 PUGI__FN size_t xml_node::hash_value() const
6461 {
6462 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct));
6463 }
6464
6465 PUGI__FN xml_node_struct* xml_node::internal_object() const
6466 {
6467 return _root;
6468 }
6469
6470 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6471 {
6472 if (!_root) return;
6473
6474 impl::xml_buffered_writer buffered_writer(writer, encoding);
6475
6476 impl::node_output(buffered_writer, _root, indent, flags, depth);
6477
6478 buffered_writer.flush();
6479 }
6480
6481 #ifndef PUGIXML_NO_STL
6482 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const
6483 {
6484 xml_writer_stream writer(stream);
6485
6486 print(writer, indent, flags, encoding, depth);
6487 }
6488
6489 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const
6490 {
6491 xml_writer_stream writer(stream);
6492
6493 print(writer, indent, flags, encoding_wchar, depth);
6494 }
6495 #endif
6496
6497 PUGI__FN ptrdiff_t xml_node::offset_debug() const
6498 {
6499 if (!_root) return -1;
6500
6501 impl::xml_document_struct& doc = impl::get_document(_root);
6502
6503 // we can determine the offset reliably only if there is exactly once parse buffer
6504 if (!doc.buffer || doc.extra_buffers) return -1;
6505
6506 switch (type())
6507 {
6508 case node_document:
6509 return 0;
6510
6511 case node_element:
6512 case node_declaration:
6513 case node_pi:
6514 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1;
6515
6516 case node_pcdata:
6517 case node_cdata:
6518 case node_comment:
6519 case node_doctype:
6520 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1;
6521
6522 default:
6523 assert(false && "Invalid node type"); // unreachable
6524 return -1;
6525 }
6526 }
6527
6528 #ifdef __BORLANDC__
6529 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs)
6530 {
6531 return (bool)lhs && rhs;
6532 }
6533
6534 PUGI__FN bool operator||(const xml_node& lhs, bool rhs)
6535 {
6536 return (bool)lhs || rhs;
6537 }
6538 #endif
6539
6540 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root)
6541 {
6542 }
6543
6544 PUGI__FN xml_node_struct* xml_text::_data() const
6545 {
6546 if (!_root || impl::is_text_node(_root)) return _root;
6547
6548 // element nodes can have value if parse_embed_pcdata was used
6549 if (PUGI__NODETYPE(_root) == node_element && _root->value)
6550 return _root;
6551
6552 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling)
6553 if (impl::is_text_node(node))
6554 return node;
6555
6556 return 0;
6557 }
6558
6559 PUGI__FN xml_node_struct* xml_text::_data_new()
6560 {
6561 xml_node_struct* d = _data();
6562 if (d) return d;
6563
6564 return xml_node(_root).append_child(node_pcdata).internal_object();
6565 }
6566
6567 PUGI__FN xml_text::xml_text(): _root(0)
6568 {
6569 }
6570
6571 PUGI__FN static void unspecified_bool_xml_text(xml_text***)
6572 {
6573 }
6574
6575 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const
6576 {
6577 return _data() ? unspecified_bool_xml_text : 0;
6578 }
6579
6580 PUGI__FN bool xml_text::operator!() const
6581 {
6582 return !_data();
6583 }
6584
6585 PUGI__FN bool xml_text::empty() const
6586 {
6587 return _data() == 0;
6588 }
6589
6590 PUGI__FN const char_t* xml_text::get() const
6591 {
6592 xml_node_struct* d = _data();
6593 if (!d) return PUGIXML_TEXT("");
6594 const char_t* value = d->value;
6595 return value ? value : PUGIXML_TEXT("");
6596 }
6597
6598 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const
6599 {
6600 xml_node_struct* d = _data();
6601 if (!d) return def;
6602 const char_t* value = d->value;
6603 return value ? value : def;
6604 }
6605
6606 PUGI__FN int xml_text::as_int(int def) const
6607 {
6608 xml_node_struct* d = _data();
6609 if (!d) return def;
6610 const char_t* value = d->value;
6611 return value ? impl::get_value_int(value) : def;
6612 }
6613
6614 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const
6615 {
6616 xml_node_struct* d = _data();
6617 if (!d) return def;
6618 const char_t* value = d->value;
6619 return value ? impl::get_value_uint(value) : def;
6620 }
6621
6622 PUGI__FN double xml_text::as_double(double def) const
6623 {
6624 xml_node_struct* d = _data();
6625 if (!d) return def;
6626 const char_t* value = d->value;
6627 return value ? impl::get_value_double(value) : def;
6628 }
6629
6630 PUGI__FN float xml_text::as_float(float def) const
6631 {
6632 xml_node_struct* d = _data();
6633 if (!d) return def;
6634 const char_t* value = d->value;
6635 return value ? impl::get_value_float(value) : def;
6636 }
6637
6638 PUGI__FN bool xml_text::as_bool(bool def) const
6639 {
6640 xml_node_struct* d = _data();
6641 if (!d) return def;
6642 const char_t* value = d->value;
6643 return value ? impl::get_value_bool(value) : def;
6644 }
6645
6646 #ifdef PUGIXML_HAS_LONG_LONG
6647 PUGI__FN long long xml_text::as_llong(long long def) const
6648 {
6649 xml_node_struct* d = _data();
6650 if (!d) return def;
6651 const char_t* value = d->value;
6652 return value ? impl::get_value_llong(value) : def;
6653 }
6654
6655 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const
6656 {
6657 xml_node_struct* d = _data();
6658 if (!d) return def;
6659 const char_t* value = d->value;
6660 return value ? impl::get_value_ullong(value) : def;
6661 }
6662 #endif
6663
6664 PUGI__FN bool xml_text::set(const char_t* rhs, size_t sz)
6665 {
6666 xml_node_struct* dn = _data_new();
6667
6668 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, sz) : false;
6669 }
6670
6671 PUGI__FN bool xml_text::set(const char_t* rhs)
6672 {
6673 xml_node_struct* dn = _data_new();
6674
6675 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false;
6676 }
6677
6678 PUGI__FN bool xml_text::set(int rhs)
6679 {
6680 xml_node_struct* dn = _data_new();
6681
6682 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6683 }
6684
6685 PUGI__FN bool xml_text::set(unsigned int rhs)
6686 {
6687 xml_node_struct* dn = _data_new();
6688
6689 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6690 }
6691
6692 PUGI__FN bool xml_text::set(long rhs)
6693 {
6694 xml_node_struct* dn = _data_new();
6695
6696 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6697 }
6698
6699 PUGI__FN bool xml_text::set(unsigned long rhs)
6700 {
6701 xml_node_struct* dn = _data_new();
6702
6703 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6704 }
6705
6706 PUGI__FN bool xml_text::set(float rhs)
6707 {
6708 xml_node_struct* dn = _data_new();
6709
6710 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false;
6711 }
6712
6713 PUGI__FN bool xml_text::set(float rhs, int precision)
6714 {
6715 xml_node_struct* dn = _data_new();
6716
6717 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
6718 }
6719
6720 PUGI__FN bool xml_text::set(double rhs)
6721 {
6722 xml_node_struct* dn = _data_new();
6723
6724 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false;
6725 }
6726
6727 PUGI__FN bool xml_text::set(double rhs, int precision)
6728 {
6729 xml_node_struct* dn = _data_new();
6730
6731 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false;
6732 }
6733
6734 PUGI__FN bool xml_text::set(bool rhs)
6735 {
6736 xml_node_struct* dn = _data_new();
6737
6738 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false;
6739 }
6740
6741 #ifdef PUGIXML_HAS_LONG_LONG
6742 PUGI__FN bool xml_text::set(long long rhs)
6743 {
6744 xml_node_struct* dn = _data_new();
6745
6746 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false;
6747 }
6748
6749 PUGI__FN bool xml_text::set(unsigned long long rhs)
6750 {
6751 xml_node_struct* dn = _data_new();
6752
6753 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false;
6754 }
6755 #endif
6756
6757 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs)
6758 {
6759 set(rhs);
6760 return *this;
6761 }
6762
6763 PUGI__FN xml_text& xml_text::operator=(int rhs)
6764 {
6765 set(rhs);
6766 return *this;
6767 }
6768
6769 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs)
6770 {
6771 set(rhs);
6772 return *this;
6773 }
6774
6775 PUGI__FN xml_text& xml_text::operator=(long rhs)
6776 {
6777 set(rhs);
6778 return *this;
6779 }
6780
6781 PUGI__FN xml_text& xml_text::operator=(unsigned long rhs)
6782 {
6783 set(rhs);
6784 return *this;
6785 }
6786
6787 PUGI__FN xml_text& xml_text::operator=(double rhs)
6788 {
6789 set(rhs);
6790 return *this;
6791 }
6792
6793 PUGI__FN xml_text& xml_text::operator=(float rhs)
6794 {
6795 set(rhs);
6796 return *this;
6797 }
6798
6799 PUGI__FN xml_text& xml_text::operator=(bool rhs)
6800 {
6801 set(rhs);
6802 return *this;
6803 }
6804
6805 #ifdef PUGIXML_HAS_LONG_LONG
6806 PUGI__FN xml_text& xml_text::operator=(long long rhs)
6807 {
6808 set(rhs);
6809 return *this;
6810 }
6811
6812 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs)
6813 {
6814 set(rhs);
6815 return *this;
6816 }
6817 #endif
6818
6819 PUGI__FN xml_node xml_text::data() const
6820 {
6821 return xml_node(_data());
6822 }
6823
6824 #ifdef __BORLANDC__
6825 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs)
6826 {
6827 return (bool)lhs && rhs;
6828 }
6829
6830 PUGI__FN bool operator||(const xml_text& lhs, bool rhs)
6831 {
6832 return (bool)lhs || rhs;
6833 }
6834 #endif
6835
6836 PUGI__FN xml_node_iterator::xml_node_iterator()
6837 {
6838 }
6839
6840 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent())
6841 {
6842 }
6843
6844 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6845 {
6846 }
6847
6848 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const
6849 {
6850 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6851 }
6852
6853 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const
6854 {
6855 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6856 }
6857
6858 PUGI__FN xml_node& xml_node_iterator::operator*() const
6859 {
6860 assert(_wrap._root);
6861 return _wrap;
6862 }
6863
6864 PUGI__FN xml_node* xml_node_iterator::operator->() const
6865 {
6866 assert(_wrap._root);
6867 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6868 }
6869
6870 PUGI__FN xml_node_iterator& xml_node_iterator::operator++()
6871 {
6872 assert(_wrap._root);
6873 _wrap._root = _wrap._root->next_sibling;
6874 return *this;
6875 }
6876
6877 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int)
6878 {
6879 xml_node_iterator temp = *this;
6880 ++*this;
6881 return temp;
6882 }
6883
6884 PUGI__FN xml_node_iterator& xml_node_iterator::operator--()
6885 {
6886 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child();
6887 return *this;
6888 }
6889
6890 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int)
6891 {
6892 xml_node_iterator temp = *this;
6893 --*this;
6894 return temp;
6895 }
6896
6897 PUGI__FN xml_attribute_iterator::xml_attribute_iterator()
6898 {
6899 }
6900
6901 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent)
6902 {
6903 }
6904
6905 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent)
6906 {
6907 }
6908
6909 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const
6910 {
6911 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root;
6912 }
6913
6914 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const
6915 {
6916 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root;
6917 }
6918
6919 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const
6920 {
6921 assert(_wrap._attr);
6922 return _wrap;
6923 }
6924
6925 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const
6926 {
6927 assert(_wrap._attr);
6928 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround
6929 }
6930
6931 PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator++()
6932 {
6933 assert(_wrap._attr);
6934 _wrap._attr = _wrap._attr->next_attribute;
6935 return *this;
6936 }
6937
6938 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int)
6939 {
6940 xml_attribute_iterator temp = *this;
6941 ++*this;
6942 return temp;
6943 }
6944
6945 PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator--()
6946 {
6947 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute();
6948 return *this;
6949 }
6950
6951 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int)
6952 {
6953 xml_attribute_iterator temp = *this;
6954 --*this;
6955 return temp;
6956 }
6957
6958 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0)
6959 {
6960 }
6961
6962 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name)
6963 {
6964 }
6965
6966 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name)
6967 {
6968 }
6969
6970 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const
6971 {
6972 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root;
6973 }
6974
6975 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const
6976 {
6977 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root;
6978 }
6979
6980 PUGI__FN xml_node& xml_named_node_iterator::operator*() const
6981 {
6982 assert(_wrap._root);
6983 return _wrap;
6984 }
6985
6986 PUGI__FN xml_node* xml_named_node_iterator::operator->() const
6987 {
6988 assert(_wrap._root);
6989 return const_cast<xml_node*>(&_wrap); // BCC5 workaround
6990 }
6991
6992 PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator++()
6993 {
6994 assert(_wrap._root);
6995 _wrap = _wrap.next_sibling(_name);
6996 return *this;
6997 }
6998
6999 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int)
7000 {
7001 xml_named_node_iterator temp = *this;
7002 ++*this;
7003 return temp;
7004 }
7005
7006 PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator--()
7007 {
7008 if (_wrap._root)
7009 _wrap = _wrap.previous_sibling(_name);
7010 else
7011 {
7012 _wrap = _parent.last_child();
7013
7014 if (!impl::strequal(_wrap.name(), _name))
7015 _wrap = _wrap.previous_sibling(_name);
7016 }
7017
7018 return *this;
7019 }
7020
7021 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int)
7022 {
7023 xml_named_node_iterator temp = *this;
7024 --*this;
7025 return temp;
7026 }
7027
7028 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto)
7029 {
7030 }
7031
7032 PUGI__FN xml_parse_result::operator bool() const
7033 {
7034 return status == status_ok;
7035 }
7036
7037 PUGI__FN const char* xml_parse_result::description() const
7038 {
7039 switch (status)
7040 {
7041 case status_ok: return "No error";
7042
7043 case status_file_not_found: return "File was not found";
7044 case status_io_error: return "Error reading from file/stream";
7045 case status_out_of_memory: return "Could not allocate memory";
7046 case status_internal_error: return "Internal error occurred";
7047
7048 case status_unrecognized_tag: return "Could not determine tag type";
7049
7050 case status_bad_pi: return "Error parsing document declaration/processing instruction";
7051 case status_bad_comment: return "Error parsing comment";
7052 case status_bad_cdata: return "Error parsing CDATA section";
7053 case status_bad_doctype: return "Error parsing document type declaration";
7054 case status_bad_pcdata: return "Error parsing PCDATA section";
7055 case status_bad_start_element: return "Error parsing start element tag";
7056 case status_bad_attribute: return "Error parsing element attribute";
7057 case status_bad_end_element: return "Error parsing end element tag";
7058 case status_end_element_mismatch: return "Start-end tags mismatch";
7059
7060 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document";
7061
7062 case status_no_document_element: return "No document element found";
7063
7064 default: return "Unknown error";
7065 }
7066 }
7067
7068 PUGI__FN xml_document::xml_document(): _buffer(0)
7069 {
7070 _create();
7071 }
7072
7073 PUGI__FN xml_document::~xml_document()
7074 {
7075 _destroy();
7076 }
7077
7078 #ifdef PUGIXML_HAS_MOVE
7079 PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0)
7080 {
7081 _create();
7082 _move(rhs);
7083 }
7084
7085 PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
7086 {
7087 if (this == &rhs) return *this;
7088
7089 _destroy();
7090 _create();
7091 _move(rhs);
7092
7093 return *this;
7094 }
7095 #endif
7096
7097 PUGI__FN void xml_document::reset()
7098 {
7099 _destroy();
7100 _create();
7101 }
7102
7103 PUGI__FN void xml_document::reset(const xml_document& proto)
7104 {
7105 reset();
7106
7107 impl::node_copy_tree(_root, proto._root);
7108 }
7109
7110 PUGI__FN void xml_document::_create()
7111 {
7112 assert(!_root);
7113
7114 #ifdef PUGIXML_COMPACT
7115 // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit
7116 const size_t page_offset = sizeof(void*);
7117 #else
7118 const size_t page_offset = 0;
7119 #endif
7120
7121 // initialize sentinel page
7122 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory));
7123
7124 // prepare page structure
7125 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory);
7126 assert(page);
7127
7128 page->busy_size = impl::xml_memory_page_size;
7129
7130 // setup first page marker
7131 #ifdef PUGIXML_COMPACT
7132 // round-trip through void* to avoid 'cast increases required alignment of target type' warning
7133 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page)));
7134 *page->compact_page_marker = sizeof(impl::xml_memory_page);
7135 #endif
7136
7137 // allocate new root
7138 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page);
7139 _root->prev_sibling_c = _root;
7140
7141 // setup sentinel page
7142 page->allocator = static_cast<impl::xml_document_struct*>(_root);
7143
7144 // setup hash table pointer in allocator
7145 #ifdef PUGIXML_COMPACT
7146 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash;
7147 #endif
7148
7149 // verify the document allocation
7150 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory));
7151 }
7152
7153 PUGI__FN void xml_document::_destroy()
7154 {
7155 assert(_root);
7156
7157 // destroy static storage
7158 if (_buffer)
7159 {
7160 impl::xml_memory::deallocate(_buffer);
7161 _buffer = 0;
7162 }
7163
7164 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator)
7165 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next)
7166 {
7167 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer);
7168 }
7169
7170 // destroy dynamic storage, leave sentinel page (it's in static memory)
7171 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root);
7172 assert(root_page && !root_page->prev);
7173 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory));
7174
7175 for (impl::xml_memory_page* page = root_page->next; page; )
7176 {
7177 impl::xml_memory_page* next = page->next;
7178
7179 impl::xml_allocator::deallocate_page(page);
7180
7181 page = next;
7182 }
7183
7184 #ifdef PUGIXML_COMPACT
7185 // destroy hash table
7186 static_cast<impl::xml_document_struct*>(_root)->hash.clear();
7187 #endif
7188
7189 _root = 0;
7190 }
7191
7192 #ifdef PUGIXML_HAS_MOVE
7193 PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
7194 {
7195 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root);
7196 impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root);
7197
7198 // save first child pointer for later; this needs hash access
7199 xml_node_struct* other_first_child = other->first_child;
7200
7201 #ifdef PUGIXML_COMPACT
7202 // reserve space for the hash table up front; this is the only operation that can fail
7203 // if it does, we have no choice but to throw (if we have exceptions)
7204 if (other_first_child)
7205 {
7206 size_t other_children = 0;
7207 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7208 other_children++;
7209
7210 // in compact mode, each pointer assignment could result in a hash table request
7211 // during move, we have to relocate document first_child and parents of all children
7212 // normally there's just one child and its parent has a pointerless encoding but
7213 // we assume the worst here
7214 if (!other->_hash->reserve(other_children + 1))
7215 {
7216 #ifdef PUGIXML_NO_EXCEPTIONS
7217 return;
7218 #else
7219 throw std::bad_alloc();
7220 #endif
7221 }
7222 }
7223 #endif
7224
7225 // move allocation state
7226 // note that other->_root may point to the embedded document page, in which case we should keep original (empty) state
7227 if (other->_root != PUGI__GETPAGE(other))
7228 {
7229 doc->_root = other->_root;
7230 doc->_busy_size = other->_busy_size;
7231 }
7232
7233 // move buffer state
7234 doc->buffer = other->buffer;
7235 doc->extra_buffers = other->extra_buffers;
7236 _buffer = rhs._buffer;
7237
7238 #ifdef PUGIXML_COMPACT
7239 // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child
7240 doc->hash = other->hash;
7241 doc->_hash = &doc->hash;
7242
7243 // make sure we don't access other hash up until the end when we reinitialize other document
7244 other->_hash = 0;
7245 #endif
7246
7247 // move page structure
7248 impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc);
7249 assert(doc_page && !doc_page->prev && !doc_page->next);
7250
7251 impl::xml_memory_page* other_page = PUGI__GETPAGE(other);
7252 assert(other_page && !other_page->prev);
7253
7254 // relink pages since root page is embedded into xml_document
7255 if (impl::xml_memory_page* page = other_page->next)
7256 {
7257 assert(page->prev == other_page);
7258
7259 page->prev = doc_page;
7260
7261 doc_page->next = page;
7262 other_page->next = 0;
7263 }
7264
7265 // make sure pages point to the correct document state
7266 for (impl::xml_memory_page* page = doc_page->next; page; page = page->next)
7267 {
7268 assert(page->allocator == other);
7269
7270 page->allocator = doc;
7271
7272 #ifdef PUGIXML_COMPACT
7273 // this automatically migrates most children between documents and prevents ->parent assignment from allocating
7274 if (page->compact_shared_parent == other)
7275 page->compact_shared_parent = doc;
7276 #endif
7277 }
7278
7279 // move tree structure
7280 assert(!doc->first_child);
7281
7282 doc->first_child = other_first_child;
7283
7284 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling)
7285 {
7286 #ifdef PUGIXML_COMPACT
7287 // most children will have migrated when we reassigned compact_shared_parent
7288 assert(node->parent == other || node->parent == doc);
7289
7290 node->parent = doc;
7291 #else
7292 assert(node->parent == other);
7293 node->parent = doc;
7294 #endif
7295 }
7296
7297 // reset other document
7298 new (other) impl::xml_document_struct(PUGI__GETPAGE(other));
7299 rhs._buffer = 0;
7300 }
7301 #endif
7302
7303 #ifndef PUGIXML_NO_STL
7304 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding)
7305 {
7306 reset();
7307
7308 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer);
7309 }
7310
7311 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options)
7312 {
7313 reset();
7314
7315 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer);
7316 }
7317 #endif
7318
7319 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options)
7320 {
7321 // Force native encoding (skip autodetection)
7322 #ifdef PUGIXML_WCHAR_MODE
7323 xml_encoding encoding = encoding_wchar;
7324 #else
7325 xml_encoding encoding = encoding_utf8;
7326 #endif
7327
7328 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding);
7329 }
7330
7331 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options)
7332 {
7333 return load_string(contents, options);
7334 }
7335
7336 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding)
7337 {
7338 reset();
7339
7340 using impl::auto_deleter; // MSVC7 workaround
7341 auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file);
7342
7343 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7344 }
7345
7346 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding)
7347 {
7348 reset();
7349
7350 using impl::auto_deleter; // MSVC7 workaround
7351 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file);
7352
7353 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer);
7354 }
7355
7356 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding)
7357 {
7358 reset();
7359
7360 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer);
7361 }
7362
7363 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7364 {
7365 reset();
7366
7367 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer);
7368 }
7369
7370 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding)
7371 {
7372 reset();
7373
7374 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer);
7375 }
7376
7377 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7378 {
7379 impl::xml_buffered_writer buffered_writer(writer, encoding);
7380
7381 if ((flags & format_write_bom) && encoding != encoding_latin1)
7382 {
7383 // BOM always represents the codepoint U+FEFF, so just write it in native encoding
7384 #ifdef PUGIXML_WCHAR_MODE
7385 unsigned int bom = 0xfeff;
7386 buffered_writer.write(static_cast<wchar_t>(bom));
7387 #else
7388 buffered_writer.write('\xef', '\xbb', '\xbf');
7389 #endif
7390 }
7391
7392 if (!(flags & format_no_declaration) && !impl::has_declaration(_root))
7393 {
7394 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\""));
7395 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\""));
7396 buffered_writer.write('?', '>');
7397 if (!(flags & format_raw)) buffered_writer.write('\n');
7398 }
7399
7400 impl::node_output(buffered_writer, _root, indent, flags, 0);
7401
7402 buffered_writer.flush();
7403 }
7404
7405 #ifndef PUGIXML_NO_STL
7406 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7407 {
7408 xml_writer_stream writer(stream);
7409
7410 save(writer, indent, flags, encoding);
7411 }
7412
7413 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const
7414 {
7415 xml_writer_stream writer(stream);
7416
7417 save(writer, indent, flags, encoding_wchar);
7418 }
7419 #endif
7420
7421 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7422 {
7423 using impl::auto_deleter; // MSVC7 workaround
7424 auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file);
7425
7426 return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0;
7427 }
7428
7429 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const
7430 {
7431 using impl::auto_deleter; // MSVC7 workaround
7432 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file);
7433
7434 return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0;
7435 }
7436
7437 PUGI__FN xml_node xml_document::document_element() const
7438 {
7439 assert(_root);
7440
7441 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling)
7442 if (PUGI__NODETYPE(i) == node_element)
7443 return xml_node(i);
7444
7445 return xml_node();
7446 }
7447
7448 #ifndef PUGIXML_NO_STL
7449 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str)
7450 {
7451 assert(str);
7452
7453 return impl::as_utf8_impl(str, impl::strlength_wide(str));
7454 }
7455
7456 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str)
7457 {
7458 return impl::as_utf8_impl(str.c_str(), str.size());
7459 }
7460
7461 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str)
7462 {
7463 assert(str);
7464
7465 return impl::as_wide_impl(str, strlen(str));
7466 }
7467
7468 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str)
7469 {
7470 return impl::as_wide_impl(str.c_str(), str.size());
7471 }
7472 #endif
7473
7474 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate)
7475 {
7476 impl::xml_memory::allocate = allocate;
7477 impl::xml_memory::deallocate = deallocate;
7478 }
7479
7480 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function()
7481 {
7482 return impl::xml_memory::allocate;
7483 }
7484
7485 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function()
7486 {
7487 return impl::xml_memory::deallocate;
7488 }
7489 }
7490
7491 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC))
7492 namespace std
7493 {
7494 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier)
7495 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&)
7496 {
7497 return std::bidirectional_iterator_tag();
7498 }
7499
7500 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&)
7501 {
7502 return std::bidirectional_iterator_tag();
7503 }
7504
7505 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&)
7506 {
7507 return std::bidirectional_iterator_tag();
7508 }
7509 }
7510 #endif
7511
7512 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC)
7513 namespace std
7514 {
7515 // Workarounds for (non-standard) iterator category detection
7516 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&)
7517 {
7518 return std::bidirectional_iterator_tag();
7519 }
7520
7521 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&)
7522 {
7523 return std::bidirectional_iterator_tag();
7524 }
7525
7526 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&)
7527 {
7528 return std::bidirectional_iterator_tag();
7529 }
7530 }
7531 #endif
7532
7533 #ifndef PUGIXML_NO_XPATH
7534 // STL replacements
7535 PUGI__NS_BEGIN
// Function object that compares two values of the same type with operator==
struct equal_to
{
	template <typename T>
	bool operator()(const T& lhs, const T& rhs) const
	{
		return (lhs == rhs);
	}
};
7543
// Function object that compares two values of the same type with operator!=
struct not_equal_to
{
	template <typename T>
	bool operator()(const T& lhs, const T& rhs) const
	{
		return (lhs != rhs);
	}
};
7551
// Function object that compares two values of the same type with operator<
struct less
{
	template <typename T>
	bool operator()(const T& lhs, const T& rhs) const
	{
		return (lhs < rhs);
	}
};
7559
// Function object that compares two values of the same type with operator<=
struct less_equal
{
	template <typename T>
	bool operator()(const T& lhs, const T& rhs) const
	{
		return (lhs <= rhs);
	}
};
7567
// Exchanges the values of two objects through a temporary copy
template <typename T> inline void swap(T& lhs, T& rhs)
{
    T saved = rhs;
    rhs = lhs;
    lhs = saved;
}
7574
7575 template <typename I, typename Pred> PUGI__FN I min_element(I begin, I end, const Pred& pred)
7576 {
7577 I result = begin;
7578
7579 for (I it = begin + 1; it != end; ++it)
7580 if (pred(*it, *result))
7581 result = it;
7582
7583 return result;
7584 }
7585
7586 template <typename I> PUGI__FN void reverse(I begin, I end)
7587 {
7588 while (end - begin > 1)
7589 swap(*begin++, *--end);
7590 }
7591
7592 template <typename I> PUGI__FN I unique(I begin, I end)
7593 {
7594 // fast skip head
7595 while (end - begin > 1 && *begin != *(begin + 1))
7596 begin++;
7597
7598 if (begin == end)
7599 return begin;
7600
7601 // last written element
7602 I write = begin++;
7603
7604 // merge unique elements
7605 while (begin != end)
7606 {
7607 if (*begin != *write)
7608 *++write = *begin++;
7609 else
7610 begin++;
7611 }
7612
7613 // past-the-end (write points to live element)
7614 return write + 1;
7615 }
7616
7617 template <typename T, typename Pred> PUGI__FN void insertion_sort(T* begin, T* end, const Pred& pred)
7618 {
7619 if (begin == end)
7620 return;
7621
7622 for (T* it = begin + 1; it != end; ++it)
7623 {
7624 T val = *it;
7625 T* hole = it;
7626
7627 // move hole backwards
7628 while (hole > begin && pred(val, *(hole - 1)))
7629 {
7630 *hole = *(hole - 1);
7631 hole--;
7632 }
7633
7634 // fill hole with element
7635 *hole = val;
7636 }
7637 }
7638
// Returns whichever of the three iterators refers to the median element under pred.
// Only the local iterator copies are exchanged; the referenced elements are not modified.
template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred)
{
    // order the (first, middle) pair
    if (pred(*middle, *first))
    {
        swap(middle, first);
    }

    // push the largest of the three into last
    if (pred(*last, *middle))
    {
        swap(last, middle);
    }

    // the previous step may have disturbed the (first, middle) order; restore it
    if (pred(*middle, *first))
    {
        swap(middle, first);
    }

    return middle;
}
7650
7651 template <typename T, typename Pred> PUGI__FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend)
7652 {
7653 // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups)
7654 T* eq = begin;
7655 T* lt = begin;
7656 T* gt = end;
7657
7658 while (lt < gt)
7659 {
7660 if (pred(*lt, pivot))
7661 lt++;
7662 else if (*lt == pivot)
7663 swap(*eq++, *lt++);
7664 else
7665 swap(*lt, *--gt);
7666 }
7667
7668 // we now have just 4 groups: = < >; move equal elements to the middle
7669 T* eqbeg = gt;
7670
7671 for (T* it = begin; it != eq; ++it)
7672 swap(*it, *--eqbeg);
7673
7674 *out_eqbeg = eqbeg;
7675 *out_eqend = gt;
7676 }
7677
7678 template <typename I, typename Pred> PUGI__FN void sort(I begin, I end, const Pred& pred)
7679 {
7680 // sort large chunks
7681 while (end - begin > 16)
7682 {
7683 // find median element
7684 I middle = begin + (end - begin) / 2;
7685 I median = median3(begin, middle, end - 1, pred);
7686
7687 // partition in three chunks (< = >)
7688 I eqbeg, eqend;
7689 partition3(begin, end, *median, pred, &eqbeg, &eqend);
7690
7691 // loop on larger half
7692 if (eqbeg - begin > end - eqend)
7693 {
7694 sort(eqend, end, pred);
7695 end = eqbeg;
7696 }
7697 else
7698 {
7699 sort(begin, eqbeg, pred);
7700 begin = eqend;
7701 }
7702 }
7703
7704 // insertion sort small chunk
7705 insertion_sort(begin, end, pred);
7706 }
7707
7708 PUGI__FN bool hash_insert(const void** table, size_t size, const void* key)
7709 {
7710 assert(key);
7711
7712 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key));
7713
7714 // MurmurHash3 32-bit finalizer
7715 h ^= h >> 16;
7716 h *= 0x85ebca6bu;
7717 h ^= h >> 13;
7718 h *= 0xc2b2ae35u;
7719 h ^= h >> 16;
7720
7721 size_t hashmod = size - 1;
7722 size_t bucket = h & hashmod;
7723
7724 for (size_t probe = 0; probe <= hashmod; ++probe)
7725 {
7726 if (table[bucket] == 0)
7727 {
7728 table[bucket] = key;
7729 return true;
7730 }
7731
7732 if (table[bucket] == key)
7733 return false;
7734
7735 // hash collision, quadratic probing
7736 bucket = (bucket + probe + 1) & hashmod;
7737 }
7738
7739 assert(false && "Hash table is full"); // unreachable
7740 return false;
7741 }
7742 PUGI__NS_END
7743
7744 // Allocator used for AST and evaluation stacks
7745 PUGI__NS_BEGIN
// Size in bytes of the data area of one XPath allocator page; PUGIXML_MEMORY_XPATH_PAGE_SIZE overrides the default
#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
static const size_t xpath_memory_page_size = PUGIXML_MEMORY_XPATH_PAGE_SIZE;
#else
static const size_t xpath_memory_page_size = 4096;
#endif

// Allocation granularity: the larger of sizeof(double) and sizeof(void*)
static const uintptr_t xpath_memory_block_alignment = (sizeof(void*) < sizeof(double)) ? sizeof(double) : sizeof(void*);
7755
7756 struct xpath_memory_block
7757 {
7758 xpath_memory_block* next;
7759 size_t capacity;
7760
7761 union
7762 {
7763 char data[xpath_memory_page_size];
7764 double alignment;
7765 };
7766 };
7767
7768 struct xpath_allocator
7769 {
7770 xpath_memory_block* _root;
7771 size_t _root_size;
7772 bool* _error;
7773
7774 xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error)
7775 {
7776 }
7777
7778 void* allocate(size_t size)
7779 {
7780 // round size up to block alignment boundary
7781 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7782
7783 if (_root_size + size <= _root->capacity)
7784 {
7785 void* buf = &_root->data[0] + _root_size;
7786 _root_size += size;
7787 return buf;
7788 }
7789 else
7790 {
7791 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests
7792 size_t block_capacity_base = sizeof(_root->data);
7793 size_t block_capacity_req = size + block_capacity_base / 4;
7794 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req;
7795
7796 size_t block_size = block_capacity + offsetof(xpath_memory_block, data);
7797
7798 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size));
7799 if (!block)
7800 {
7801 if (_error) *_error = true;
7802 return 0;
7803 }
7804
7805 block->next = _root;
7806 block->capacity = block_capacity;
7807
7808 _root = block;
7809 _root_size = size;
7810
7811 return block->data;
7812 }
7813 }
7814
7815 void* reallocate(void* ptr, size_t old_size, size_t new_size)
7816 {
7817 // round size up to block alignment boundary
7818 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7819 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1);
7820
7821 // we can only reallocate the last object
7822 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size);
7823
7824 // try to reallocate the object inplace
7825 if (ptr && _root_size - old_size + new_size <= _root->capacity)
7826 {
7827 _root_size = _root_size - old_size + new_size;
7828 return ptr;
7829 }
7830
7831 // allocate a new block
7832 void* result = allocate(new_size);
7833 if (!result) return 0;
7834
7835 // we have a new block
7836 if (ptr)
7837 {
7838 // copy old data (we only support growing)
7839 assert(new_size >= old_size);
7840 memcpy(result, ptr, old_size);
7841
7842 // free the previous page if it had no other objects
7843 assert(_root->data == result);
7844 assert(_root->next);
7845
7846 if (_root->next->data == ptr)
7847 {
7848 // deallocate the whole page, unless it was the first one
7849 xpath_memory_block* next = _root->next->next;
7850
7851 if (next)
7852 {
7853 xml_memory::deallocate(_root->next);
7854 _root->next = next;
7855 }
7856 }
7857 }
7858
7859 return result;
7860 }
7861
7862 void revert(const xpath_allocator& state)
7863 {
7864 // free all new pages
7865 xpath_memory_block* cur = _root;
7866
7867 while (cur != state._root)
7868 {
7869 xpath_memory_block* next = cur->next;
7870
7871 xml_memory::deallocate(cur);
7872
7873 cur = next;
7874 }
7875
7876 // restore state
7877 _root = state._root;
7878 _root_size = state._root_size;
7879 }
7880
7881 void release()
7882 {
7883 xpath_memory_block* cur = _root;
7884 assert(cur);
7885
7886 while (cur->next)
7887 {
7888 xpath_memory_block* next = cur->next;
7889
7890 xml_memory::deallocate(cur);
7891
7892 cur = next;
7893 }
7894 }
7895 };
7896
7897 struct xpath_allocator_capture
7898 {
7899 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc)
7900 {
7901 }
7902
7903 ~xpath_allocator_capture()
7904 {
7905 _target->revert(_state);
7906 }
7907
7908 xpath_allocator* _target;
7909 xpath_allocator _state;
7910 };
7911
7912 struct xpath_stack
7913 {
7914 xpath_allocator* result;
7915 xpath_allocator* temp;
7916 };
7917
7918 struct xpath_stack_data
7919 {
7920 xpath_memory_block blocks[2];
7921 xpath_allocator result;
7922 xpath_allocator temp;
7923 xpath_stack stack;
7924 bool oom;
7925
7926 xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false)
7927 {
7928 blocks[0].next = blocks[1].next = 0;
7929 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data);
7930
7931 stack.result = &result;
7932 stack.temp = &temp;
7933 }
7934
7935 ~xpath_stack_data()
7936 {
7937 result.release();
7938 temp.release();
7939 }
7940 };
7941 PUGI__NS_END
7942
7943 // String class
7944 PUGI__NS_BEGIN
7945 class xpath_string
7946 {
7947 const char_t* _buffer;
7948 bool _uses_heap;
7949 size_t _length_heap;
7950
7951 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc)
7952 {
7953 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t)));
7954 if (!result) return 0;
7955
7956 memcpy(result, string, length * sizeof(char_t));
7957 result[length] = 0;
7958
7959 return result;
7960 }
7961
7962 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap)
7963 {
7964 }
7965
7966 public:
7967 static xpath_string from_const(const char_t* str)
7968 {
7969 return xpath_string(str, false, 0);
7970 }
7971
7972 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end)
7973 {
7974 assert(begin <= end && *end == 0);
7975
7976 return xpath_string(begin, true, static_cast<size_t>(end - begin));
7977 }
7978
7979 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc)
7980 {
7981 assert(begin <= end);
7982
7983 if (begin == end)
7984 return xpath_string();
7985
7986 size_t length = static_cast<size_t>(end - begin);
7987 const char_t* data = duplicate_string(begin, length, alloc);
7988
7989 return data ? xpath_string(data, true, length) : xpath_string();
7990 }
7991
7992 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0)
7993 {
7994 }
7995
7996 void append(const xpath_string& o, xpath_allocator* alloc)
7997 {
7998 // skip empty sources
7999 if (!*o._buffer) return;
8000
8001 // fast append for constant empty target and constant source
8002 if (!*_buffer && !_uses_heap && !o._uses_heap)
8003 {
8004 _buffer = o._buffer;
8005 }
8006 else
8007 {
8008 // need to make heap copy
8009 size_t target_length = length();
8010 size_t source_length = o.length();
8011 size_t result_length = target_length + source_length;
8012
8013 // allocate new buffer
8014 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t)));
8015 if (!result) return;
8016
8017 // append first string to the new buffer in case there was no reallocation
8018 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t));
8019
8020 // append second string to the new buffer
8021 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t));
8022 result[result_length] = 0;
8023
8024 // finalize
8025 _buffer = result;
8026 _uses_heap = true;
8027 _length_heap = result_length;
8028 }
8029 }
8030
8031 const char_t* c_str() const
8032 {
8033 return _buffer;
8034 }
8035
8036 size_t length() const
8037 {
8038 return _uses_heap ? _length_heap : strlength(_buffer);
8039 }
8040
8041 char_t* data(xpath_allocator* alloc)
8042 {
8043 // make private heap copy
8044 if (!_uses_heap)
8045 {
8046 size_t length_ = strlength(_buffer);
8047 const char_t* data_ = duplicate_string(_buffer, length_, alloc);
8048
8049 if (!data_) return 0;
8050
8051 _buffer = data_;
8052 _uses_heap = true;
8053 _length_heap = length_;
8054 }
8055
8056 return const_cast<char_t*>(_buffer);
8057 }
8058
8059 bool empty() const
8060 {
8061 return *_buffer == 0;
8062 }
8063
8064 bool operator==(const xpath_string& o) const
8065 {
8066 return strequal(_buffer, o._buffer);
8067 }
8068
8069 bool operator!=(const xpath_string& o) const
8070 {
8071 return !strequal(_buffer, o._buffer);
8072 }
8073
8074 bool uses_heap() const
8075 {
8076 return _uses_heap;
8077 }
8078 };
8079 PUGI__NS_END
8080
8081 PUGI__NS_BEGIN
8082 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern)
8083 {
8084 while (*pattern && *string == *pattern)
8085 {
8086 string++;
8087 pattern++;
8088 }
8089
8090 return *pattern == 0;
8091 }
8092
8093 PUGI__FN const char_t* find_char(const char_t* s, char_t c)
8094 {
8095 #ifdef PUGIXML_WCHAR_MODE
8096 return wcschr(s, c);
8097 #else
8098 return strchr(s, c);
8099 #endif
8100 }
8101
8102 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p)
8103 {
8104 #ifdef PUGIXML_WCHAR_MODE
8105 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0)
8106 return (*p == 0) ? s : wcsstr(s, p);
8107 #else
8108 return strstr(s, p);
8109 #endif
8110 }
8111
8112 // Converts symbol to lower case, if it is an ASCII one
8113 PUGI__FN char_t tolower_ascii(char_t ch)
8114 {
8115 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch;
8116 }
8117
8118 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc)
8119 {
8120 if (na.attribute())
8121 return xpath_string::from_const(na.attribute().value());
8122 else
8123 {
8124 xml_node n = na.node();
8125
8126 switch (n.type())
8127 {
8128 case node_pcdata:
8129 case node_cdata:
8130 case node_comment:
8131 case node_pi:
8132 return xpath_string::from_const(n.value());
8133
8134 case node_document:
8135 case node_element:
8136 {
8137 xpath_string result;
8138
8139 // element nodes can have value if parse_embed_pcdata was used
8140 if (n.value()[0])
8141 result.append(xpath_string::from_const(n.value()), alloc);
8142
8143 xml_node cur = n.first_child();
8144
8145 while (cur && cur != n)
8146 {
8147 if (cur.type() == node_pcdata || cur.type() == node_cdata)
8148 result.append(xpath_string::from_const(cur.value()), alloc);
8149
8150 if (cur.first_child())
8151 cur = cur.first_child();
8152 else if (cur.next_sibling())
8153 cur = cur.next_sibling();
8154 else
8155 {
8156 while (!cur.next_sibling() && cur != n)
8157 cur = cur.parent();
8158
8159 if (cur != n) cur = cur.next_sibling();
8160 }
8161 }
8162
8163 return result;
8164 }
8165
8166 default:
8167 return xpath_string();
8168 }
8169 }
8170 }
8171
8172 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn)
8173 {
8174 assert(ln->parent == rn->parent);
8175
8176 // there is no common ancestor (the shared parent is null), nodes are from different documents
8177 if (!ln->parent) return ln < rn;
8178
8179 // determine sibling order
8180 xml_node_struct* ls = ln;
8181 xml_node_struct* rs = rn;
8182
8183 while (ls && rs)
8184 {
8185 if (ls == rn) return true;
8186 if (rs == ln) return false;
8187
8188 ls = ls->next_sibling;
8189 rs = rs->next_sibling;
8190 }
8191
8192 // if rn sibling chain ended ln must be before rn
8193 return !rs;
8194 }
8195
8196 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn)
8197 {
8198 // find common ancestor at the same depth, if any
8199 xml_node_struct* lp = ln;
8200 xml_node_struct* rp = rn;
8201
8202 while (lp && rp && lp->parent != rp->parent)
8203 {
8204 lp = lp->parent;
8205 rp = rp->parent;
8206 }
8207
8208 // parents are the same!
8209 if (lp && rp) return node_is_before_sibling(lp, rp);
8210
8211 // nodes are at different depths, need to normalize heights
8212 bool left_higher = !lp;
8213
8214 while (lp)
8215 {
8216 lp = lp->parent;
8217 ln = ln->parent;
8218 }
8219
8220 while (rp)
8221 {
8222 rp = rp->parent;
8223 rn = rn->parent;
8224 }
8225
8226 // one node is the ancestor of the other
8227 if (ln == rn) return left_higher;
8228
8229 // find common ancestor... again
8230 while (ln->parent != rn->parent)
8231 {
8232 ln = ln->parent;
8233 rn = rn->parent;
8234 }
8235
8236 return node_is_before_sibling(ln, rn);
8237 }
8238
8239 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node)
8240 {
8241 while (node && node != parent) node = node->parent;
8242
8243 return parent && node == parent;
8244 }
8245
8246 PUGI__FN const void* document_buffer_order(const xpath_node& xnode)
8247 {
8248 xml_node_struct* node = xnode.node().internal_object();
8249
8250 if (node)
8251 {
8252 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0)
8253 {
8254 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name;
8255 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value;
8256 }
8257
8258 return 0;
8259 }
8260
8261 xml_attribute_struct* attr = xnode.attribute().internal_object();
8262
8263 if (attr)
8264 {
8265 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0)
8266 {
8267 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name;
8268 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value;
8269 }
8270
8271 return 0;
8272 }
8273
8274 return 0;
8275 }
8276
8277 struct document_order_comparator
8278 {
8279 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const
8280 {
8281 // optimized document order based check
8282 const void* lo = document_buffer_order(lhs);
8283 const void* ro = document_buffer_order(rhs);
8284
8285 if (lo && ro) return lo < ro;
8286
8287 // slow comparison
8288 xml_node ln = lhs.node(), rn = rhs.node();
8289
8290 // compare attributes
8291 if (lhs.attribute() && rhs.attribute())
8292 {
8293 // shared parent
8294 if (lhs.parent() == rhs.parent())
8295 {
8296 // determine sibling order
8297 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute())
8298 if (a == rhs.attribute())
8299 return true;
8300
8301 return false;
8302 }
8303
8304 // compare attribute parents
8305 ln = lhs.parent();
8306 rn = rhs.parent();
8307 }
8308 else if (lhs.attribute())
8309 {
8310 // attributes go after the parent element
8311 if (lhs.parent() == rhs.node()) return false;
8312
8313 ln = lhs.parent();
8314 }
8315 else if (rhs.attribute())
8316 {
8317 // attributes go after the parent element
8318 if (rhs.parent() == lhs.node()) return true;
8319
8320 rn = rhs.parent();
8321 }
8322
8323 if (ln == rn) return false;
8324
8325 if (!ln || !rn) return ln < rn;
8326
8327 return node_is_before(ln.internal_object(), rn.internal_object());
8328 }
8329 };
8330
8331 PUGI__FN double gen_nan()
8332 {
8333 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24))
8334 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t));
8335 typedef uint32_t UI; // BCC5 workaround
8336 union { float f; UI i; } u;
8337 u.i = 0x7fc00000;
8338 return double(u.f);
8339 #else
8340 // fallback
8341 const volatile double zero = 0.0;
8342 return zero / zero;
8343 #endif
8344 }
8345
8346 PUGI__FN bool is_nan(double value)
8347 {
8348 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8349 return !!_isnan(value);
8350 #elif defined(fpclassify) && defined(FP_NAN)
8351 return fpclassify(value) == FP_NAN;
8352 #else
8353 // fallback
8354 const volatile double v = value;
8355 return v != v;
8356 #endif
8357 }
8358
8359 PUGI__FN const char_t* convert_number_to_string_special(double value)
8360 {
8361 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__)
8362 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0;
8363 if (_isnan(value)) return PUGIXML_TEXT("NaN");
8364 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8365 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO)
8366 switch (fpclassify(value))
8367 {
8368 case FP_NAN:
8369 return PUGIXML_TEXT("NaN");
8370
8371 case FP_INFINITE:
8372 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8373
8374 case FP_ZERO:
8375 return PUGIXML_TEXT("0");
8376
8377 default:
8378 return 0;
8379 }
8380 #else
8381 // fallback
8382 const volatile double v = value;
8383
8384 if (v == 0) return PUGIXML_TEXT("0");
8385 if (v != v) return PUGIXML_TEXT("NaN");
8386 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity");
8387 return 0;
8388 #endif
8389 }
8390
8391 PUGI__FN bool convert_number_to_boolean(double value)
8392 {
8393 return (value != 0 && !is_nan(value));
8394 }
8395
8396 PUGI__FN void truncate_zeros(char* begin, char* end)
8397 {
8398 while (begin != end && end[-1] == '0') end--;
8399
8400 *end = 0;
8401 }
8402
8403 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent
8404 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400
8405 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8406 {
8407 // get base values
8408 int sign, exponent;
8409 _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign);
8410
8411 // truncate redundant zeros
8412 truncate_zeros(buffer, buffer + strlen(buffer));
8413
8414 // fill results
8415 *out_mantissa = buffer;
8416 *out_exponent = exponent;
8417 }
8418 #else
8419 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent)
8420 {
8421 // get a scientific notation value with IEEE DBL_DIG decimals
8422 PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value);
8423
8424 // get the exponent (possibly negative)
8425 char* exponent_string = strchr(buffer, 'e');
8426 assert(exponent_string);
8427
8428 int exponent = atoi(exponent_string + 1);
8429
8430 // extract mantissa string: skip sign
8431 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer;
8432 assert(mantissa[0] != '0' && mantissa[1] == '.');
8433
8434 // divide mantissa by 10 to eliminate integer part
8435 mantissa[1] = mantissa[0];
8436 mantissa++;
8437 exponent++;
8438
8439 // remove extra mantissa digits and zero-terminate mantissa
8440 truncate_zeros(mantissa, exponent_string);
8441
8442 // fill results
8443 *out_mantissa = mantissa;
8444 *out_exponent = exponent;
8445 }
8446 #endif
8447
8448 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc)
8449 {
8450 // try special number conversion
8451 const char_t* special = convert_number_to_string_special(value);
8452 if (special) return xpath_string::from_const(special);
8453
8454 // get mantissa + exponent form
8455 char mantissa_buffer[32];
8456
8457 char* mantissa;
8458 int exponent;
8459 convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent);
8460
8461 // allocate a buffer of suitable length for the number
8462 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4;
8463 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size));
8464 if (!result) return xpath_string();
8465
8466 // make the number!
8467 char_t* s = result;
8468
8469 // sign
8470 if (value < 0) *s++ = '-';
8471
8472 // integer part
8473 if (exponent <= 0)
8474 {
8475 *s++ = '0';
8476 }
8477 else
8478 {
8479 while (exponent > 0)
8480 {
8481 assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9);
8482 *s++ = *mantissa ? *mantissa++ : '0';
8483 exponent--;
8484 }
8485 }
8486
8487 // fractional part
8488 if (*mantissa)
8489 {
8490 // decimal point
8491 *s++ = '.';
8492
8493 // extra zeroes from negative exponent
8494 while (exponent < 0)
8495 {
8496 *s++ = '0';
8497 exponent++;
8498 }
8499
8500 // extra mantissa digits
8501 while (*mantissa)
8502 {
8503 assert(static_cast<unsigned int>(*mantissa - '0') <= 9);
8504 *s++ = *mantissa++;
8505 }
8506 }
8507
8508 // zero-terminate
8509 assert(s < result + result_size);
8510 *s = 0;
8511
8512 return xpath_string::from_heap_preallocated(result, s);
8513 }
8514
8515 PUGI__FN bool check_string_to_number_format(const char_t* string)
8516 {
8517 // parse leading whitespace
8518 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8519
8520 // parse sign
8521 if (*string == '-') ++string;
8522
8523 if (!*string) return false;
8524
8525 // if there is no integer part, there should be a decimal part with at least one digit
8526 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false;
8527
8528 // parse integer part
8529 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8530
8531 // parse decimal part
8532 if (*string == '.')
8533 {
8534 ++string;
8535
8536 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string;
8537 }
8538
8539 // parse trailing whitespace
8540 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string;
8541
8542 return *string == 0;
8543 }
8544
8545 PUGI__FN double convert_string_to_number(const char_t* string)
8546 {
8547 // check string format
8548 if (!check_string_to_number_format(string)) return gen_nan();
8549
8550 // parse string
8551 #ifdef PUGIXML_WCHAR_MODE
8552 return wcstod(string, 0);
8553 #else
8554 return strtod(string, 0);
8555 #endif
8556 }
8557
8558 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result)
8559 {
8560 size_t length = static_cast<size_t>(end - begin);
8561 char_t* scratch = buffer;
8562
8563 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8564 {
8565 // need to make dummy on-heap copy
8566 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8567 if (!scratch) return false;
8568 }
8569
8570 // copy string to zero-terminated buffer and perform conversion
8571 memcpy(scratch, begin, length * sizeof(char_t));
8572 scratch[length] = 0;
8573
8574 *out_result = convert_string_to_number(scratch);
8575
8576 // free dummy buffer
8577 if (scratch != buffer) xml_memory::deallocate(scratch);
8578
8579 return true;
8580 }
8581
8582 PUGI__FN double round_nearest(double value)
8583 {
8584 return floor(value + 0.5);
8585 }
8586
8587 PUGI__FN double round_nearest_nzero(double value)
8588 {
8589 // same as round_nearest, but returns -0 for [-0.5, -0]
8590 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0)
8591 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5);
8592 }
8593
8594 PUGI__FN const char_t* qualified_name(const xpath_node& node)
8595 {
8596 return node.attribute() ? node.attribute().name() : node.node().name();
8597 }
8598
8599 PUGI__FN const char_t* local_name(const xpath_node& node)
8600 {
8601 const char_t* name = qualified_name(node);
8602 const char_t* p = find_char(name, ':');
8603
8604 return p ? p + 1 : name;
8605 }
8606
8607 struct namespace_uri_predicate
8608 {
8609 const char_t* prefix;
8610 size_t prefix_length;
8611
8612 namespace_uri_predicate(const char_t* name)
8613 {
8614 const char_t* pos = find_char(name, ':');
8615
8616 prefix = pos ? name : 0;
8617 prefix_length = pos ? static_cast<size_t>(pos - name) : 0;
8618 }
8619
8620 bool operator()(xml_attribute a) const
8621 {
8622 const char_t* name = a.name();
8623
8624 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false;
8625
8626 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0;
8627 }
8628 };
8629
8630 PUGI__FN const char_t* namespace_uri(xml_node node)
8631 {
8632 namespace_uri_predicate pred = node.name();
8633
8634 xml_node p = node;
8635
8636 while (p)
8637 {
8638 xml_attribute a = p.find_attribute(pred);
8639
8640 if (a) return a.value();
8641
8642 p = p.parent();
8643 }
8644
8645 return PUGIXML_TEXT("");
8646 }
8647
8648 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent)
8649 {
8650 namespace_uri_predicate pred = attr.name();
8651
8652 // Default namespace does not apply to attributes
8653 if (!pred.prefix) return PUGIXML_TEXT("");
8654
8655 xml_node p = parent;
8656
8657 while (p)
8658 {
8659 xml_attribute a = p.find_attribute(pred);
8660
8661 if (a) return a.value();
8662
8663 p = p.parent();
8664 }
8665
8666 return PUGIXML_TEXT("");
8667 }
8668
8669 PUGI__FN const char_t* namespace_uri(const xpath_node& node)
8670 {
8671 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node());
8672 }
8673
8674 PUGI__FN char_t* normalize_space(char_t* buffer)
8675 {
8676 char_t* write = buffer;
8677
8678 for (char_t* it = buffer; *it; )
8679 {
8680 char_t ch = *it++;
8681
8682 if (PUGI__IS_CHARTYPE(ch, ct_space))
8683 {
8684 // replace whitespace sequence with single space
8685 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++;
8686
8687 // avoid leading spaces
8688 if (write != buffer) *write++ = ' ';
8689 }
8690 else *write++ = ch;
8691 }
8692
8693 // remove trailing space
8694 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--;
8695
8696 // zero-terminate
8697 *write = 0;
8698
8699 return write;
8700 }
8701
8702 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length)
8703 {
8704 char_t* write = buffer;
8705
8706 while (*buffer)
8707 {
8708 PUGI__DMC_VOLATILE char_t ch = *buffer++;
8709
8710 const char_t* pos = find_char(from, ch);
8711
8712 if (!pos)
8713 *write++ = ch; // do not process
8714 else if (static_cast<size_t>(pos - from) < to_length)
8715 *write++ = to[pos - from]; // replace
8716 }
8717
8718 // zero-terminate
8719 *write = 0;
8720
8721 return write;
8722 }
8723
8724 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to)
8725 {
8726 unsigned char table[128] = {0};
8727
8728 while (*from)
8729 {
8730 unsigned int fc = static_cast<unsigned int>(*from);
8731 unsigned int tc = static_cast<unsigned int>(*to);
8732
8733 if (fc >= 128 || tc >= 128)
8734 return 0;
8735
8736 // code=128 means "skip character"
8737 if (!table[fc])
8738 table[fc] = static_cast<unsigned char>(tc ? tc : 128);
8739
8740 from++;
8741 if (tc) to++;
8742 }
8743
8744 for (int i = 0; i < 128; ++i)
8745 if (!table[i])
8746 table[i] = static_cast<unsigned char>(i);
8747
8748 void* result = alloc->allocate(sizeof(table));
8749 if (!result) return 0;
8750
8751 memcpy(result, table, sizeof(table));
8752
8753 return static_cast<unsigned char*>(result);
8754 }
8755
8756 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table)
8757 {
8758 char_t* write = buffer;
8759
8760 while (*buffer)
8761 {
8762 char_t ch = *buffer++;
8763 unsigned int index = static_cast<unsigned int>(ch);
8764
8765 if (index < 128)
8766 {
8767 unsigned char code = table[index];
8768
8769 // code=128 means "skip character" (table size is 128 so 128 can be a special value)
8770 // this code skips these characters without extra branches
8771 *write = static_cast<char_t>(code);
8772 write += 1 - (code >> 7);
8773 }
8774 else
8775 {
8776 *write++ = ch;
8777 }
8778 }
8779
8780 // zero-terminate
8781 *write = 0;
8782
8783 return write;
8784 }
8785
8786 inline bool is_xpath_attribute(const char_t* name)
8787 {
8788 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':'));
8789 }
8790
8791 struct xpath_variable_boolean: xpath_variable
8792 {
8793 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false)
8794 {
8795 }
8796
8797 bool value;
8798 char_t name[1];
8799 };
8800
8801 struct xpath_variable_number: xpath_variable
8802 {
8803 xpath_variable_number(): xpath_variable(xpath_type_number), value(0)
8804 {
8805 }
8806
8807 double value;
8808 char_t name[1];
8809 };
8810
8811 struct xpath_variable_string: xpath_variable
8812 {
8813 xpath_variable_string(): xpath_variable(xpath_type_string), value(0)
8814 {
8815 }
8816
8817 ~xpath_variable_string()
8818 {
8819 if (value) xml_memory::deallocate(value);
8820 }
8821
8822 char_t* value;
8823 char_t name[1];
8824 };
8825
8826 struct xpath_variable_node_set: xpath_variable
8827 {
8828 xpath_variable_node_set(): xpath_variable(xpath_type_node_set)
8829 {
8830 }
8831
8832 xpath_node_set value;
8833 char_t name[1];
8834 };
8835
// Default-constructed, immutable node set with internal linkage.
// NOTE(review): presumably returned by reference where a node-set value is requested but none is
// available - its users are outside this part of the file, confirm there.
static const xpath_node_set dummy_node_set;
8837
8838 PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str)
8839 {
8840 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time)
8841 unsigned int result = 0;
8842
8843 while (*str)
8844 {
8845 result += static_cast<unsigned int>(*str++);
8846 result += result << 10;
8847 result ^= result >> 6;
8848 }
8849
8850 result += result << 3;
8851 result ^= result >> 11;
8852 result += result << 15;
8853
8854 return result;
8855 }
8856
8857 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name)
8858 {
8859 size_t length = strlength(name);
8860 if (length == 0) return 0; // empty variable names are invalid
8861
8862 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters
8863 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t));
8864 if (!memory) return 0;
8865
8866 T* result = new (memory) T();
8867
8868 memcpy(result->name, name, (length + 1) * sizeof(char_t));
8869
8870 return result;
8871 }
8872
8873 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name)
8874 {
8875 switch (type)
8876 {
8877 case xpath_type_node_set:
8878 return new_xpath_variable<xpath_variable_node_set>(name);
8879
8880 case xpath_type_number:
8881 return new_xpath_variable<xpath_variable_number>(name);
8882
8883 case xpath_type_string:
8884 return new_xpath_variable<xpath_variable_string>(name);
8885
8886 case xpath_type_boolean:
8887 return new_xpath_variable<xpath_variable_boolean>(name);
8888
8889 default:
8890 return 0;
8891 }
8892 }
8893
// Destroys a variable object: runs T's destructor explicitly and then returns the block to xml_memory.
// The destructor must run before the memory is released.
template <typename T> PUGI__FN void delete_xpath_variable(T* var)
{
    var->~T();
    xml_memory::deallocate(var);
}
8899
8900 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var)
8901 {
8902 switch (type)
8903 {
8904 case xpath_type_node_set:
8905 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var));
8906 break;
8907
8908 case xpath_type_number:
8909 delete_xpath_variable(static_cast<xpath_variable_number*>(var));
8910 break;
8911
8912 case xpath_type_string:
8913 delete_xpath_variable(static_cast<xpath_variable_string*>(var));
8914 break;
8915
8916 case xpath_type_boolean:
8917 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var));
8918 break;
8919
8920 default:
8921 assert(false && "Invalid variable type"); // unreachable
8922 }
8923 }
8924
8925 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs)
8926 {
8927 switch (rhs->type())
8928 {
8929 case xpath_type_node_set:
8930 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value);
8931
8932 case xpath_type_number:
8933 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value);
8934
8935 case xpath_type_string:
8936 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value);
8937
8938 case xpath_type_boolean:
8939 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value);
8940
8941 default:
8942 assert(false && "Invalid variable type"); // unreachable
8943 return false;
8944 }
8945 }
8946
8947 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result)
8948 {
8949 size_t length = static_cast<size_t>(end - begin);
8950 char_t* scratch = buffer;
8951
8952 if (length >= sizeof(buffer) / sizeof(buffer[0]))
8953 {
8954 // need to make dummy on-heap copy
8955 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t)));
8956 if (!scratch) return false;
8957 }
8958
8959 // copy string to zero-terminated buffer and perform lookup
8960 memcpy(scratch, begin, length * sizeof(char_t));
8961 scratch[length] = 0;
8962
8963 *out_result = set->get(scratch);
8964
8965 // free dummy buffer
8966 if (scratch != buffer) xml_memory::deallocate(scratch);
8967
8968 return true;
8969 }
8970 PUGI__NS_END
8971
8972 // Internal node set class
8973 PUGI__NS_BEGIN
8974 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end)
8975 {
8976 if (end - begin < 2)
8977 return xpath_node_set::type_sorted;
8978
8979 document_order_comparator cmp;
8980
8981 bool first = cmp(begin[0], begin[1]);
8982
8983 for (const xpath_node* it = begin + 1; it + 1 < end; ++it)
8984 if (cmp(it[0], it[1]) != first)
8985 return xpath_node_set::type_unsorted;
8986
8987 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse;
8988 }
8989
8990 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev)
8991 {
8992 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
8993
8994 if (type == xpath_node_set::type_unsorted)
8995 {
8996 xpath_node_set::type_t sorted = xpath_get_order(begin, end);
8997
8998 if (sorted == xpath_node_set::type_unsorted)
8999 {
9000 sort(begin, end, document_order_comparator());
9001
9002 type = xpath_node_set::type_sorted;
9003 }
9004 else
9005 type = sorted;
9006 }
9007
9008 if (type != order) reverse(begin, end);
9009
9010 return order;
9011 }
9012
9013 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type)
9014 {
9015 if (begin == end) return xpath_node();
9016
9017 switch (type)
9018 {
9019 case xpath_node_set::type_sorted:
9020 return *begin;
9021
9022 case xpath_node_set::type_sorted_reverse:
9023 return *(end - 1);
9024
9025 case xpath_node_set::type_unsorted:
9026 return *min_element(begin, end, document_order_comparator());
9027
9028 default:
9029 assert(false && "Invalid node set type"); // unreachable
9030 return xpath_node();
9031 }
9032 }
9033
9034 class xpath_node_set_raw
9035 {
9036 xpath_node_set::type_t _type;
9037
9038 xpath_node* _begin;
9039 xpath_node* _end;
9040 xpath_node* _eos;
9041
9042 public:
9043 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0)
9044 {
9045 }
9046
9047 xpath_node* begin() const
9048 {
9049 return _begin;
9050 }
9051
9052 xpath_node* end() const
9053 {
9054 return _end;
9055 }
9056
9057 bool empty() const
9058 {
9059 return _begin == _end;
9060 }
9061
9062 size_t size() const
9063 {
9064 return static_cast<size_t>(_end - _begin);
9065 }
9066
9067 xpath_node first() const
9068 {
9069 return xpath_first(_begin, _end, _type);
9070 }
9071
9072 void push_back_grow(const xpath_node& node, xpath_allocator* alloc);
9073
9074 void push_back(const xpath_node& node, xpath_allocator* alloc)
9075 {
9076 if (_end != _eos)
9077 *_end++ = node;
9078 else
9079 push_back_grow(node, alloc);
9080 }
9081
9082 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc)
9083 {
9084 if (begin_ == end_) return;
9085
9086 size_t size_ = static_cast<size_t>(_end - _begin);
9087 size_t capacity = static_cast<size_t>(_eos - _begin);
9088 size_t count = static_cast<size_t>(end_ - begin_);
9089
9090 if (size_ + count > capacity)
9091 {
9092 // reallocate the old array or allocate a new one
9093 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node)));
9094 if (!data) return;
9095
9096 // finalize
9097 _begin = data;
9098 _end = data + size_;
9099 _eos = data + size_ + count;
9100 }
9101
9102 memcpy(_end, begin_, count * sizeof(xpath_node));
9103 _end += count;
9104 }
9105
9106 void sort_do()
9107 {
9108 _type = xpath_sort(_begin, _end, _type, false);
9109 }
9110
9111 void truncate(xpath_node* pos)
9112 {
9113 assert(_begin <= pos && pos <= _end);
9114
9115 _end = pos;
9116 }
9117
9118 void remove_duplicates(xpath_allocator* alloc)
9119 {
9120 if (_type == xpath_node_set::type_unsorted && _end - _begin > 2)
9121 {
9122 xpath_allocator_capture cr(alloc);
9123
9124 size_t size_ = static_cast<size_t>(_end - _begin);
9125
9126 size_t hash_size = 1;
9127 while (hash_size < size_ + size_ / 2) hash_size *= 2;
9128
9129 const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**)));
9130 if (!hash_data) return;
9131
9132 memset(hash_data, 0, hash_size * sizeof(const void**));
9133
9134 xpath_node* write = _begin;
9135
9136 for (xpath_node* it = _begin; it != _end; ++it)
9137 {
9138 const void* attr = it->attribute().internal_object();
9139 const void* node = it->node().internal_object();
9140 const void* key = attr ? attr : node;
9141
9142 if (key && hash_insert(hash_data, hash_size, key))
9143 {
9144 *write++ = *it;
9145 }
9146 }
9147
9148 _end = write;
9149 }
9150 else
9151 {
9152 _end = unique(_begin, _end);
9153 }
9154 }
9155
9156 xpath_node_set::type_t type() const
9157 {
9158 return _type;
9159 }
9160
9161 void set_type(xpath_node_set::type_t value)
9162 {
9163 _type = value;
9164 }
9165 };
9166
9167 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc)
9168 {
9169 size_t capacity = static_cast<size_t>(_eos - _begin);
9170
9171 // get new capacity (1.5x rule)
9172 size_t new_capacity = capacity + capacity / 2 + 1;
9173
9174 // reallocate the old array or allocate a new one
9175 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node)));
9176 if (!data) return;
9177
9178 // finalize
9179 _begin = data;
9180 _end = data + capacity;
9181 _eos = data + new_capacity;
9182
9183 // push
9184 *_end++ = node;
9185 }
9186 PUGI__NS_END
9187
9188 PUGI__NS_BEGIN
9189 struct xpath_context
9190 {
9191 xpath_node n;
9192 size_t position, size;
9193
9194 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_)
9195 {
9196 }
9197 };
9198
// Token kinds produced by the XPath lexer
enum lexeme_t
{
    lex_none,                   // input that does not form a valid token
    lex_equal,                  // =
    lex_not_equal,              // !=
    lex_less,                   // <
    lex_greater,                // >
    lex_less_or_equal,          // <=
    lex_greater_or_equal,       // >=
    lex_plus,                   // +
    lex_minus,                  // -
    lex_multiply,               // *
    lex_union,                  // |
    lex_var_ref,                // $name or $prefix:name (contents exclude the $)
    lex_open_brace,             // (
    lex_close_brace,            // )
    lex_quoted_string,          // "..." or '...' (contents exclude the quotes)
    lex_number,                 // digits with an optional fractional part, or .digits
    lex_slash,                  // /
    lex_double_slash,           // //
    lex_open_square_brace,      // [
    lex_close_square_brace,     // ]
    lex_string,                 // name, prefix:name or prefix:*
    lex_comma,                  // ,
    lex_axis_attribute,         // @
    lex_dot,                    // .
    lex_double_dot,             // ..
    lex_double_colon,           // ::
    lex_eof                     // end of the query string
};
9229
9230 struct xpath_lexer_string
9231 {
9232 const char_t* begin;
9233 const char_t* end;
9234
9235 xpath_lexer_string(): begin(0), end(0)
9236 {
9237 }
9238
9239 bool operator==(const char_t* other) const
9240 {
9241 size_t length = static_cast<size_t>(end - begin);
9242
9243 return strequalrange(other, begin, length);
9244 }
9245 };
9246
9247 class xpath_lexer
9248 {
9249 const char_t* _cur;
9250 const char_t* _cur_lexeme_pos;
9251 xpath_lexer_string _cur_lexeme_contents;
9252
9253 lexeme_t _cur_lexeme;
9254
9255 public:
9256 explicit xpath_lexer(const char_t* query): _cur(query)
9257 {
9258 next();
9259 }
9260
9261 const char_t* state() const
9262 {
9263 return _cur;
9264 }
9265
9266 void next()
9267 {
9268 const char_t* cur = _cur;
9269
9270 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur;
9271
9272 // save lexeme position for error reporting
9273 _cur_lexeme_pos = cur;
9274
9275 switch (*cur)
9276 {
9277 case 0:
9278 _cur_lexeme = lex_eof;
9279 break;
9280
9281 case '>':
9282 if (*(cur+1) == '=')
9283 {
9284 cur += 2;
9285 _cur_lexeme = lex_greater_or_equal;
9286 }
9287 else
9288 {
9289 cur += 1;
9290 _cur_lexeme = lex_greater;
9291 }
9292 break;
9293
9294 case '<':
9295 if (*(cur+1) == '=')
9296 {
9297 cur += 2;
9298 _cur_lexeme = lex_less_or_equal;
9299 }
9300 else
9301 {
9302 cur += 1;
9303 _cur_lexeme = lex_less;
9304 }
9305 break;
9306
9307 case '!':
9308 if (*(cur+1) == '=')
9309 {
9310 cur += 2;
9311 _cur_lexeme = lex_not_equal;
9312 }
9313 else
9314 {
9315 _cur_lexeme = lex_none;
9316 }
9317 break;
9318
9319 case '=':
9320 cur += 1;
9321 _cur_lexeme = lex_equal;
9322
9323 break;
9324
9325 case '+':
9326 cur += 1;
9327 _cur_lexeme = lex_plus;
9328
9329 break;
9330
9331 case '-':
9332 cur += 1;
9333 _cur_lexeme = lex_minus;
9334
9335 break;
9336
9337 case '*':
9338 cur += 1;
9339 _cur_lexeme = lex_multiply;
9340
9341 break;
9342
9343 case '|':
9344 cur += 1;
9345 _cur_lexeme = lex_union;
9346
9347 break;
9348
9349 case '$':
9350 cur += 1;
9351
9352 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9353 {
9354 _cur_lexeme_contents.begin = cur;
9355
9356 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9357
9358 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname
9359 {
9360 cur++; // :
9361
9362 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9363 }
9364
9365 _cur_lexeme_contents.end = cur;
9366
9367 _cur_lexeme = lex_var_ref;
9368 }
9369 else
9370 {
9371 _cur_lexeme = lex_none;
9372 }
9373
9374 break;
9375
9376 case '(':
9377 cur += 1;
9378 _cur_lexeme = lex_open_brace;
9379
9380 break;
9381
9382 case ')':
9383 cur += 1;
9384 _cur_lexeme = lex_close_brace;
9385
9386 break;
9387
9388 case '[':
9389 cur += 1;
9390 _cur_lexeme = lex_open_square_brace;
9391
9392 break;
9393
9394 case ']':
9395 cur += 1;
9396 _cur_lexeme = lex_close_square_brace;
9397
9398 break;
9399
9400 case ',':
9401 cur += 1;
9402 _cur_lexeme = lex_comma;
9403
9404 break;
9405
9406 case '/':
9407 if (*(cur+1) == '/')
9408 {
9409 cur += 2;
9410 _cur_lexeme = lex_double_slash;
9411 }
9412 else
9413 {
9414 cur += 1;
9415 _cur_lexeme = lex_slash;
9416 }
9417 break;
9418
9419 case '.':
9420 if (*(cur+1) == '.')
9421 {
9422 cur += 2;
9423 _cur_lexeme = lex_double_dot;
9424 }
9425 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit))
9426 {
9427 _cur_lexeme_contents.begin = cur; // .
9428
9429 ++cur;
9430
9431 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9432
9433 _cur_lexeme_contents.end = cur;
9434
9435 _cur_lexeme = lex_number;
9436 }
9437 else
9438 {
9439 cur += 1;
9440 _cur_lexeme = lex_dot;
9441 }
9442 break;
9443
9444 case '@':
9445 cur += 1;
9446 _cur_lexeme = lex_axis_attribute;
9447
9448 break;
9449
9450 case '"':
9451 case '\'':
9452 {
9453 char_t terminator = *cur;
9454
9455 ++cur;
9456
9457 _cur_lexeme_contents.begin = cur;
9458 while (*cur && *cur != terminator) cur++;
9459 _cur_lexeme_contents.end = cur;
9460
9461 if (!*cur)
9462 _cur_lexeme = lex_none;
9463 else
9464 {
9465 cur += 1;
9466 _cur_lexeme = lex_quoted_string;
9467 }
9468
9469 break;
9470 }
9471
9472 case ':':
9473 if (*(cur+1) == ':')
9474 {
9475 cur += 2;
9476 _cur_lexeme = lex_double_colon;
9477 }
9478 else
9479 {
9480 _cur_lexeme = lex_none;
9481 }
9482 break;
9483
9484 default:
9485 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit))
9486 {
9487 _cur_lexeme_contents.begin = cur;
9488
9489 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9490
9491 if (*cur == '.')
9492 {
9493 cur++;
9494
9495 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++;
9496 }
9497
9498 _cur_lexeme_contents.end = cur;
9499
9500 _cur_lexeme = lex_number;
9501 }
9502 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol))
9503 {
9504 _cur_lexeme_contents.begin = cur;
9505
9506 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9507
9508 if (cur[0] == ':')
9509 {
9510 if (cur[1] == '*') // namespace test ncname:*
9511 {
9512 cur += 2; // :*
9513 }
9514 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname
9515 {
9516 cur++; // :
9517
9518 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++;
9519 }
9520 }
9521
9522 _cur_lexeme_contents.end = cur;
9523
9524 _cur_lexeme = lex_string;
9525 }
9526 else
9527 {
9528 _cur_lexeme = lex_none;
9529 }
9530 }
9531
9532 _cur = cur;
9533 }
9534
9535 lexeme_t current() const
9536 {
9537 return _cur_lexeme;
9538 }
9539
9540 const char_t* current_pos() const
9541 {
9542 return _cur_lexeme_pos;
9543 }
9544
9545 const xpath_lexer_string& contents() const
9546 {
9547 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string);
9548
9549 return _cur_lexeme_contents;
9550 }
9551 };
9552
// Node kinds of the XPath expression tree.
// "left", "right" and "third" in the comments refer to the operand sub-expressions of the node.
enum ast_type_t
{
    ast_unknown,
    ast_op_or,                      // left or right
    ast_op_and,                     // left and right
    ast_op_equal,                   // left = right
    ast_op_not_equal,               // left != right
    ast_op_less,                    // left < right
    ast_op_greater,                 // left > right
    ast_op_less_or_equal,           // left <= right
    ast_op_greater_or_equal,        // left >= right
    ast_op_add,                     // left + right
    ast_op_subtract,                // left - right
    ast_op_multiply,                // left * right
    ast_op_divide,                  // left div right
    ast_op_mod,                     // left mod right
    ast_op_negate,                  // - left (unary minus)
    ast_op_union,                   // left | right
    ast_predicate,                  // apply predicate to set; next points to next predicate
    ast_filter,                     // select * from left where right
    ast_string_constant,            // string constant
    ast_number_constant,            // number constant
    ast_variable,                   // variable
    ast_func_last,                  // last()
    ast_func_position,              // position()
    ast_func_count,                 // count(left)
    ast_func_id,                    // id(left)
    ast_func_local_name_0,          // local-name()
    ast_func_local_name_1,          // local-name(left)
    ast_func_namespace_uri_0,       // namespace-uri()
    ast_func_namespace_uri_1,       // namespace-uri(left)
    ast_func_name_0,                // name()
    ast_func_name_1,                // name(left)
    ast_func_string_0,              // string()
    ast_func_string_1,              // string(left)
    ast_func_concat,                // concat(left, right, siblings)
    ast_func_starts_with,           // starts-with(left, right)
    ast_func_contains,              // contains(left, right)
    ast_func_substring_before,      // substring-before(left, right)
    ast_func_substring_after,       // substring-after(left, right)
    ast_func_substring_2,           // substring(left, right)
    ast_func_substring_3,           // substring(left, right, third)
    ast_func_string_length_0,       // string-length()
    ast_func_string_length_1,       // string-length(left)
    ast_func_normalize_space_0,     // normalize-space()
    ast_func_normalize_space_1,     // normalize-space(left)
    ast_func_translate,             // translate(left, right, third)
    ast_func_boolean,               // boolean(left)
    ast_func_not,                   // not(left)
    ast_func_true,                  // true()
    ast_func_false,                 // false()
    ast_func_lang,                  // lang(left)
    ast_func_number_0,              // number()
    ast_func_number_1,              // number(left)
    ast_func_sum,                   // sum(left)
    ast_func_floor,                 // floor(left)
    ast_func_ceiling,               // ceiling(left)
    ast_func_round,                 // round(left)
    ast_step,                       // process set left with step
    ast_step_root,                  // select root node

    ast_opt_translate_table,        // translate(left, right, third) where right/third are constants
    ast_opt_compare_attribute       // @name = 'string'
};
9617
// Axes of a location step; the enumerators are named after the corresponding XPath axes
enum axis_t
{
    axis_ancestor = 0,          // ancestor::
    axis_ancestor_or_self,      // ancestor-or-self::
    axis_attribute,             // attribute:: (or @)
    axis_child,                 // child::
    axis_descendant,            // descendant::
    axis_descendant_or_self,    // descendant-or-self::
    axis_following,             // following::
    axis_following_sibling,     // following-sibling::
    axis_namespace,             // namespace::
    axis_parent,                // parent::
    axis_preceding,             // preceding::
    axis_preceding_sibling,     // preceding-sibling::
    axis_self                   // self::
};
9634
// Node tests of a location step
enum nodetest_t
{
    nodetest_none = 0,          // no test
    nodetest_name,              // element/attribute with a specific name
    nodetest_type_node,         // any node
    nodetest_type_comment,      // comment nodes
    nodetest_type_pi,           // processing instructions
    nodetest_type_text,         // text nodes (pcdata and cdata)
    nodetest_pi,                // processing instructions with a specific target
    nodetest_all,               // any element (or any attribute on the attribute axis)
    nodetest_all_in_namespace   // elements/attributes whose name starts with a given prefix
};
9647
// Predicate classification, stored in the test field of predicate/filter nodes.
// The two constant kinds are evaluated once per node list instead of once per node.
enum predicate_t
{
    predicate_default = 0,  // no special handling
    predicate_posinv,       // NOTE(review): presumably "position invariant" - confirm where it is assigned
    predicate_constant,     // constant numeric predicate
    predicate_constant_one  // NOTE(review): presumably the constant predicate [1] - confirm where it is assigned
};
9655
// Tells node set evaluation how much of the result the caller needs, which allows
// predicate application to stop at the first match in some cases.
enum nodeset_eval_t
{
    nodeset_eval_all = 0,   // the complete node set is required
    nodeset_eval_any,       // NOTE(review): presumably any single node of the set is sufficient - confirm with callers
    nodeset_eval_first      // NOTE(review): presumably only the first node in document order is required - confirm with callers
};
9662
// Maps an axis value to a distinct type so that the axis can be passed as a tag argument
// and read back as T::axis inside a function template.
template <axis_t N> struct axis_to_type
{
    static const axis_t axis;
};

// Out-of-class definition of the constant; its value is the template argument itself.
template <axis_t N> const axis_t axis_to_type<N>::axis = N;
9669
9670 class xpath_ast_node
9671 {
9672 private:
9673 // node type
9674 char _type;
9675 char _rettype;
9676
9677 // for ast_step
9678 char _axis;
9679
9680 // for ast_step/ast_predicate/ast_filter
9681 char _test;
9682
9683 // tree node structure
9684 xpath_ast_node* _left;
9685 xpath_ast_node* _right;
9686 xpath_ast_node* _next;
9687
9688 union
9689 {
9690 // value for ast_string_constant
9691 const char_t* string;
9692 // value for ast_number_constant
9693 double number;
9694 // variable for ast_variable
9695 xpath_variable* variable;
9696 // node test for ast_step (node name/namespace/node type/pi target)
9697 const char_t* nodetest;
9698 // table for ast_opt_translate_table
9699 const unsigned char* table;
9700 } _data;
9701
9702 xpath_ast_node(const xpath_ast_node&);
9703 xpath_ast_node& operator=(const xpath_ast_node&);
9704
9705 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9706 {
9707 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9708
9709 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9710 {
9711 if (lt == xpath_type_boolean || rt == xpath_type_boolean)
9712 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9713 else if (lt == xpath_type_number || rt == xpath_type_number)
9714 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9715 else if (lt == xpath_type_string || rt == xpath_type_string)
9716 {
9717 xpath_allocator_capture cr(stack.result);
9718
9719 xpath_string ls = lhs->eval_string(c, stack);
9720 xpath_string rs = rhs->eval_string(c, stack);
9721
9722 return comp(ls, rs);
9723 }
9724 }
9725 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9726 {
9727 xpath_allocator_capture cr(stack.result);
9728
9729 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9730 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9731
9732 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9733 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9734 {
9735 xpath_allocator_capture cri(stack.result);
9736
9737 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result)))
9738 return true;
9739 }
9740
9741 return false;
9742 }
9743 else
9744 {
9745 if (lt == xpath_type_node_set)
9746 {
9747 swap(lhs, rhs);
9748 swap(lt, rt);
9749 }
9750
9751 if (lt == xpath_type_boolean)
9752 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack));
9753 else if (lt == xpath_type_number)
9754 {
9755 xpath_allocator_capture cr(stack.result);
9756
9757 double l = lhs->eval_number(c, stack);
9758 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9759
9760 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9761 {
9762 xpath_allocator_capture cri(stack.result);
9763
9764 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9765 return true;
9766 }
9767
9768 return false;
9769 }
9770 else if (lt == xpath_type_string)
9771 {
9772 xpath_allocator_capture cr(stack.result);
9773
9774 xpath_string l = lhs->eval_string(c, stack);
9775 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9776
9777 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9778 {
9779 xpath_allocator_capture cri(stack.result);
9780
9781 if (comp(l, string_value(*ri, stack.result)))
9782 return true;
9783 }
9784
9785 return false;
9786 }
9787 }
9788
9789 assert(false && "Wrong types"); // unreachable
9790 return false;
9791 }
9792
9793 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval)
9794 {
9795 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any;
9796 }
9797
9798 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp)
9799 {
9800 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype();
9801
9802 if (lt != xpath_type_node_set && rt != xpath_type_node_set)
9803 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack));
9804 else if (lt == xpath_type_node_set && rt == xpath_type_node_set)
9805 {
9806 xpath_allocator_capture cr(stack.result);
9807
9808 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9809 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9810
9811 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9812 {
9813 xpath_allocator_capture cri(stack.result);
9814
9815 double l = convert_string_to_number(string_value(*li, stack.result).c_str());
9816
9817 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9818 {
9819 xpath_allocator_capture crii(stack.result);
9820
9821 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9822 return true;
9823 }
9824 }
9825
9826 return false;
9827 }
9828 else if (lt != xpath_type_node_set && rt == xpath_type_node_set)
9829 {
9830 xpath_allocator_capture cr(stack.result);
9831
9832 double l = lhs->eval_number(c, stack);
9833 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all);
9834
9835 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri)
9836 {
9837 xpath_allocator_capture cri(stack.result);
9838
9839 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str())))
9840 return true;
9841 }
9842
9843 return false;
9844 }
9845 else if (lt == xpath_type_node_set && rt != xpath_type_node_set)
9846 {
9847 xpath_allocator_capture cr(stack.result);
9848
9849 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all);
9850 double r = rhs->eval_number(c, stack);
9851
9852 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li)
9853 {
9854 xpath_allocator_capture cri(stack.result);
9855
9856 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r))
9857 return true;
9858 }
9859
9860 return false;
9861 }
9862 else
9863 {
9864 assert(false && "Wrong types"); // unreachable
9865 return false;
9866 }
9867 }
9868
9869 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9870 {
9871 assert(ns.size() >= first);
9872 assert(expr->rettype() != xpath_type_number);
9873
9874 size_t i = 1;
9875 size_t size = ns.size() - first;
9876
9877 xpath_node* last = ns.begin() + first;
9878
9879 // remove_if... or well, sort of
9880 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9881 {
9882 xpath_context c(*it, i, size);
9883
9884 if (expr->eval_boolean(c, stack))
9885 {
9886 *last++ = *it;
9887
9888 if (once) break;
9889 }
9890 }
9891
9892 ns.truncate(last);
9893 }
9894
9895 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once)
9896 {
9897 assert(ns.size() >= first);
9898 assert(expr->rettype() == xpath_type_number);
9899
9900 size_t i = 1;
9901 size_t size = ns.size() - first;
9902
9903 xpath_node* last = ns.begin() + first;
9904
9905 // remove_if... or well, sort of
9906 for (xpath_node* it = last; it != ns.end(); ++it, ++i)
9907 {
9908 xpath_context c(*it, i, size);
9909
9910 if (expr->eval_number(c, stack) == static_cast<double>(i))
9911 {
9912 *last++ = *it;
9913
9914 if (once) break;
9915 }
9916 }
9917
9918 ns.truncate(last);
9919 }
9920
9921 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack)
9922 {
9923 assert(ns.size() >= first);
9924 assert(expr->rettype() == xpath_type_number);
9925
9926 size_t size = ns.size() - first;
9927
9928 xpath_node* last = ns.begin() + first;
9929
9930 xpath_context c(xpath_node(), 1, size);
9931
9932 double er = expr->eval_number(c, stack);
9933
9934 if (er >= 1.0 && er <= static_cast<double>(size))
9935 {
9936 size_t eri = static_cast<size_t>(er);
9937
9938 if (er == static_cast<double>(eri))
9939 {
9940 xpath_node r = last[eri - 1];
9941
9942 *last++ = r;
9943 }
9944 }
9945
9946 ns.truncate(last);
9947 }
9948
9949 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once)
9950 {
9951 if (ns.size() == first) return;
9952
9953 assert(_type == ast_filter || _type == ast_predicate);
9954
9955 if (_test == predicate_constant || _test == predicate_constant_one)
9956 apply_predicate_number_const(ns, first, _right, stack);
9957 else if (_right->rettype() == xpath_type_number)
9958 apply_predicate_number(ns, first, _right, stack, once);
9959 else
9960 apply_predicate_boolean(ns, first, _right, stack, once);
9961 }
9962
9963 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval)
9964 {
9965 if (ns.size() == first) return;
9966
9967 bool last_once = eval_once(ns.type(), eval);
9968
9969 for (xpath_ast_node* pred = _right; pred; pred = pred->_next)
9970 pred->apply_predicate(ns, first, stack, !pred->_next && last_once);
9971 }
9972
9973 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc)
9974 {
9975 assert(a);
9976
9977 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT("");
9978
9979 switch (_test)
9980 {
9981 case nodetest_name:
9982 if (strequal(name, _data.nodetest) && is_xpath_attribute(name))
9983 {
9984 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9985 return true;
9986 }
9987 break;
9988
9989 case nodetest_type_node:
9990 case nodetest_all:
9991 if (is_xpath_attribute(name))
9992 {
9993 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
9994 return true;
9995 }
9996 break;
9997
9998 case nodetest_all_in_namespace:
9999 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name))
10000 {
10001 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc);
10002 return true;
10003 }
10004 break;
10005
10006 default:
10007 ;
10008 }
10009
10010 return false;
10011 }
10012
10013 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc)
10014 {
10015 assert(n);
10016
10017 xml_node_type type = PUGI__NODETYPE(n);
10018
10019 switch (_test)
10020 {
10021 case nodetest_name:
10022 if (type == node_element && n->name && strequal(n->name, _data.nodetest))
10023 {
10024 ns.push_back(xml_node(n), alloc);
10025 return true;
10026 }
10027 break;
10028
10029 case nodetest_type_node:
10030 ns.push_back(xml_node(n), alloc);
10031 return true;
10032
10033 case nodetest_type_comment:
10034 if (type == node_comment)
10035 {
10036 ns.push_back(xml_node(n), alloc);
10037 return true;
10038 }
10039 break;
10040
10041 case nodetest_type_text:
10042 if (type == node_pcdata || type == node_cdata)
10043 {
10044 ns.push_back(xml_node(n), alloc);
10045 return true;
10046 }
10047 break;
10048
10049 case nodetest_type_pi:
10050 if (type == node_pi)
10051 {
10052 ns.push_back(xml_node(n), alloc);
10053 return true;
10054 }
10055 break;
10056
10057 case nodetest_pi:
10058 if (type == node_pi && n->name && strequal(n->name, _data.nodetest))
10059 {
10060 ns.push_back(xml_node(n), alloc);
10061 return true;
10062 }
10063 break;
10064
10065 case nodetest_all:
10066 if (type == node_element)
10067 {
10068 ns.push_back(xml_node(n), alloc);
10069 return true;
10070 }
10071 break;
10072
10073 case nodetest_all_in_namespace:
10074 if (type == node_element && n->name && starts_with(n->name, _data.nodetest))
10075 {
10076 ns.push_back(xml_node(n), alloc);
10077 return true;
10078 }
10079 break;
10080
10081 default:
10082 assert(false && "Unknown axis"); // unreachable
10083 }
10084
10085 return false;
10086 }
10087
10088 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
10089 {
10090 const axis_t axis = T::axis;
10091
10092 switch (axis)
10093 {
10094 case axis_attribute:
10095 {
10096 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
10097 if (step_push(ns, a, n, alloc) & once)
10098 return;
10099
10100 break;
10101 }
10102
10103 case axis_child:
10104 {
10105 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
10106 if (step_push(ns, c, alloc) & once)
10107 return;
10108
10109 break;
10110 }
10111
10112 case axis_descendant:
10113 case axis_descendant_or_self:
10114 {
10115 if (axis == axis_descendant_or_self)
10116 if (step_push(ns, n, alloc) & once)
10117 return;
10118
10119 xml_node_struct* cur = n->first_child;
10120
10121 while (cur)
10122 {
10123 if (step_push(ns, cur, alloc) & once)
10124 return;
10125
10126 if (cur->first_child)
10127 cur = cur->first_child;
10128 else
10129 {
10130 while (!cur->next_sibling)
10131 {
10132 cur = cur->parent;
10133
10134 if (cur == n) return;
10135 }
10136
10137 cur = cur->next_sibling;
10138 }
10139 }
10140
10141 break;
10142 }
10143
10144 case axis_following_sibling:
10145 {
10146 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
10147 if (step_push(ns, c, alloc) & once)
10148 return;
10149
10150 break;
10151 }
10152
10153 case axis_preceding_sibling:
10154 {
10155 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
10156 if (step_push(ns, c, alloc) & once)
10157 return;
10158
10159 break;
10160 }
10161
10162 case axis_following:
10163 {
10164 xml_node_struct* cur = n;
10165
10166 // exit from this node so that we don't include descendants
10167 while (!cur->next_sibling)
10168 {
10169 cur = cur->parent;
10170
10171 if (!cur) return;
10172 }
10173
10174 cur = cur->next_sibling;
10175
10176 while (cur)
10177 {
10178 if (step_push(ns, cur, alloc) & once)
10179 return;
10180
10181 if (cur->first_child)
10182 cur = cur->first_child;
10183 else
10184 {
10185 while (!cur->next_sibling)
10186 {
10187 cur = cur->parent;
10188
10189 if (!cur) return;
10190 }
10191
10192 cur = cur->next_sibling;
10193 }
10194 }
10195
10196 break;
10197 }
10198
10199 case axis_preceding:
10200 {
10201 xml_node_struct* cur = n;
10202
10203 // exit from this node so that we don't include descendants
10204 while (!cur->prev_sibling_c->next_sibling)
10205 {
10206 cur = cur->parent;
10207
10208 if (!cur) return;
10209 }
10210
10211 cur = cur->prev_sibling_c;
10212
10213 while (cur)
10214 {
10215 if (cur->first_child)
10216 cur = cur->first_child->prev_sibling_c;
10217 else
10218 {
10219 // leaf node, can't be ancestor
10220 if (step_push(ns, cur, alloc) & once)
10221 return;
10222
10223 while (!cur->prev_sibling_c->next_sibling)
10224 {
10225 cur = cur->parent;
10226
10227 if (!cur) return;
10228
10229 if (!node_is_ancestor(cur, n))
10230 if (step_push(ns, cur, alloc) & once)
10231 return;
10232 }
10233
10234 cur = cur->prev_sibling_c;
10235 }
10236 }
10237
10238 break;
10239 }
10240
10241 case axis_ancestor:
10242 case axis_ancestor_or_self:
10243 {
10244 if (axis == axis_ancestor_or_self)
10245 if (step_push(ns, n, alloc) & once)
10246 return;
10247
10248 xml_node_struct* cur = n->parent;
10249
10250 while (cur)
10251 {
10252 if (step_push(ns, cur, alloc) & once)
10253 return;
10254
10255 cur = cur->parent;
10256 }
10257
10258 break;
10259 }
10260
10261 case axis_self:
10262 {
10263 step_push(ns, n, alloc);
10264
10265 break;
10266 }
10267
10268 case axis_parent:
10269 {
10270 if (n->parent)
10271 step_push(ns, n->parent, alloc);
10272
10273 break;
10274 }
10275
10276 default:
10277 assert(false && "Unimplemented axis"); // unreachable
10278 }
10279 }
10280
10281 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v)
10282 {
10283 const axis_t axis = T::axis;
10284
10285 switch (axis)
10286 {
10287 case axis_ancestor:
10288 case axis_ancestor_or_self:
10289 {
10290 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test
10291 if (step_push(ns, a, p, alloc) & once)
10292 return;
10293
10294 xml_node_struct* cur = p;
10295
10296 while (cur)
10297 {
10298 if (step_push(ns, cur, alloc) & once)
10299 return;
10300
10301 cur = cur->parent;
10302 }
10303
10304 break;
10305 }
10306
10307 case axis_descendant_or_self:
10308 case axis_self:
10309 {
10310 if (_test == nodetest_type_node) // reject attributes based on principal node type test
10311 step_push(ns, a, p, alloc);
10312
10313 break;
10314 }
10315
10316 case axis_following:
10317 {
10318 xml_node_struct* cur = p;
10319
10320 while (cur)
10321 {
10322 if (cur->first_child)
10323 cur = cur->first_child;
10324 else
10325 {
10326 while (!cur->next_sibling)
10327 {
10328 cur = cur->parent;
10329
10330 if (!cur) return;
10331 }
10332
10333 cur = cur->next_sibling;
10334 }
10335
10336 if (step_push(ns, cur, alloc) & once)
10337 return;
10338 }
10339
10340 break;
10341 }
10342
10343 case axis_parent:
10344 {
10345 step_push(ns, p, alloc);
10346
10347 break;
10348 }
10349
10350 case axis_preceding:
10351 {
10352 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding
10353 step_fill(ns, p, alloc, once, v);
10354 break;
10355 }
10356
10357 default:
10358 assert(false && "Unimplemented axis"); // unreachable
10359 }
10360 }
10361
10362 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v)
10363 {
10364 const axis_t axis = T::axis;
10365 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self);
10366
10367 if (xn.node())
10368 step_fill(ns, xn.node().internal_object(), alloc, once, v);
10369 else if (axis_has_attributes && xn.attribute() && xn.parent())
10370 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v);
10371 }
10372
10373 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v)
10374 {
10375 const axis_t axis = T::axis;
10376 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling);
10377 const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted;
10378
10379 bool once =
10380 (axis == axis_attribute && _test == nodetest_name) ||
10381 (!_right && eval_once(axis_type, eval)) ||
10382 // coverity[mixed_enums]
10383 (_right && !_right->_next && _right->_test == predicate_constant_one);
10384
10385 xpath_node_set_raw ns;
10386 ns.set_type(axis_type);
10387
10388 if (_left)
10389 {
10390 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all);
10391
10392 // self axis preserves the original order
10393 if (axis == axis_self) ns.set_type(s.type());
10394
10395 for (const xpath_node* it = s.begin(); it != s.end(); ++it)
10396 {
10397 size_t size = ns.size();
10398
10399 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes
10400 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted);
10401
10402 step_fill(ns, *it, stack.result, once, v);
10403 if (_right) apply_predicates(ns, size, stack, eval);
10404 }
10405 }
10406 else
10407 {
10408 step_fill(ns, c.n, stack.result, once, v);
10409 if (_right) apply_predicates(ns, 0, stack, eval);
10410 }
10411
10412 // child, attribute and self axes always generate unique set of nodes
10413 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice
10414 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted)
10415 ns.remove_duplicates(stack.temp);
10416
10417 return ns;
10418 }
10419
10420 public:
10421 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value):
10422 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10423 {
10424 assert(type == ast_string_constant);
10425 _data.string = value;
10426 }
10427
10428 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value):
10429 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10430 {
10431 assert(type == ast_number_constant);
10432 _data.number = value;
10433 }
10434
10435 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value):
10436 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0)
10437 {
10438 assert(type == ast_variable);
10439 _data.variable = value;
10440 }
10441
10442 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0):
10443 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0)
10444 {
10445 }
10446
10447 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents):
10448 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0)
10449 {
10450 assert(type == ast_step);
10451 _data.nodetest = contents;
10452 }
10453
10454 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test):
10455 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0)
10456 {
10457 assert(type == ast_filter || type == ast_predicate);
10458 }
10459
10460 void set_next(xpath_ast_node* value)
10461 {
10462 _next = value;
10463 }
10464
10465 void set_right(xpath_ast_node* value)
10466 {
10467 _right = value;
10468 }
10469
10470 bool eval_boolean(const xpath_context& c, const xpath_stack& stack)
10471 {
10472 switch (_type)
10473 {
10474 case ast_op_or:
10475 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack);
10476
10477 case ast_op_and:
10478 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack);
10479
10480 case ast_op_equal:
10481 return compare_eq(_left, _right, c, stack, equal_to());
10482
10483 case ast_op_not_equal:
10484 return compare_eq(_left, _right, c, stack, not_equal_to());
10485
10486 case ast_op_less:
10487 return compare_rel(_left, _right, c, stack, less());
10488
10489 case ast_op_greater:
10490 return compare_rel(_right, _left, c, stack, less());
10491
10492 case ast_op_less_or_equal:
10493 return compare_rel(_left, _right, c, stack, less_equal());
10494
10495 case ast_op_greater_or_equal:
10496 return compare_rel(_right, _left, c, stack, less_equal());
10497
10498 case ast_func_starts_with:
10499 {
10500 xpath_allocator_capture cr(stack.result);
10501
10502 xpath_string lr = _left->eval_string(c, stack);
10503 xpath_string rr = _right->eval_string(c, stack);
10504
10505 return starts_with(lr.c_str(), rr.c_str());
10506 }
10507
10508 case ast_func_contains:
10509 {
10510 xpath_allocator_capture cr(stack.result);
10511
10512 xpath_string lr = _left->eval_string(c, stack);
10513 xpath_string rr = _right->eval_string(c, stack);
10514
10515 return find_substring(lr.c_str(), rr.c_str()) != 0;
10516 }
10517
10518 case ast_func_boolean:
10519 return _left->eval_boolean(c, stack);
10520
10521 case ast_func_not:
10522 return !_left->eval_boolean(c, stack);
10523
10524 case ast_func_true:
10525 return true;
10526
10527 case ast_func_false:
10528 return false;
10529
10530 case ast_func_lang:
10531 {
10532 if (c.n.attribute()) return false;
10533
10534 xpath_allocator_capture cr(stack.result);
10535
10536 xpath_string lang = _left->eval_string(c, stack);
10537
10538 for (xml_node n = c.n.node(); n; n = n.parent())
10539 {
10540 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang"));
10541
10542 if (a)
10543 {
10544 const char_t* value = a.value();
10545
10546 // strnicmp / strncasecmp is not portable
10547 for (const char_t* lit = lang.c_str(); *lit; ++lit)
10548 {
10549 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false;
10550 ++value;
10551 }
10552
10553 return *value == 0 || *value == '-';
10554 }
10555 }
10556
10557 return false;
10558 }
10559
10560 case ast_opt_compare_attribute:
10561 {
10562 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string();
10563
10564 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest);
10565
10566 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name());
10567 }
10568
10569 case ast_variable:
10570 {
10571 assert(_rettype == _data.variable->type());
10572
10573 if (_rettype == xpath_type_boolean)
10574 return _data.variable->get_boolean();
10575
10576 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
10577 break;
10578 }
10579
10580 default:
10581 ;
10582 }
10583
10584 // none of the ast types that return the value directly matched, we need to perform type conversion
10585 switch (_rettype)
10586 {
10587 case xpath_type_number:
10588 return convert_number_to_boolean(eval_number(c, stack));
10589
10590 case xpath_type_string:
10591 {
10592 xpath_allocator_capture cr(stack.result);
10593
10594 return !eval_string(c, stack).empty();
10595 }
10596
10597 case xpath_type_node_set:
10598 {
10599 xpath_allocator_capture cr(stack.result);
10600
10601 return !eval_node_set(c, stack, nodeset_eval_any).empty();
10602 }
10603
10604 default:
10605 assert(false && "Wrong expression for return type boolean"); // unreachable
10606 return false;
10607 }
10608 }
10609
10610 double eval_number(const xpath_context& c, const xpath_stack& stack)
10611 {
10612 switch (_type)
10613 {
10614 case ast_op_add:
10615 return _left->eval_number(c, stack) + _right->eval_number(c, stack);
10616
10617 case ast_op_subtract:
10618 return _left->eval_number(c, stack) - _right->eval_number(c, stack);
10619
10620 case ast_op_multiply:
10621 return _left->eval_number(c, stack) * _right->eval_number(c, stack);
10622
10623 case ast_op_divide:
10624 return _left->eval_number(c, stack) / _right->eval_number(c, stack);
10625
10626 case ast_op_mod:
10627 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack));
10628
10629 case ast_op_negate:
10630 return -_left->eval_number(c, stack);
10631
10632 case ast_number_constant:
10633 return _data.number;
10634
10635 case ast_func_last:
10636 return static_cast<double>(c.size);
10637
10638 case ast_func_position:
10639 return static_cast<double>(c.position);
10640
10641 case ast_func_count:
10642 {
10643 xpath_allocator_capture cr(stack.result);
10644
10645 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size());
10646 }
10647
10648 case ast_func_string_length_0:
10649 {
10650 xpath_allocator_capture cr(stack.result);
10651
10652 return static_cast<double>(string_value(c.n, stack.result).length());
10653 }
10654
10655 case ast_func_string_length_1:
10656 {
10657 xpath_allocator_capture cr(stack.result);
10658
10659 return static_cast<double>(_left->eval_string(c, stack).length());
10660 }
10661
10662 case ast_func_number_0:
10663 {
10664 xpath_allocator_capture cr(stack.result);
10665
10666 return convert_string_to_number(string_value(c.n, stack.result).c_str());
10667 }
10668
10669 case ast_func_number_1:
10670 return _left->eval_number(c, stack);
10671
10672 case ast_func_sum:
10673 {
10674 xpath_allocator_capture cr(stack.result);
10675
10676 double r = 0;
10677
10678 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all);
10679
10680 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it)
10681 {
10682 xpath_allocator_capture cri(stack.result);
10683
10684 r += convert_string_to_number(string_value(*it, stack.result).c_str());
10685 }
10686
10687 return r;
10688 }
10689
10690 case ast_func_floor:
10691 {
10692 double r = _left->eval_number(c, stack);
10693
10694 return r == r ? floor(r) : r;
10695 }
10696
10697 case ast_func_ceiling:
10698 {
10699 double r = _left->eval_number(c, stack);
10700
10701 return r == r ? ceil(r) : r;
10702 }
10703
10704 case ast_func_round:
10705 return round_nearest_nzero(_left->eval_number(c, stack));
10706
10707 case ast_variable:
10708 {
10709 assert(_rettype == _data.variable->type());
10710
10711 if (_rettype == xpath_type_number)
10712 return _data.variable->get_number();
10713
10714 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
10715 break;
10716 }
10717
10718 default:
10719 ;
10720 }
10721
10722 // none of the ast types that return the value directly matched, we need to perform type conversion
10723 switch (_rettype)
10724 {
10725 case xpath_type_boolean:
10726 return eval_boolean(c, stack) ? 1 : 0;
10727
10728 case xpath_type_string:
10729 {
10730 xpath_allocator_capture cr(stack.result);
10731
10732 return convert_string_to_number(eval_string(c, stack).c_str());
10733 }
10734
10735 case xpath_type_node_set:
10736 {
10737 xpath_allocator_capture cr(stack.result);
10738
10739 return convert_string_to_number(eval_string(c, stack).c_str());
10740 }
10741
10742 default:
10743 assert(false && "Wrong expression for return type number"); // unreachable
10744 return 0;
10745 }
10746 }
10747
10748 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack)
10749 {
10750 assert(_type == ast_func_concat);
10751
10752 xpath_allocator_capture ct(stack.temp);
10753
10754 // count the string number
10755 size_t count = 1;
10756 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++;
10757
10758 // allocate a buffer for temporary string objects
10759 xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string)));
10760 if (!buffer) return xpath_string();
10761
10762 // evaluate all strings to temporary stack
10763 xpath_stack swapped_stack = {stack.temp, stack.result};
10764
10765 buffer[0] = _left->eval_string(c, swapped_stack);
10766
10767 size_t pos = 1;
10768 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack);
10769 assert(pos == count);
10770
10771 // get total length
10772 size_t length = 0;
10773 for (size_t i = 0; i < count; ++i) length += buffer[i].length();
10774
10775 // create final string
10776 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t)));
10777 if (!result) return xpath_string();
10778
10779 char_t* ri = result;
10780
10781 for (size_t j = 0; j < count; ++j)
10782 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi)
10783 *ri++ = *bi;
10784
10785 *ri = 0;
10786
10787 return xpath_string::from_heap_preallocated(result, ri);
10788 }
10789
10790 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack)
10791 {
10792 switch (_type)
10793 {
10794 case ast_string_constant:
10795 return xpath_string::from_const(_data.string);
10796
10797 case ast_func_local_name_0:
10798 {
10799 xpath_node na = c.n;
10800
10801 return xpath_string::from_const(local_name(na));
10802 }
10803
10804 case ast_func_local_name_1:
10805 {
10806 xpath_allocator_capture cr(stack.result);
10807
10808 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10809 xpath_node na = ns.first();
10810
10811 return xpath_string::from_const(local_name(na));
10812 }
10813
10814 case ast_func_name_0:
10815 {
10816 xpath_node na = c.n;
10817
10818 return xpath_string::from_const(qualified_name(na));
10819 }
10820
10821 case ast_func_name_1:
10822 {
10823 xpath_allocator_capture cr(stack.result);
10824
10825 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10826 xpath_node na = ns.first();
10827
10828 return xpath_string::from_const(qualified_name(na));
10829 }
10830
10831 case ast_func_namespace_uri_0:
10832 {
10833 xpath_node na = c.n;
10834
10835 return xpath_string::from_const(namespace_uri(na));
10836 }
10837
10838 case ast_func_namespace_uri_1:
10839 {
10840 xpath_allocator_capture cr(stack.result);
10841
10842 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first);
10843 xpath_node na = ns.first();
10844
10845 return xpath_string::from_const(namespace_uri(na));
10846 }
10847
10848 case ast_func_string_0:
10849 return string_value(c.n, stack.result);
10850
10851 case ast_func_string_1:
10852 return _left->eval_string(c, stack);
10853
10854 case ast_func_concat:
10855 return eval_string_concat(c, stack);
10856
10857 case ast_func_substring_before:
10858 {
10859 xpath_allocator_capture cr(stack.temp);
10860
10861 xpath_stack swapped_stack = {stack.temp, stack.result};
10862
10863 xpath_string s = _left->eval_string(c, swapped_stack);
10864 xpath_string p = _right->eval_string(c, swapped_stack);
10865
10866 const char_t* pos = find_substring(s.c_str(), p.c_str());
10867
10868 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string();
10869 }
10870
10871 case ast_func_substring_after:
10872 {
10873 xpath_allocator_capture cr(stack.temp);
10874
10875 xpath_stack swapped_stack = {stack.temp, stack.result};
10876
10877 xpath_string s = _left->eval_string(c, swapped_stack);
10878 xpath_string p = _right->eval_string(c, swapped_stack);
10879
10880 const char_t* pos = find_substring(s.c_str(), p.c_str());
10881 if (!pos) return xpath_string();
10882
10883 const char_t* rbegin = pos + p.length();
10884 const char_t* rend = s.c_str() + s.length();
10885
10886 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10887 }
10888
10889 case ast_func_substring_2:
10890 {
10891 xpath_allocator_capture cr(stack.temp);
10892
10893 xpath_stack swapped_stack = {stack.temp, stack.result};
10894
10895 xpath_string s = _left->eval_string(c, swapped_stack);
10896 size_t s_length = s.length();
10897
10898 double first = round_nearest(_right->eval_number(c, stack));
10899
10900 if (is_nan(first)) return xpath_string(); // NaN
10901 else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
10902
10903 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10904 assert(1 <= pos && pos <= s_length + 1);
10905
10906 const char_t* rbegin = s.c_str() + (pos - 1);
10907 const char_t* rend = s.c_str() + s.length();
10908
10909 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin);
10910 }
10911
10912 case ast_func_substring_3:
10913 {
10914 xpath_allocator_capture cr(stack.temp);
10915
10916 xpath_stack swapped_stack = {stack.temp, stack.result};
10917
10918 xpath_string s = _left->eval_string(c, swapped_stack);
10919 size_t s_length = s.length();
10920
10921 double first = round_nearest(_right->eval_number(c, stack));
10922 double last = first + round_nearest(_right->_next->eval_number(c, stack));
10923
10924 if (is_nan(first) || is_nan(last)) return xpath_string();
10925 else if (first >= static_cast<double>(s_length + 1)) return xpath_string();
10926 else if (first >= last) return xpath_string();
10927 else if (last < 1) return xpath_string();
10928
10929 size_t pos = first < 1 ? 1 : static_cast<size_t>(first);
10930 size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last);
10931
10932 assert(1 <= pos && pos <= end && end <= s_length + 1);
10933 const char_t* rbegin = s.c_str() + (pos - 1);
10934 const char_t* rend = s.c_str() + (end - 1);
10935
10936 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result);
10937 }
10938
10939 case ast_func_normalize_space_0:
10940 {
10941 xpath_string s = string_value(c.n, stack.result);
10942
10943 char_t* begin = s.data(stack.result);
10944 if (!begin) return xpath_string();
10945
10946 char_t* end = normalize_space(begin);
10947
10948 return xpath_string::from_heap_preallocated(begin, end);
10949 }
10950
10951 case ast_func_normalize_space_1:
10952 {
10953 xpath_string s = _left->eval_string(c, stack);
10954
10955 char_t* begin = s.data(stack.result);
10956 if (!begin) return xpath_string();
10957
10958 char_t* end = normalize_space(begin);
10959
10960 return xpath_string::from_heap_preallocated(begin, end);
10961 }
10962
10963 case ast_func_translate:
10964 {
10965 xpath_allocator_capture cr(stack.temp);
10966
10967 xpath_stack swapped_stack = {stack.temp, stack.result};
10968
10969 xpath_string s = _left->eval_string(c, stack);
10970 xpath_string from = _right->eval_string(c, swapped_stack);
10971 xpath_string to = _right->_next->eval_string(c, swapped_stack);
10972
10973 char_t* begin = s.data(stack.result);
10974 if (!begin) return xpath_string();
10975
10976 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length());
10977
10978 return xpath_string::from_heap_preallocated(begin, end);
10979 }
10980
10981 case ast_opt_translate_table:
10982 {
10983 xpath_string s = _left->eval_string(c, stack);
10984
10985 char_t* begin = s.data(stack.result);
10986 if (!begin) return xpath_string();
10987
10988 char_t* end = translate_table(begin, _data.table);
10989
10990 return xpath_string::from_heap_preallocated(begin, end);
10991 }
10992
10993 case ast_variable:
10994 {
10995 assert(_rettype == _data.variable->type());
10996
10997 if (_rettype == xpath_type_string)
10998 return xpath_string::from_const(_data.variable->get_string());
10999
11000 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
11001 break;
11002 }
11003
11004 default:
11005 ;
11006 }
11007
11008 // none of the ast types that return the value directly matched, we need to perform type conversion
11009 switch (_rettype)
11010 {
11011 case xpath_type_boolean:
11012 return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"));
11013
11014 case xpath_type_number:
11015 return convert_number_to_string(eval_number(c, stack), stack.result);
11016
11017 case xpath_type_node_set:
11018 {
11019 xpath_allocator_capture cr(stack.temp);
11020
11021 xpath_stack swapped_stack = {stack.temp, stack.result};
11022
11023 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first);
11024 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result);
11025 }
11026
11027 default:
11028 assert(false && "Wrong expression for return type string"); // unreachable
11029 return xpath_string();
11030 }
11031 }
11032
11033 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval)
11034 {
11035 switch (_type)
11036 {
11037 case ast_op_union:
11038 {
11039 xpath_allocator_capture cr(stack.temp);
11040
11041 xpath_stack swapped_stack = {stack.temp, stack.result};
11042
11043 xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval);
11044 xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval);
11045
11046 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother
11047 ls.set_type(xpath_node_set::type_unsorted);
11048
11049 ls.append(rs.begin(), rs.end(), stack.result);
11050 ls.remove_duplicates(stack.temp);
11051
11052 return ls;
11053 }
11054
11055 case ast_filter:
11056 {
11057 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all);
11058
11059 // either expression is a number or it contains position() call; sort by document order
11060 if (_test != predicate_posinv) set.sort_do();
11061
11062 bool once = eval_once(set.type(), eval);
11063
11064 apply_predicate(set, 0, stack, once);
11065
11066 return set;
11067 }
11068
11069 case ast_func_id:
11070 return xpath_node_set_raw();
11071
11072 case ast_step:
11073 {
11074 switch (_axis)
11075 {
11076 case axis_ancestor:
11077 return step_do(c, stack, eval, axis_to_type<axis_ancestor>());
11078
11079 case axis_ancestor_or_self:
11080 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>());
11081
11082 case axis_attribute:
11083 return step_do(c, stack, eval, axis_to_type<axis_attribute>());
11084
11085 case axis_child:
11086 return step_do(c, stack, eval, axis_to_type<axis_child>());
11087
11088 case axis_descendant:
11089 return step_do(c, stack, eval, axis_to_type<axis_descendant>());
11090
11091 case axis_descendant_or_self:
11092 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>());
11093
11094 case axis_following:
11095 return step_do(c, stack, eval, axis_to_type<axis_following>());
11096
11097 case axis_following_sibling:
11098 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>());
11099
11100 case axis_namespace:
11101 // namespaced axis is not supported
11102 return xpath_node_set_raw();
11103
11104 case axis_parent:
11105 return step_do(c, stack, eval, axis_to_type<axis_parent>());
11106
11107 case axis_preceding:
11108 return step_do(c, stack, eval, axis_to_type<axis_preceding>());
11109
11110 case axis_preceding_sibling:
11111 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>());
11112
11113 case axis_self:
11114 return step_do(c, stack, eval, axis_to_type<axis_self>());
11115
11116 default:
11117 assert(false && "Unknown axis"); // unreachable
11118 return xpath_node_set_raw();
11119 }
11120 }
11121
11122 case ast_step_root:
11123 {
11124 assert(!_right); // root step can't have any predicates
11125
11126 xpath_node_set_raw ns;
11127
11128 ns.set_type(xpath_node_set::type_sorted);
11129
11130 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result);
11131 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result);
11132
11133 return ns;
11134 }
11135
11136 case ast_variable:
11137 {
11138 assert(_rettype == _data.variable->type());
11139
11140 if (_rettype == xpath_type_node_set)
11141 {
11142 const xpath_node_set& s = _data.variable->get_node_set();
11143
11144 xpath_node_set_raw ns;
11145
11146 ns.set_type(s.type());
11147 ns.append(s.begin(), s.end(), stack.result);
11148
11149 return ns;
11150 }
11151
11152 // variable needs to be converted to the correct type, this is handled by the fallthrough block below
11153 break;
11154 }
11155
11156 default:
11157 ;
11158 }
11159
11160 // none of the ast types that return the value directly matched, but conversions to node set are invalid
11161 assert(false && "Wrong expression for return type node set"); // unreachable
11162 return xpath_node_set_raw();
11163 }
11164
11165 void optimize(xpath_allocator* alloc)
11166 {
11167 if (_left)
11168 _left->optimize(alloc);
11169
11170 if (_right)
11171 _right->optimize(alloc);
11172
11173 if (_next)
11174 _next->optimize(alloc);
11175
11176 // coverity[var_deref_model]
11177 optimize_self(alloc);
11178 }
11179
11180 void optimize_self(xpath_allocator* alloc)
11181 {
11182 // Rewrite [position()=expr] with [expr]
11183 // Note that this step has to go before classification to recognize [position()=1]
11184 if ((_type == ast_filter || _type == ast_predicate) &&
11185 _right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
11186 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number)
11187 {
11188 _right = _right->_right;
11189 }
11190
11191 // Classify filter/predicate ops to perform various optimizations during evaluation
11192 if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate)
11193 {
11194 assert(_test == predicate_default);
11195
11196 if (_right->_type == ast_number_constant && _right->_data.number == 1.0)
11197 _test = predicate_constant_one;
11198 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last))
11199 _test = predicate_constant;
11200 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr())
11201 _test = predicate_posinv;
11202 }
11203
11204 // Rewrite descendant-or-self::node()/child::foo with descendant::foo
11205 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately
11206 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes
11207 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1])
11208 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) &&
11209 _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right &&
11210 is_posinv_step())
11211 {
11212 if (_axis == axis_child || _axis == axis_descendant)
11213 _axis = axis_descendant;
11214 else
11215 _axis = axis_descendant_or_self;
11216
11217 _left = _left->_left;
11218 }
11219
11220 // Use optimized lookup table implementation for translate() with constant arguments
11221 if (_type == ast_func_translate &&
11222 _right && // workaround for clang static analyzer (_right is never null for ast_func_translate)
11223 _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant)
11224 {
11225 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string);
11226
11227 if (table)
11228 {
11229 _type = ast_opt_translate_table;
11230 _data.table = table;
11231 }
11232 }
11233
11234 // Use optimized path for @attr = 'value' or @attr = $value
11235 if (_type == ast_op_equal &&
11236 _left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal)
11237 // coverity[mixed_enums]
11238 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right &&
11239 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string)))
11240 {
11241 _type = ast_opt_compare_attribute;
11242 }
11243 }
11244
11245 bool is_posinv_expr() const
11246 {
11247 switch (_type)
11248 {
11249 case ast_func_position:
11250 case ast_func_last:
11251 return false;
11252
11253 case ast_string_constant:
11254 case ast_number_constant:
11255 case ast_variable:
11256 return true;
11257
11258 case ast_step:
11259 case ast_step_root:
11260 return true;
11261
11262 case ast_predicate:
11263 case ast_filter:
11264 return true;
11265
11266 default:
11267 if (_left && !_left->is_posinv_expr()) return false;
11268
11269 for (xpath_ast_node* n = _right; n; n = n->_next)
11270 if (!n->is_posinv_expr()) return false;
11271
11272 return true;
11273 }
11274 }
11275
11276 bool is_posinv_step() const
11277 {
11278 assert(_type == ast_step);
11279
11280 for (xpath_ast_node* n = _right; n; n = n->_next)
11281 {
11282 assert(n->_type == ast_predicate);
11283
11284 if (n->_test != predicate_posinv)
11285 return false;
11286 }
11287
11288 return true;
11289 }
11290
11291 xpath_value_type rettype() const
11292 {
11293 return static_cast<xpath_value_type>(_rettype);
11294 }
11295 };
11296
// Upper bound for the depth counter maintained by the XPath parser.
// Defaults to 1024 unless PUGIXML_XPATH_DEPTH_LIMIT is defined.
#ifdef PUGIXML_XPATH_DEPTH_LIMIT
static const size_t xpath_ast_depth_limit = PUGIXML_XPATH_DEPTH_LIMIT;
#else
static const size_t xpath_ast_depth_limit = 1024;
#endif
11304
11305 struct xpath_parser
11306 {
11307 xpath_allocator* _alloc;
11308 xpath_lexer _lexer;
11309
11310 const char_t* _query;
11311 xpath_variable_set* _variables;
11312
11313 xpath_parse_result* _result;
11314
11315 char_t _scratch[32];
11316
11317 size_t _depth;
11318
11319 xpath_ast_node* error(const char* message)
11320 {
11321 _result->error = message;
11322 _result->offset = _lexer.current_pos() - _query;
11323
11324 return 0;
11325 }
11326
11327 xpath_ast_node* error_oom()
11328 {
11329 assert(_alloc->_error);
11330 *_alloc->_error = true;
11331
11332 return 0;
11333 }
11334
11335 xpath_ast_node* error_rec()
11336 {
11337 return error("Exceeded maximum allowed query depth");
11338 }
11339
11340 void* alloc_node()
11341 {
11342 return _alloc->allocate(sizeof(xpath_ast_node));
11343 }
11344
11345 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value)
11346 {
11347 void* memory = alloc_node();
11348 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11349 }
11350
11351 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value)
11352 {
11353 void* memory = alloc_node();
11354 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11355 }
11356
11357 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value)
11358 {
11359 void* memory = alloc_node();
11360 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0;
11361 }
11362
11363 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0)
11364 {
11365 void* memory = alloc_node();
11366 return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0;
11367 }
11368
11369 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents)
11370 {
11371 void* memory = alloc_node();
11372 return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0;
11373 }
11374
11375 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test)
11376 {
11377 void* memory = alloc_node();
11378 return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0;
11379 }
11380
11381 const char_t* alloc_string(const xpath_lexer_string& value)
11382 {
11383 if (!value.begin)
11384 return PUGIXML_TEXT("");
11385
11386 size_t length = static_cast<size_t>(value.end - value.begin);
11387
11388 char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t)));
11389 if (!c) return 0;
11390
11391 memcpy(c, value.begin, length * sizeof(char_t));
11392 c[length] = 0;
11393
11394 return c;
11395 }
11396
11397 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2])
11398 {
11399 switch (name.begin[0])
11400 {
11401 case 'b':
11402 if (name == PUGIXML_TEXT("boolean") && argc == 1)
11403 return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]);
11404
11405 break;
11406
11407 case 'c':
11408 if (name == PUGIXML_TEXT("count") && argc == 1)
11409 {
11410 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11411 return alloc_node(ast_func_count, xpath_type_number, args[0]);
11412 }
11413 else if (name == PUGIXML_TEXT("contains") && argc == 2)
11414 return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]);
11415 else if (name == PUGIXML_TEXT("concat") && argc >= 2)
11416 return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]);
11417 else if (name == PUGIXML_TEXT("ceiling") && argc == 1)
11418 return alloc_node(ast_func_ceiling, xpath_type_number, args[0]);
11419
11420 break;
11421
11422 case 'f':
11423 if (name == PUGIXML_TEXT("false") && argc == 0)
11424 return alloc_node(ast_func_false, xpath_type_boolean);
11425 else if (name == PUGIXML_TEXT("floor") && argc == 1)
11426 return alloc_node(ast_func_floor, xpath_type_number, args[0]);
11427
11428 break;
11429
11430 case 'i':
11431 if (name == PUGIXML_TEXT("id") && argc == 1)
11432 return alloc_node(ast_func_id, xpath_type_node_set, args[0]);
11433
11434 break;
11435
11436 case 'l':
11437 if (name == PUGIXML_TEXT("last") && argc == 0)
11438 return alloc_node(ast_func_last, xpath_type_number);
11439 else if (name == PUGIXML_TEXT("lang") && argc == 1)
11440 return alloc_node(ast_func_lang, xpath_type_boolean, args[0]);
11441 else if (name == PUGIXML_TEXT("local-name") && argc <= 1)
11442 {
11443 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11444 return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]);
11445 }
11446
11447 break;
11448
11449 case 'n':
11450 if (name == PUGIXML_TEXT("name") && argc <= 1)
11451 {
11452 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11453 return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]);
11454 }
11455 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1)
11456 {
11457 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11458 return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]);
11459 }
11460 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1)
11461 return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]);
11462 else if (name == PUGIXML_TEXT("not") && argc == 1)
11463 return alloc_node(ast_func_not, xpath_type_boolean, args[0]);
11464 else if (name == PUGIXML_TEXT("number") && argc <= 1)
11465 return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]);
11466
11467 break;
11468
11469 case 'p':
11470 if (name == PUGIXML_TEXT("position") && argc == 0)
11471 return alloc_node(ast_func_position, xpath_type_number);
11472
11473 break;
11474
11475 case 'r':
11476 if (name == PUGIXML_TEXT("round") && argc == 1)
11477 return alloc_node(ast_func_round, xpath_type_number, args[0]);
11478
11479 break;
11480
11481 case 's':
11482 if (name == PUGIXML_TEXT("string") && argc <= 1)
11483 return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]);
11484 else if (name == PUGIXML_TEXT("string-length") && argc <= 1)
11485 return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]);
11486 else if (name == PUGIXML_TEXT("starts-with") && argc == 2)
11487 return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]);
11488 else if (name == PUGIXML_TEXT("substring-before") && argc == 2)
11489 return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]);
11490 else if (name == PUGIXML_TEXT("substring-after") && argc == 2)
11491 return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]);
11492 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3))
11493 return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]);
11494 else if (name == PUGIXML_TEXT("sum") && argc == 1)
11495 {
11496 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set");
11497 return alloc_node(ast_func_sum, xpath_type_number, args[0]);
11498 }
11499
11500 break;
11501
11502 case 't':
11503 if (name == PUGIXML_TEXT("translate") && argc == 3)
11504 return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]);
11505 else if (name == PUGIXML_TEXT("true") && argc == 0)
11506 return alloc_node(ast_func_true, xpath_type_boolean);
11507
11508 break;
11509
11510 default:
11511 break;
11512 }
11513
11514 return error("Unrecognized function or wrong parameter count");
11515 }
11516
11517 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified)
11518 {
11519 specified = true;
11520
11521 switch (name.begin[0])
11522 {
11523 case 'a':
11524 if (name == PUGIXML_TEXT("ancestor"))
11525 return axis_ancestor;
11526 else if (name == PUGIXML_TEXT("ancestor-or-self"))
11527 return axis_ancestor_or_self;
11528 else if (name == PUGIXML_TEXT("attribute"))
11529 return axis_attribute;
11530
11531 break;
11532
11533 case 'c':
11534 if (name == PUGIXML_TEXT("child"))
11535 return axis_child;
11536
11537 break;
11538
11539 case 'd':
11540 if (name == PUGIXML_TEXT("descendant"))
11541 return axis_descendant;
11542 else if (name == PUGIXML_TEXT("descendant-or-self"))
11543 return axis_descendant_or_self;
11544
11545 break;
11546
11547 case 'f':
11548 if (name == PUGIXML_TEXT("following"))
11549 return axis_following;
11550 else if (name == PUGIXML_TEXT("following-sibling"))
11551 return axis_following_sibling;
11552
11553 break;
11554
11555 case 'n':
11556 if (name == PUGIXML_TEXT("namespace"))
11557 return axis_namespace;
11558
11559 break;
11560
11561 case 'p':
11562 if (name == PUGIXML_TEXT("parent"))
11563 return axis_parent;
11564 else if (name == PUGIXML_TEXT("preceding"))
11565 return axis_preceding;
11566 else if (name == PUGIXML_TEXT("preceding-sibling"))
11567 return axis_preceding_sibling;
11568
11569 break;
11570
11571 case 's':
11572 if (name == PUGIXML_TEXT("self"))
11573 return axis_self;
11574
11575 break;
11576
11577 default:
11578 break;
11579 }
11580
11581 specified = false;
11582 return axis_child;
11583 }
11584
11585 nodetest_t parse_node_test_type(const xpath_lexer_string& name)
11586 {
11587 switch (name.begin[0])
11588 {
11589 case 'c':
11590 if (name == PUGIXML_TEXT("comment"))
11591 return nodetest_type_comment;
11592
11593 break;
11594
11595 case 'n':
11596 if (name == PUGIXML_TEXT("node"))
11597 return nodetest_type_node;
11598
11599 break;
11600
11601 case 'p':
11602 if (name == PUGIXML_TEXT("processing-instruction"))
11603 return nodetest_type_pi;
11604
11605 break;
11606
11607 case 't':
11608 if (name == PUGIXML_TEXT("text"))
11609 return nodetest_type_text;
11610
11611 break;
11612
11613 default:
11614 break;
11615 }
11616
11617 return nodetest_none;
11618 }
11619
11620 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall
11621 xpath_ast_node* parse_primary_expression()
11622 {
11623 switch (_lexer.current())
11624 {
11625 case lex_var_ref:
11626 {
11627 xpath_lexer_string name = _lexer.contents();
11628
11629 if (!_variables)
11630 return error("Unknown variable: variable set is not provided");
11631
11632 xpath_variable* var = 0;
11633 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var))
11634 return error_oom();
11635
11636 if (!var)
11637 return error("Unknown variable: variable set does not contain the given name");
11638
11639 _lexer.next();
11640
11641 return alloc_node(ast_variable, var->type(), var);
11642 }
11643
11644 case lex_open_brace:
11645 {
11646 _lexer.next();
11647
11648 xpath_ast_node* n = parse_expression();
11649 if (!n) return 0;
11650
11651 if (_lexer.current() != lex_close_brace)
11652 return error("Expected ')' to match an opening '('");
11653
11654 _lexer.next();
11655
11656 return n;
11657 }
11658
11659 case lex_quoted_string:
11660 {
11661 const char_t* value = alloc_string(_lexer.contents());
11662 if (!value) return 0;
11663
11664 _lexer.next();
11665
11666 return alloc_node(ast_string_constant, xpath_type_string, value);
11667 }
11668
11669 case lex_number:
11670 {
11671 double value = 0;
11672
11673 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value))
11674 return error_oom();
11675
11676 _lexer.next();
11677
11678 return alloc_node(ast_number_constant, xpath_type_number, value);
11679 }
11680
11681 case lex_string:
11682 {
11683 xpath_ast_node* args[2] = {0};
11684 size_t argc = 0;
11685
11686 xpath_lexer_string function = _lexer.contents();
11687 _lexer.next();
11688
11689 xpath_ast_node* last_arg = 0;
11690
11691 if (_lexer.current() != lex_open_brace)
11692 return error("Unrecognized function call");
11693 _lexer.next();
11694
11695 size_t old_depth = _depth;
11696
11697 while (_lexer.current() != lex_close_brace)
11698 {
11699 if (argc > 0)
11700 {
11701 if (_lexer.current() != lex_comma)
11702 return error("No comma between function arguments");
11703 _lexer.next();
11704 }
11705
11706 if (++_depth > xpath_ast_depth_limit)
11707 return error_rec();
11708
11709 xpath_ast_node* n = parse_expression();
11710 if (!n) return 0;
11711
11712 if (argc < 2) args[argc] = n;
11713 else last_arg->set_next(n);
11714
11715 argc++;
11716 last_arg = n;
11717 }
11718
11719 _lexer.next();
11720
11721 _depth = old_depth;
11722
11723 return parse_function(function, argc, args);
11724 }
11725
11726 default:
11727 return error("Unrecognizable primary expression");
11728 }
11729 }
11730
11731 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate
11732 // Predicate ::= '[' PredicateExpr ']'
11733 // PredicateExpr ::= Expr
11734 xpath_ast_node* parse_filter_expression()
11735 {
11736 xpath_ast_node* n = parse_primary_expression();
11737 if (!n) return 0;
11738
11739 size_t old_depth = _depth;
11740
11741 while (_lexer.current() == lex_open_square_brace)
11742 {
11743 _lexer.next();
11744
11745 if (++_depth > xpath_ast_depth_limit)
11746 return error_rec();
11747
11748 if (n->rettype() != xpath_type_node_set)
11749 return error("Predicate has to be applied to node set");
11750
11751 xpath_ast_node* expr = parse_expression();
11752 if (!expr) return 0;
11753
11754 n = alloc_node(ast_filter, n, expr, predicate_default);
11755 if (!n) return 0;
11756
11757 if (_lexer.current() != lex_close_square_brace)
11758 return error("Expected ']' to match an opening '['");
11759
11760 _lexer.next();
11761 }
11762
11763 _depth = old_depth;
11764
11765 return n;
11766 }
11767
11768 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep
11769 // AxisSpecifier ::= AxisName '::' | '@'?
11770 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')'
11771 // NameTest ::= '*' | NCName ':' '*' | QName
11772 // AbbreviatedStep ::= '.' | '..'
11773 xpath_ast_node* parse_step(xpath_ast_node* set)
11774 {
11775 if (set && set->rettype() != xpath_type_node_set)
11776 return error("Step has to be applied to node set");
11777
11778 bool axis_specified = false;
11779 axis_t axis = axis_child; // implied child axis
11780
11781 if (_lexer.current() == lex_axis_attribute)
11782 {
11783 axis = axis_attribute;
11784 axis_specified = true;
11785
11786 _lexer.next();
11787 }
11788 else if (_lexer.current() == lex_dot)
11789 {
11790 _lexer.next();
11791
11792 if (_lexer.current() == lex_open_square_brace)
11793 return error("Predicates are not allowed after an abbreviated step");
11794
11795 return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0);
11796 }
11797 else if (_lexer.current() == lex_double_dot)
11798 {
11799 _lexer.next();
11800
11801 if (_lexer.current() == lex_open_square_brace)
11802 return error("Predicates are not allowed after an abbreviated step");
11803
11804 return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0);
11805 }
11806
11807 nodetest_t nt_type = nodetest_none;
11808 xpath_lexer_string nt_name;
11809
11810 if (_lexer.current() == lex_string)
11811 {
11812 // node name test
11813 nt_name = _lexer.contents();
11814 _lexer.next();
11815
11816 // was it an axis name?
11817 if (_lexer.current() == lex_double_colon)
11818 {
11819 // parse axis name
11820 if (axis_specified)
11821 return error("Two axis specifiers in one step");
11822
11823 axis = parse_axis_name(nt_name, axis_specified);
11824
11825 if (!axis_specified)
11826 return error("Unknown axis");
11827
11828 // read actual node test
11829 _lexer.next();
11830
11831 if (_lexer.current() == lex_multiply)
11832 {
11833 nt_type = nodetest_all;
11834 nt_name = xpath_lexer_string();
11835 _lexer.next();
11836 }
11837 else if (_lexer.current() == lex_string)
11838 {
11839 nt_name = _lexer.contents();
11840 _lexer.next();
11841 }
11842 else
11843 {
11844 return error("Unrecognized node test");
11845 }
11846 }
11847
11848 if (nt_type == nodetest_none)
11849 {
11850 // node type test or processing-instruction
11851 if (_lexer.current() == lex_open_brace)
11852 {
11853 _lexer.next();
11854
11855 if (_lexer.current() == lex_close_brace)
11856 {
11857 _lexer.next();
11858
11859 nt_type = parse_node_test_type(nt_name);
11860
11861 if (nt_type == nodetest_none)
11862 return error("Unrecognized node type");
11863
11864 nt_name = xpath_lexer_string();
11865 }
11866 else if (nt_name == PUGIXML_TEXT("processing-instruction"))
11867 {
11868 if (_lexer.current() != lex_quoted_string)
11869 return error("Only literals are allowed as arguments to processing-instruction()");
11870
11871 nt_type = nodetest_pi;
11872 nt_name = _lexer.contents();
11873 _lexer.next();
11874
11875 if (_lexer.current() != lex_close_brace)
11876 return error("Unmatched brace near processing-instruction()");
11877 _lexer.next();
11878 }
11879 else
11880 {
11881 return error("Unmatched brace near node type test");
11882 }
11883 }
11884 // QName or NCName:*
11885 else
11886 {
11887 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:*
11888 {
11889 nt_name.end--; // erase *
11890
11891 nt_type = nodetest_all_in_namespace;
11892 }
11893 else
11894 {
11895 nt_type = nodetest_name;
11896 }
11897 }
11898 }
11899 }
11900 else if (_lexer.current() == lex_multiply)
11901 {
11902 nt_type = nodetest_all;
11903 _lexer.next();
11904 }
11905 else
11906 {
11907 return error("Unrecognized node test");
11908 }
11909
11910 const char_t* nt_name_copy = alloc_string(nt_name);
11911 if (!nt_name_copy) return 0;
11912
11913 xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy);
11914 if (!n) return 0;
11915
11916 size_t old_depth = _depth;
11917
11918 xpath_ast_node* last = 0;
11919
11920 while (_lexer.current() == lex_open_square_brace)
11921 {
11922 _lexer.next();
11923
11924 if (++_depth > xpath_ast_depth_limit)
11925 return error_rec();
11926
11927 xpath_ast_node* expr = parse_expression();
11928 if (!expr) return 0;
11929
11930 xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default);
11931 if (!pred) return 0;
11932
11933 if (_lexer.current() != lex_close_square_brace)
11934 return error("Expected ']' to match an opening '['");
11935 _lexer.next();
11936
11937 if (last) last->set_next(pred);
11938 else n->set_right(pred);
11939
11940 last = pred;
11941 }
11942
11943 _depth = old_depth;
11944
11945 return n;
11946 }
11947
11948 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step
11949 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set)
11950 {
11951 xpath_ast_node* n = parse_step(set);
11952 if (!n) return 0;
11953
11954 size_t old_depth = _depth;
11955
11956 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
11957 {
11958 lexeme_t l = _lexer.current();
11959 _lexer.next();
11960
11961 if (l == lex_double_slash)
11962 {
11963 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
11964 if (!n) return 0;
11965
11966 ++_depth;
11967 }
11968
11969 if (++_depth > xpath_ast_depth_limit)
11970 return error_rec();
11971
11972 n = parse_step(n);
11973 if (!n) return 0;
11974 }
11975
11976 _depth = old_depth;
11977
11978 return n;
11979 }
11980
11981 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath
11982 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath
11983 xpath_ast_node* parse_location_path()
11984 {
11985 if (_lexer.current() == lex_slash)
11986 {
11987 _lexer.next();
11988
11989 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
11990 if (!n) return 0;
11991
11992 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path
11993 lexeme_t l = _lexer.current();
11994
11995 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply)
11996 return parse_relative_location_path(n);
11997 else
11998 return n;
11999 }
12000 else if (_lexer.current() == lex_double_slash)
12001 {
12002 _lexer.next();
12003
12004 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set);
12005 if (!n) return 0;
12006
12007 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
12008 if (!n) return 0;
12009
12010 return parse_relative_location_path(n);
12011 }
12012
12013 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1
12014 return parse_relative_location_path(0);
12015 }
12016
12017 // PathExpr ::= LocationPath
12018 // | FilterExpr
12019 // | FilterExpr '/' RelativeLocationPath
12020 // | FilterExpr '//' RelativeLocationPath
12021 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr
12022 // UnaryExpr ::= UnionExpr | '-' UnaryExpr
12023 xpath_ast_node* parse_path_or_unary_expression()
12024 {
12025 // Clarification.
12026 // PathExpr begins with either LocationPath or FilterExpr.
12027 // FilterExpr begins with PrimaryExpr
12028 // PrimaryExpr begins with '$' in case of it being a variable reference,
12029 // '(' in case of it being an expression, string literal, number constant or
12030 // function call.
12031 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace ||
12032 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number ||
12033 _lexer.current() == lex_string)
12034 {
12035 if (_lexer.current() == lex_string)
12036 {
12037 // This is either a function call, or not - if not, we shall proceed with location path
12038 const char_t* state = _lexer.state();
12039
12040 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state;
12041
12042 if (*state != '(')
12043 return parse_location_path();
12044
12045 // This looks like a function call; however this still can be a node-test. Check it.
12046 if (parse_node_test_type(_lexer.contents()) != nodetest_none)
12047 return parse_location_path();
12048 }
12049
12050 xpath_ast_node* n = parse_filter_expression();
12051 if (!n) return 0;
12052
12053 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash)
12054 {
12055 lexeme_t l = _lexer.current();
12056 _lexer.next();
12057
12058 if (l == lex_double_slash)
12059 {
12060 if (n->rettype() != xpath_type_node_set)
12061 return error("Step has to be applied to node set");
12062
12063 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0);
12064 if (!n) return 0;
12065 }
12066
12067 // select from location path
12068 return parse_relative_location_path(n);
12069 }
12070
12071 return n;
12072 }
12073 else if (_lexer.current() == lex_minus)
12074 {
12075 _lexer.next();
12076
12077 // precedence 7+ - only parses union expressions
12078 xpath_ast_node* n = parse_expression(7);
12079 if (!n) return 0;
12080
12081 return alloc_node(ast_op_negate, xpath_type_number, n);
12082 }
12083 else
12084 {
12085 return parse_location_path();
12086 }
12087 }
12088
12089 struct binary_op_t
12090 {
12091 ast_type_t asttype;
12092 xpath_value_type rettype;
12093 int precedence;
12094
12095 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0)
12096 {
12097 }
12098
12099 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_)
12100 {
12101 }
12102
12103 static binary_op_t parse(xpath_lexer& lexer)
12104 {
12105 switch (lexer.current())
12106 {
12107 case lex_string:
12108 if (lexer.contents() == PUGIXML_TEXT("or"))
12109 return binary_op_t(ast_op_or, xpath_type_boolean, 1);
12110 else if (lexer.contents() == PUGIXML_TEXT("and"))
12111 return binary_op_t(ast_op_and, xpath_type_boolean, 2);
12112 else if (lexer.contents() == PUGIXML_TEXT("div"))
12113 return binary_op_t(ast_op_divide, xpath_type_number, 6);
12114 else if (lexer.contents() == PUGIXML_TEXT("mod"))
12115 return binary_op_t(ast_op_mod, xpath_type_number, 6);
12116 else
12117 return binary_op_t();
12118
12119 case lex_equal:
12120 return binary_op_t(ast_op_equal, xpath_type_boolean, 3);
12121
12122 case lex_not_equal:
12123 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3);
12124
12125 case lex_less:
12126 return binary_op_t(ast_op_less, xpath_type_boolean, 4);
12127
12128 case lex_greater:
12129 return binary_op_t(ast_op_greater, xpath_type_boolean, 4);
12130
12131 case lex_less_or_equal:
12132 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4);
12133
12134 case lex_greater_or_equal:
12135 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4);
12136
12137 case lex_plus:
12138 return binary_op_t(ast_op_add, xpath_type_number, 5);
12139
12140 case lex_minus:
12141 return binary_op_t(ast_op_subtract, xpath_type_number, 5);
12142
12143 case lex_multiply:
12144 return binary_op_t(ast_op_multiply, xpath_type_number, 6);
12145
12146 case lex_union:
12147 return binary_op_t(ast_op_union, xpath_type_node_set, 7);
12148
12149 default:
12150 return binary_op_t();
12151 }
12152 }
12153 };
12154
12155 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit)
12156 {
12157 binary_op_t op = binary_op_t::parse(_lexer);
12158
12159 while (op.asttype != ast_unknown && op.precedence >= limit)
12160 {
12161 _lexer.next();
12162
12163 if (++_depth > xpath_ast_depth_limit)
12164 return error_rec();
12165
12166 xpath_ast_node* rhs = parse_path_or_unary_expression();
12167 if (!rhs) return 0;
12168
12169 binary_op_t nextop = binary_op_t::parse(_lexer);
12170
12171 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence)
12172 {
12173 rhs = parse_expression_rec(rhs, nextop.precedence);
12174 if (!rhs) return 0;
12175
12176 nextop = binary_op_t::parse(_lexer);
12177 }
12178
12179 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set))
12180 return error("Union operator has to be applied to node sets");
12181
12182 lhs = alloc_node(op.asttype, op.rettype, lhs, rhs);
12183 if (!lhs) return 0;
12184
12185 op = binary_op_t::parse(_lexer);
12186 }
12187
12188 return lhs;
12189 }
12190
12191 // Expr ::= OrExpr
12192 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr
12193 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr
12194 // EqualityExpr ::= RelationalExpr
12195 // | EqualityExpr '=' RelationalExpr
12196 // | EqualityExpr '!=' RelationalExpr
12197 // RelationalExpr ::= AdditiveExpr
12198 // | RelationalExpr '<' AdditiveExpr
12199 // | RelationalExpr '>' AdditiveExpr
12200 // | RelationalExpr '<=' AdditiveExpr
12201 // | RelationalExpr '>=' AdditiveExpr
12202 // AdditiveExpr ::= MultiplicativeExpr
12203 // | AdditiveExpr '+' MultiplicativeExpr
12204 // | AdditiveExpr '-' MultiplicativeExpr
12205 // MultiplicativeExpr ::= UnaryExpr
12206 // | MultiplicativeExpr '*' UnaryExpr
12207 // | MultiplicativeExpr 'div' UnaryExpr
12208 // | MultiplicativeExpr 'mod' UnaryExpr
12209 xpath_ast_node* parse_expression(int limit = 0)
12210 {
12211 size_t old_depth = _depth;
12212
12213 if (++_depth > xpath_ast_depth_limit)
12214 return error_rec();
12215
12216 xpath_ast_node* n = parse_path_or_unary_expression();
12217 if (!n) return 0;
12218
12219 n = parse_expression_rec(n, limit);
12220
12221 _depth = old_depth;
12222
12223 return n;
12224 }
12225
12226 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0)
12227 {
12228 }
12229
12230 xpath_ast_node* parse()
12231 {
12232 xpath_ast_node* n = parse_expression();
12233 if (!n) return 0;
12234
12235 assert(_depth == 0);
12236
12237 // check if there are unparsed tokens left
12238 if (_lexer.current() != lex_eof)
12239 return error("Incorrect query");
12240
12241 return n;
12242 }
12243
12244 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result)
12245 {
12246 xpath_parser parser(query, variables, alloc, result);
12247
12248 return parser.parse();
12249 }
12250 };
12251
12252 struct xpath_query_impl
12253 {
12254 static xpath_query_impl* create()
12255 {
12256 void* memory = xml_memory::allocate(sizeof(xpath_query_impl));
12257 if (!memory) return 0;
12258
12259 return new (memory) xpath_query_impl();
12260 }
12261
12262 static void destroy(xpath_query_impl* impl)
12263 {
12264 // free all allocated pages
12265 impl->alloc.release();
12266
12267 // free allocator memory (with the first page)
12268 xml_memory::deallocate(impl);
12269 }
12270
12271 xpath_query_impl(): root(0), alloc(&block, &oom), oom(false)
12272 {
12273 block.next = 0;
12274 block.capacity = sizeof(block.data);
12275 }
12276
12277 xpath_ast_node* root;
12278 xpath_allocator alloc;
12279 xpath_memory_block block;
12280 bool oom;
12281 };
12282
12283 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl)
12284 {
12285 if (!impl) return 0;
12286
12287 if (impl->root->rettype() != xpath_type_node_set)
12288 {
12289 #ifdef PUGIXML_NO_EXCEPTIONS
12290 return 0;
12291 #else
12292 xpath_parse_result res;
12293 res.error = "Expression does not evaluate to node set";
12294
12295 throw xpath_exception(res);
12296 #endif
12297 }
12298
12299 return impl->root;
12300 }
12301 PUGI__NS_END
12302
12303 namespace pugi
12304 {
12305 #ifndef PUGIXML_NO_EXCEPTIONS
12306 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_)
12307 {
12308 assert(_result.error);
12309 }
12310
12311 PUGI__FN const char* xpath_exception::what() const throw()
12312 {
12313 return _result.error;
12314 }
12315
12316 PUGI__FN const xpath_parse_result& xpath_exception::result() const
12317 {
12318 return _result;
12319 }
12320 #endif
12321
12322 PUGI__FN xpath_node::xpath_node()
12323 {
12324 }
12325
12326 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_)
12327 {
12328 }
12329
12330 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_)
12331 {
12332 }
12333
12334 PUGI__FN xml_node xpath_node::node() const
12335 {
12336 return _attribute ? xml_node() : _node;
12337 }
12338
12339 PUGI__FN xml_attribute xpath_node::attribute() const
12340 {
12341 return _attribute;
12342 }
12343
12344 PUGI__FN xml_node xpath_node::parent() const
12345 {
12346 return _attribute ? _node : _node.parent();
12347 }
12348
12349 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***)
12350 {
12351 }
12352
12353 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const
12354 {
12355 return (_node || _attribute) ? unspecified_bool_xpath_node : 0;
12356 }
12357
12358 PUGI__FN bool xpath_node::operator!() const
12359 {
12360 return !(_node || _attribute);
12361 }
12362
12363 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const
12364 {
12365 return _node == n._node && _attribute == n._attribute;
12366 }
12367
12368 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const
12369 {
12370 return _node != n._node || _attribute != n._attribute;
12371 }
12372
12373 #ifdef __BORLANDC__
12374 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs)
12375 {
12376 return (bool)lhs && rhs;
12377 }
12378
12379 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs)
12380 {
12381 return (bool)lhs || rhs;
12382 }
12383 #endif
12384
12385 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_)
12386 {
12387 assert(begin_ <= end_);
12388
12389 size_t size_ = static_cast<size_t>(end_ - begin_);
12390
12391 // use internal buffer for 0 or 1 elements, heap buffer otherwise
12392 xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node)));
12393
12394 if (!storage)
12395 {
12396 #ifdef PUGIXML_NO_EXCEPTIONS
12397 return;
12398 #else
12399 throw std::bad_alloc();
12400 #endif
12401 }
12402
12403 // deallocate old buffer
12404 if (_begin != _storage)
12405 impl::xml_memory::deallocate(_begin);
12406
12407 // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB
12408 if (size_)
12409 memcpy(storage, begin_, size_ * sizeof(xpath_node));
12410
12411 _begin = storage;
12412 _end = storage + size_;
12413 _type = type_;
12414 }
12415
12416 #ifdef PUGIXML_HAS_MOVE
12417 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT
12418 {
12419 _type = rhs._type;
12420 _storage[0] = rhs._storage[0];
12421 _begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin;
12422 _end = _begin + (rhs._end - rhs._begin);
12423
12424 rhs._type = type_unsorted;
12425 rhs._begin = rhs._storage;
12426 rhs._end = rhs._storage;
12427 }
12428 #endif
12429
12430 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage)
12431 {
12432 }
12433
12434 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage)
12435 {
12436 _assign(begin_, end_, type_);
12437 }
12438
12439 PUGI__FN xpath_node_set::~xpath_node_set()
12440 {
12441 if (_begin != _storage)
12442 impl::xml_memory::deallocate(_begin);
12443 }
12444
12445 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage)
12446 {
12447 _assign(ns._begin, ns._end, ns._type);
12448 }
12449
12450 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns)
12451 {
12452 if (this == &ns) return *this;
12453
12454 _assign(ns._begin, ns._end, ns._type);
12455
12456 return *this;
12457 }
12458
12459 #ifdef PUGIXML_HAS_MOVE
12460 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage)
12461 {
12462 _move(rhs);
12463 }
12464
12465 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT
12466 {
12467 if (this == &rhs) return *this;
12468
12469 if (_begin != _storage)
12470 impl::xml_memory::deallocate(_begin);
12471
12472 _move(rhs);
12473
12474 return *this;
12475 }
12476 #endif
12477
12478 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const
12479 {
12480 return _type;
12481 }
12482
12483 PUGI__FN size_t xpath_node_set::size() const
12484 {
12485 return _end - _begin;
12486 }
12487
12488 PUGI__FN bool xpath_node_set::empty() const
12489 {
12490 return _begin == _end;
12491 }
12492
12493 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const
12494 {
12495 assert(index < size());
12496 return _begin[index];
12497 }
12498
12499 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const
12500 {
12501 return _begin;
12502 }
12503
12504 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const
12505 {
12506 return _end;
12507 }
12508
12509 PUGI__FN void xpath_node_set::sort(bool reverse)
12510 {
12511 _type = impl::xpath_sort(_begin, _end, _type, reverse);
12512 }
12513
12514 PUGI__FN xpath_node xpath_node_set::first() const
12515 {
12516 return impl::xpath_first(_begin, _end, _type);
12517 }
12518
12519 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0)
12520 {
12521 }
12522
12523 PUGI__FN xpath_parse_result::operator bool() const
12524 {
12525 return error == 0;
12526 }
12527
12528 PUGI__FN const char* xpath_parse_result::description() const
12529 {
12530 return error ? error : "No error";
12531 }
12532
12533 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0)
12534 {
12535 }
12536
12537 PUGI__FN const char_t* xpath_variable::name() const
12538 {
12539 switch (_type)
12540 {
12541 case xpath_type_node_set:
12542 return static_cast<const impl::xpath_variable_node_set*>(this)->name;
12543
12544 case xpath_type_number:
12545 return static_cast<const impl::xpath_variable_number*>(this)->name;
12546
12547 case xpath_type_string:
12548 return static_cast<const impl::xpath_variable_string*>(this)->name;
12549
12550 case xpath_type_boolean:
12551 return static_cast<const impl::xpath_variable_boolean*>(this)->name;
12552
12553 default:
12554 assert(false && "Invalid variable type"); // unreachable
12555 return 0;
12556 }
12557 }
12558
12559 PUGI__FN xpath_value_type xpath_variable::type() const
12560 {
12561 return _type;
12562 }
12563
12564 PUGI__FN bool xpath_variable::get_boolean() const
12565 {
12566 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false;
12567 }
12568
12569 PUGI__FN double xpath_variable::get_number() const
12570 {
12571 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan();
12572 }
12573
12574 PUGI__FN const char_t* xpath_variable::get_string() const
12575 {
12576 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0;
12577 return value ? value : PUGIXML_TEXT("");
12578 }
12579
12580 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const
12581 {
12582 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set;
12583 }
12584
12585 PUGI__FN bool xpath_variable::set(bool value)
12586 {
12587 if (_type != xpath_type_boolean) return false;
12588
12589 static_cast<impl::xpath_variable_boolean*>(this)->value = value;
12590 return true;
12591 }
12592
12593 PUGI__FN bool xpath_variable::set(double value)
12594 {
12595 if (_type != xpath_type_number) return false;
12596
12597 static_cast<impl::xpath_variable_number*>(this)->value = value;
12598 return true;
12599 }
12600
12601 PUGI__FN bool xpath_variable::set(const char_t* value)
12602 {
12603 if (_type != xpath_type_string) return false;
12604
12605 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this);
12606
12607 // duplicate string
12608 size_t size = (impl::strlength(value) + 1) * sizeof(char_t);
12609
12610 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size));
12611 if (!copy) return false;
12612
12613 memcpy(copy, value, size);
12614
12615 // replace old string
12616 if (var->value) impl::xml_memory::deallocate(var->value);
12617 var->value = copy;
12618
12619 return true;
12620 }
12621
12622 PUGI__FN bool xpath_variable::set(const xpath_node_set& value)
12623 {
12624 if (_type != xpath_type_node_set) return false;
12625
12626 static_cast<impl::xpath_variable_node_set*>(this)->value = value;
12627 return true;
12628 }
12629
12630 PUGI__FN xpath_variable_set::xpath_variable_set()
12631 {
12632 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12633 _data[i] = 0;
12634 }
12635
12636 PUGI__FN xpath_variable_set::~xpath_variable_set()
12637 {
12638 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12639 _destroy(_data[i]);
12640 }
12641
12642 PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs)
12643 {
12644 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12645 _data[i] = 0;
12646
12647 _assign(rhs);
12648 }
12649
12650 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs)
12651 {
12652 if (this == &rhs) return *this;
12653
12654 _assign(rhs);
12655
12656 return *this;
12657 }
12658
12659 #ifdef PUGIXML_HAS_MOVE
12660 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12661 {
12662 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12663 {
12664 _data[i] = rhs._data[i];
12665 rhs._data[i] = 0;
12666 }
12667 }
12668
12669 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT
12670 {
12671 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12672 {
12673 _destroy(_data[i]);
12674
12675 _data[i] = rhs._data[i];
12676 rhs._data[i] = 0;
12677 }
12678
12679 return *this;
12680 }
12681 #endif
12682
12683 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs)
12684 {
12685 xpath_variable_set temp;
12686
12687 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12688 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i]))
12689 return;
12690
12691 _swap(temp);
12692 }
12693
12694 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs)
12695 {
12696 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i)
12697 {
12698 xpath_variable* chain = _data[i];
12699
12700 _data[i] = rhs._data[i];
12701 rhs._data[i] = chain;
12702 }
12703 }
12704
12705 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const
12706 {
12707 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12708 size_t hash = impl::hash_string(name) % hash_size;
12709
12710 // look for existing variable
12711 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12712 if (impl::strequal(var->name(), name))
12713 return var;
12714
12715 return 0;
12716 }
12717
12718 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result)
12719 {
12720 xpath_variable* last = 0;
12721
12722 while (var)
12723 {
12724 // allocate storage for new variable
12725 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name());
12726 if (!nvar) return false;
12727
12728 // link the variable to the result immediately to handle failures gracefully
12729 if (last)
12730 last->_next = nvar;
12731 else
12732 *out_result = nvar;
12733
12734 last = nvar;
12735
12736 // copy the value; this can fail due to out-of-memory conditions
12737 if (!impl::copy_xpath_variable(nvar, var)) return false;
12738
12739 var = var->_next;
12740 }
12741
12742 return true;
12743 }
12744
12745 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var)
12746 {
12747 while (var)
12748 {
12749 xpath_variable* next = var->_next;
12750
12751 impl::delete_xpath_variable(var->_type, var);
12752
12753 var = next;
12754 }
12755 }
12756
12757 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type)
12758 {
12759 const size_t hash_size = sizeof(_data) / sizeof(_data[0]);
12760 size_t hash = impl::hash_string(name) % hash_size;
12761
12762 // look for existing variable
12763 for (xpath_variable* var = _data[hash]; var; var = var->_next)
12764 if (impl::strequal(var->name(), name))
12765 return var->type() == type ? var : 0;
12766
12767 // add new variable
12768 xpath_variable* result = impl::new_xpath_variable(type, name);
12769
12770 if (result)
12771 {
12772 result->_next = _data[hash];
12773
12774 _data[hash] = result;
12775 }
12776
12777 return result;
12778 }
12779
12780 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value)
12781 {
12782 xpath_variable* var = add(name, xpath_type_boolean);
12783 return var ? var->set(value) : false;
12784 }
12785
12786 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value)
12787 {
12788 xpath_variable* var = add(name, xpath_type_number);
12789 return var ? var->set(value) : false;
12790 }
12791
12792 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value)
12793 {
12794 xpath_variable* var = add(name, xpath_type_string);
12795 return var ? var->set(value) : false;
12796 }
12797
12798 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value)
12799 {
12800 xpath_variable* var = add(name, xpath_type_node_set);
12801 return var ? var->set(value) : false;
12802 }
12803
12804 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name)
12805 {
12806 return _find(name);
12807 }
12808
12809 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const
12810 {
12811 return _find(name);
12812 }
12813
12814 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0)
12815 {
12816 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create();
12817
12818 if (!qimpl)
12819 {
12820 #ifdef PUGIXML_NO_EXCEPTIONS
12821 _result.error = "Out of memory";
12822 #else
12823 throw std::bad_alloc();
12824 #endif
12825 }
12826 else
12827 {
12828 using impl::auto_deleter; // MSVC7 workaround
12829 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy);
12830
12831 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result);
12832
12833 if (qimpl->root)
12834 {
12835 qimpl->root->optimize(&qimpl->alloc);
12836
12837 _impl = impl.release();
12838 _result.error = 0;
12839 }
12840 else
12841 {
12842 #ifdef PUGIXML_NO_EXCEPTIONS
12843 if (qimpl->oom) _result.error = "Out of memory";
12844 #else
12845 if (qimpl->oom) throw std::bad_alloc();
12846 throw xpath_exception(_result);
12847 #endif
12848 }
12849 }
12850 }
12851
12852 PUGI__FN xpath_query::xpath_query(): _impl(0)
12853 {
12854 }
12855
12856 PUGI__FN xpath_query::~xpath_query()
12857 {
12858 if (_impl)
12859 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12860 }
12861
12862 #ifdef PUGIXML_HAS_MOVE
12863 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT
12864 {
12865 _impl = rhs._impl;
12866 _result = rhs._result;
12867 rhs._impl = 0;
12868 rhs._result = xpath_parse_result();
12869 }
12870
12871 PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT
12872 {
12873 if (this == &rhs) return *this;
12874
12875 if (_impl)
12876 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl));
12877
12878 _impl = rhs._impl;
12879 _result = rhs._result;
12880 rhs._impl = 0;
12881 rhs._result = xpath_parse_result();
12882
12883 return *this;
12884 }
12885 #endif
12886
12887 PUGI__FN xpath_value_type xpath_query::return_type() const
12888 {
12889 if (!_impl) return xpath_type_none;
12890
12891 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype();
12892 }
12893
12894 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const
12895 {
12896 if (!_impl) return false;
12897
12898 impl::xpath_context c(n, 1, 1);
12899 impl::xpath_stack_data sd;
12900
12901 bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack);
12902
12903 if (sd.oom)
12904 {
12905 #ifdef PUGIXML_NO_EXCEPTIONS
12906 return false;
12907 #else
12908 throw std::bad_alloc();
12909 #endif
12910 }
12911
12912 return r;
12913 }
12914
12915 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const
12916 {
12917 if (!_impl) return impl::gen_nan();
12918
12919 impl::xpath_context c(n, 1, 1);
12920 impl::xpath_stack_data sd;
12921
12922 double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack);
12923
12924 if (sd.oom)
12925 {
12926 #ifdef PUGIXML_NO_EXCEPTIONS
12927 return impl::gen_nan();
12928 #else
12929 throw std::bad_alloc();
12930 #endif
12931 }
12932
12933 return r;
12934 }
12935
12936 #ifndef PUGIXML_NO_STL
12937 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const
12938 {
12939 if (!_impl) return string_t();
12940
12941 impl::xpath_context c(n, 1, 1);
12942 impl::xpath_stack_data sd;
12943
12944 impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack);
12945
12946 if (sd.oom)
12947 {
12948 #ifdef PUGIXML_NO_EXCEPTIONS
12949 return string_t();
12950 #else
12951 throw std::bad_alloc();
12952 #endif
12953 }
12954
12955 return string_t(r.c_str(), r.length());
12956 }
12957 #endif
12958
12959 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const
12960 {
12961 impl::xpath_context c(n, 1, 1);
12962 impl::xpath_stack_data sd;
12963
12964 impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string();
12965
12966 if (sd.oom)
12967 {
12968 #ifdef PUGIXML_NO_EXCEPTIONS
12969 r = impl::xpath_string();
12970 #else
12971 throw std::bad_alloc();
12972 #endif
12973 }
12974
12975 size_t full_size = r.length() + 1;
12976
12977 if (capacity > 0)
12978 {
12979 size_t size = (full_size < capacity) ? full_size : capacity;
12980 assert(size > 0);
12981
12982 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t));
12983 buffer[size - 1] = 0;
12984 }
12985
12986 return full_size;
12987 }
12988
12989 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const
12990 {
12991 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
12992 if (!root) return xpath_node_set();
12993
12994 impl::xpath_context c(n, 1, 1);
12995 impl::xpath_stack_data sd;
12996
12997 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all);
12998
12999 if (sd.oom)
13000 {
13001 #ifdef PUGIXML_NO_EXCEPTIONS
13002 return xpath_node_set();
13003 #else
13004 throw std::bad_alloc();
13005 #endif
13006 }
13007
13008 return xpath_node_set(r.begin(), r.end(), r.type());
13009 }
13010
13011 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const
13012 {
13013 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl));
13014 if (!root) return xpath_node();
13015
13016 impl::xpath_context c(n, 1, 1);
13017 impl::xpath_stack_data sd;
13018
13019 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first);
13020
13021 if (sd.oom)
13022 {
13023 #ifdef PUGIXML_NO_EXCEPTIONS
13024 return xpath_node();
13025 #else
13026 throw std::bad_alloc();
13027 #endif
13028 }
13029
13030 return r.first();
13031 }
13032
13033 PUGI__FN const xpath_parse_result& xpath_query::result() const
13034 {
13035 return _result;
13036 }
13037
13038 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***)
13039 {
13040 }
13041
13042 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const
13043 {
13044 return _impl ? unspecified_bool_xpath_query : 0;
13045 }
13046
13047 PUGI__FN bool xpath_query::operator!() const
13048 {
13049 return !_impl;
13050 }
13051
13052 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const
13053 {
13054 xpath_query q(query, variables);
13055 return q.evaluate_node(*this);
13056 }
13057
13058 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const
13059 {
13060 return query.evaluate_node(*this);
13061 }
13062
13063 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const
13064 {
13065 xpath_query q(query, variables);
13066 return q.evaluate_node_set(*this);
13067 }
13068
13069 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const
13070 {
13071 return query.evaluate_node_set(*this);
13072 }
13073
13074 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const
13075 {
13076 xpath_query q(query, variables);
13077 return q.evaluate_node(*this);
13078 }
13079
13080 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const
13081 {
13082 return query.evaluate_node(*this);
13083 }
13084 }
13085
13086 #endif
13087
13088 #ifdef __BORLANDC__
13089 # pragma option pop
13090 #endif
13091
13092 // Intel C++ does not properly keep warning state for function templates,
13093 // so popping warning state at the end of translation unit leads to warnings in the middle.
13094 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
13095 # pragma warning(pop)
13096 #endif
13097
13098 #if defined(_MSC_VER) && defined(__c2__)
13099 # pragma clang diagnostic pop
13100 #endif
13101
13102 // Undefine all local macros (makes sure we're not leaking macros in header-only mode)
13103 #undef PUGI__NO_INLINE
13104 #undef PUGI__UNLIKELY
13105 #undef PUGI__STATIC_ASSERT
13106 #undef PUGI__DMC_VOLATILE
13107 #undef PUGI__UNSIGNED_OVERFLOW
13108 #undef PUGI__MSVC_CRT_VERSION
13109 #undef PUGI__SNPRINTF
13110 #undef PUGI__NS_BEGIN
13111 #undef PUGI__NS_END
13112 #undef PUGI__FN
13113 #undef PUGI__FN_NO_INLINE
13114 #undef PUGI__GETHEADER_IMPL
13115 #undef PUGI__GETPAGE_IMPL
13116 #undef PUGI__GETPAGE
13117 #undef PUGI__NODETYPE
13118 #undef PUGI__IS_CHARTYPE_IMPL
13119 #undef PUGI__IS_CHARTYPE
13120 #undef PUGI__IS_CHARTYPEX
13121 #undef PUGI__ENDSWITH
13122 #undef PUGI__SKIPWS
13123 #undef PUGI__OPTSET
13124 #undef PUGI__PUSHNODE
13125 #undef PUGI__POPNODE
13126 #undef PUGI__SCANFOR
13127 #undef PUGI__SCANWHILE
13128 #undef PUGI__SCANWHILE_UNROLL
13129 #undef PUGI__ENDSEG
13130 #undef PUGI__THROW_ERROR
13131 #undef PUGI__CHECK_ERROR
13132
13133 #endif
13134
13135 /**
13136 * Copyright (c) 2006-2022 Arseny Kapoulkine
13137 *
13138 * Permission is hereby granted, free of charge, to any person
13139 * obtaining a copy of this software and associated documentation
13140 * files (the "Software"), to deal in the Software without
13141 * restriction, including without limitation the rights to use,
13142 * copy, modify, merge, publish, distribute, sublicense, and/or sell
13143 * copies of the Software, and to permit persons to whom the
13144 * Software is furnished to do so, subject to the following
13145 * conditions:
13146 *
13147 * The above copyright notice and this permission notice shall be
13148 * included in all copies or substantial portions of the Software.
13149 *
13150 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
13151 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
13152 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
13153 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
13154 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
13155 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
13156 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
13157 * OTHER DEALINGS IN THE SOFTWARE.
13158 */