Mercurial > minori
comparison dep/pugixml/src/pugixml.cpp @ 55:d10b6c6b432e
add xml lib, we will need to use it eventually
author | Paper <mrpapersonic@gmail.com> |
---|---|
date | Tue, 26 Sep 2023 12:37:08 -0400 |
parents | |
children | a45edd073f9e |
comparison
equal
deleted
inserted
replaced
54:466ac9870df9 | 55:d10b6c6b432e |
---|---|
1 /** | |
2 * pugixml parser - version 1.13 | |
3 * -------------------------------------------------------- | |
4 * Copyright (C) 2006-2022, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) | |
5 * Report bugs and download new versions at https://pugixml.org/ | |
6 * | |
7 * This library is distributed under the MIT License. See notice at the end | |
8 * of this file. | |
9 * | |
10 * This work is based on the pugxml parser, which is: | |
11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) | |
12 */ | |
13 | |
14 #ifndef SOURCE_PUGIXML_CPP | |
15 #define SOURCE_PUGIXML_CPP | |
16 | |
17 #include "pugixml.hpp" | |
18 | |
19 #include <stdlib.h> | |
20 #include <stdio.h> | |
21 #include <string.h> | |
22 #include <assert.h> | |
23 #include <limits.h> | |
24 | |
25 #ifdef PUGIXML_WCHAR_MODE | |
26 # include <wchar.h> | |
27 #endif | |
28 | |
29 #ifndef PUGIXML_NO_XPATH | |
30 # include <math.h> | |
31 # include <float.h> | |
32 #endif | |
33 | |
34 #ifndef PUGIXML_NO_STL | |
35 # include <istream> | |
36 # include <ostream> | |
37 # include <string> | |
38 #endif | |
39 | |
40 // For placement new | |
41 #include <new> | |
42 | |
43 #ifdef _MSC_VER | |
44 # pragma warning(push) | |
45 # pragma warning(disable: 4127) // conditional expression is constant | |
46 # pragma warning(disable: 4324) // structure was padded due to __declspec(align()) | |
47 # pragma warning(disable: 4702) // unreachable code | |
48 # pragma warning(disable: 4996) // this function or variable may be unsafe | |
49 #endif | |
50 | |
51 #if defined(_MSC_VER) && defined(__c2__) | |
52 # pragma clang diagnostic push | |
53 # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe | |
54 #endif | |
55 | |
56 #ifdef __INTEL_COMPILER | |
57 # pragma warning(disable: 177) // function was declared but never referenced | |
58 # pragma warning(disable: 279) // controlling expression is constant | |
59 # pragma warning(disable: 1478 1786) // function was declared "deprecated" | |
60 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type | |
61 #endif | |
62 | |
63 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY) | |
64 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away | |
65 #endif | |
66 | |
67 #ifdef __BORLANDC__ | |
68 # pragma option push | |
69 # pragma warn -8008 // condition is always false | |
70 # pragma warn -8066 // unreachable code | |
71 #endif | |
72 | |
73 #ifdef __SNC__ | |
74 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug | |
75 # pragma diag_suppress=178 // function was declared but never referenced | |
76 # pragma diag_suppress=237 // controlling expression is constant | |
77 #endif | |
78 | |
79 #ifdef __TI_COMPILER_VERSION__ | |
80 # pragma diag_suppress 179 // function was declared but never referenced | |
81 #endif | |
82 | |
83 // Inlining controls | |
84 #if defined(_MSC_VER) && _MSC_VER >= 1300 | |
85 # define PUGI__NO_INLINE __declspec(noinline) | |
86 #elif defined(__GNUC__) | |
87 # define PUGI__NO_INLINE __attribute__((noinline)) | |
88 #else | |
89 # define PUGI__NO_INLINE | |
90 #endif | |
91 | |
92 // Branch weight controls | |
93 #if defined(__GNUC__) && !defined(__c2__) | |
94 # define PUGI__UNLIKELY(cond) __builtin_expect(cond, 0) | |
95 #else | |
96 # define PUGI__UNLIKELY(cond) (cond) | |
97 #endif | |
98 | |
99 // Simple static assertion | |
100 #define PUGI__STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; } | |
101 | |
102 // Digital Mars C++ bug workaround for passing char loaded from memory via stack | |
103 #ifdef __DMC__ | |
104 # define PUGI__DMC_VOLATILE volatile | |
105 #else | |
106 # define PUGI__DMC_VOLATILE | |
107 #endif | |
108 | |
109 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings | |
110 #if defined(__clang__) && defined(__has_attribute) | |
111 # if __has_attribute(no_sanitize) | |
112 # define PUGI__UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow"))) | |
113 # else | |
114 # define PUGI__UNSIGNED_OVERFLOW | |
115 # endif | |
116 #else | |
117 # define PUGI__UNSIGNED_OVERFLOW | |
118 #endif | |
119 | |
120 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all) | |
121 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST) | |
122 using std::memcpy; | |
123 using std::memmove; | |
124 using std::memset; | |
125 #endif | |
126 | |
127 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations | |
128 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX) | |
129 # define LLONG_MIN (-LLONG_MAX - 1LL) | |
130 # define LLONG_MAX __LONG_LONG_MAX__ | |
131 # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL) | |
132 #endif | |
133 | |
134 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features | |
135 #if defined(_MSC_VER) && !defined(__S3E__) && !defined(_WIN32_WCE) | |
136 # define PUGI__MSVC_CRT_VERSION _MSC_VER | |
137 #elif defined(_WIN32_WCE) | |
138 # define PUGI__MSVC_CRT_VERSION 1310 // MSVC7.1 | |
139 #endif | |
140 | |
141 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size. | |
142 #if __cplusplus >= 201103 | |
143 # define PUGI__SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__) | |
144 #elif defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 | |
145 # define PUGI__SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__) | |
146 #else | |
147 # define PUGI__SNPRINTF sprintf | |
148 #endif | |
149 | |
150 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat. | |
151 #ifdef PUGIXML_HEADER_ONLY | |
152 # define PUGI__NS_BEGIN namespace pugi { namespace impl { | |
153 # define PUGI__NS_END } } | |
154 # define PUGI__FN inline | |
155 # define PUGI__FN_NO_INLINE inline | |
156 #else | |
157 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces | |
158 # define PUGI__NS_BEGIN namespace pugi { namespace impl { | |
159 # define PUGI__NS_END } } | |
160 # else | |
161 # define PUGI__NS_BEGIN namespace pugi { namespace impl { namespace { | |
162 # define PUGI__NS_END } } } | |
163 # endif | |
164 # define PUGI__FN | |
165 # define PUGI__FN_NO_INLINE PUGI__NO_INLINE | |
166 #endif | |
167 | |
168 // uintptr_t | |
169 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561) | |
170 namespace pugi | |
171 { | |
172 # ifndef _UINTPTR_T_DEFINED | |
173 typedef size_t uintptr_t; | |
174 # endif | |
175 | |
176 typedef unsigned __int8 uint8_t; | |
177 typedef unsigned __int16 uint16_t; | |
178 typedef unsigned __int32 uint32_t; | |
179 } | |
180 #else | |
181 # include <stdint.h> | |
182 #endif | |
183 | |
184 // Memory allocation | |
185 PUGI__NS_BEGIN | |
186 PUGI__FN void* default_allocate(size_t size) | |
187 { | |
188 return malloc(size); | |
189 } | |
190 | |
191 PUGI__FN void default_deallocate(void* ptr) | |
192 { | |
193 free(ptr); | |
194 } | |
195 | |
196 template <typename T> | |
197 struct xml_memory_management_function_storage | |
198 { | |
199 static allocation_function allocate; | |
200 static deallocation_function deallocate; | |
201 }; | |
202 | |
203 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them | |
204 // Without a template<> we'll get multiple definitions of the same static | |
205 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate; | |
206 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate; | |
207 | |
208 typedef xml_memory_management_function_storage<int> xml_memory; | |
209 PUGI__NS_END | |
210 | |
211 // String utilities | |
212 PUGI__NS_BEGIN | |
213 // Get string length | |
214 PUGI__FN size_t strlength(const char_t* s) | |
215 { | |
216 assert(s); | |
217 | |
218 #ifdef PUGIXML_WCHAR_MODE | |
219 return wcslen(s); | |
220 #else | |
221 return strlen(s); | |
222 #endif | |
223 } | |
224 | |
225 // Compare two strings | |
226 PUGI__FN bool strequal(const char_t* src, const char_t* dst) | |
227 { | |
228 assert(src && dst); | |
229 | |
230 #ifdef PUGIXML_WCHAR_MODE | |
231 return wcscmp(src, dst) == 0; | |
232 #else | |
233 return strcmp(src, dst) == 0; | |
234 #endif | |
235 } | |
236 | |
237 // Compare lhs with [rhs_begin, rhs_end) | |
238 PUGI__FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) | |
239 { | |
240 for (size_t i = 0; i < count; ++i) | |
241 if (lhs[i] != rhs[i]) | |
242 return false; | |
243 | |
244 return lhs[count] == 0; | |
245 } | |
246 | |
247 // Get length of wide string, even if CRT lacks wide character support | |
248 PUGI__FN size_t strlength_wide(const wchar_t* s) | |
249 { | |
250 assert(s); | |
251 | |
252 #ifdef PUGIXML_WCHAR_MODE | |
253 return wcslen(s); | |
254 #else | |
255 const wchar_t* end = s; | |
256 while (*end) end++; | |
257 return static_cast<size_t>(end - s); | |
258 #endif | |
259 } | |
260 PUGI__NS_END | |
261 | |
262 // auto_ptr-like object for exception recovery | |
263 PUGI__NS_BEGIN | |
// Minimal scope guard used for exception recovery: holds a raw pointer together with the
// function that disposes of it, and calls that function from the destructor unless the
// pointer has been taken back through release().
//
// The guard is non-copyable: an implicit copy would leave two guards holding the same
// pointer, and the deleter would then be invoked twice on it.
template <typename T> struct auto_deleter
{
	typedef void (*D)(T*);

	T* data;
	D deleter;

	// Takes ownership of data_; deleter_ is invoked on it at scope exit if it is still held
	auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
	{
	}

	// Disposes of the pointer if it is non-null and has not been released
	~auto_deleter()
	{
		if (data) deleter(data);
	}

	// Gives up ownership: returns the held pointer and disarms the destructor
	T* release()
	{
		T* result = data;
		data = 0;
		return result;
	}

private:
	// Declared but intentionally left undefined (C++98-compatible way of disabling copies)
	auto_deleter(const auto_deleter&);
	auto_deleter& operator=(const auto_deleter&);
};
287 PUGI__NS_END | |
288 | |
289 #ifdef PUGIXML_COMPACT | |
290 PUGI__NS_BEGIN | |
291 class compact_hash_table | |
292 { | |
293 public: | |
294 compact_hash_table(): _items(0), _capacity(0), _count(0) | |
295 { | |
296 } | |
297 | |
298 void clear() | |
299 { | |
300 if (_items) | |
301 { | |
302 xml_memory::deallocate(_items); | |
303 _items = 0; | |
304 _capacity = 0; | |
305 _count = 0; | |
306 } | |
307 } | |
308 | |
309 void* find(const void* key) | |
310 { | |
311 if (_capacity == 0) return 0; | |
312 | |
313 item_t* item = get_item(key); | |
314 assert(item); | |
315 assert(item->key == key || (item->key == 0 && item->value == 0)); | |
316 | |
317 return item->value; | |
318 } | |
319 | |
320 void insert(const void* key, void* value) | |
321 { | |
322 assert(_capacity != 0 && _count < _capacity - _capacity / 4); | |
323 | |
324 item_t* item = get_item(key); | |
325 assert(item); | |
326 | |
327 if (item->key == 0) | |
328 { | |
329 _count++; | |
330 item->key = key; | |
331 } | |
332 | |
333 item->value = value; | |
334 } | |
335 | |
336 bool reserve(size_t extra = 16) | |
337 { | |
338 if (_count + extra >= _capacity - _capacity / 4) | |
339 return rehash(_count + extra); | |
340 | |
341 return true; | |
342 } | |
343 | |
344 private: | |
345 struct item_t | |
346 { | |
347 const void* key; | |
348 void* value; | |
349 }; | |
350 | |
351 item_t* _items; | |
352 size_t _capacity; | |
353 | |
354 size_t _count; | |
355 | |
356 bool rehash(size_t count); | |
357 | |
358 item_t* get_item(const void* key) | |
359 { | |
360 assert(key); | |
361 assert(_capacity > 0); | |
362 | |
363 size_t hashmod = _capacity - 1; | |
364 size_t bucket = hash(key) & hashmod; | |
365 | |
366 for (size_t probe = 0; probe <= hashmod; ++probe) | |
367 { | |
368 item_t& probe_item = _items[bucket]; | |
369 | |
370 if (probe_item.key == key || probe_item.key == 0) | |
371 return &probe_item; | |
372 | |
373 // hash collision, quadratic probing | |
374 bucket = (bucket + probe + 1) & hashmod; | |
375 } | |
376 | |
377 assert(false && "Hash table is full"); // unreachable | |
378 return 0; | |
379 } | |
380 | |
381 static PUGI__UNSIGNED_OVERFLOW unsigned int hash(const void* key) | |
382 { | |
383 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff); | |
384 | |
385 // MurmurHash3 32-bit finalizer | |
386 h ^= h >> 16; | |
387 h *= 0x85ebca6bu; | |
388 h ^= h >> 13; | |
389 h *= 0xc2b2ae35u; | |
390 h ^= h >> 16; | |
391 | |
392 return h; | |
393 } | |
394 }; | |
395 | |
396 PUGI__FN_NO_INLINE bool compact_hash_table::rehash(size_t count) | |
397 { | |
398 size_t capacity = 32; | |
399 while (count >= capacity - capacity / 4) | |
400 capacity *= 2; | |
401 | |
402 compact_hash_table rt; | |
403 rt._capacity = capacity; | |
404 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity)); | |
405 | |
406 if (!rt._items) | |
407 return false; | |
408 | |
409 memset(rt._items, 0, sizeof(item_t) * capacity); | |
410 | |
411 for (size_t i = 0; i < _capacity; ++i) | |
412 if (_items[i].key) | |
413 rt.insert(_items[i].key, _items[i].value); | |
414 | |
415 if (_items) | |
416 xml_memory::deallocate(_items); | |
417 | |
418 _capacity = capacity; | |
419 _items = rt._items; | |
420 | |
421 assert(_count == rt._count); | |
422 | |
423 return true; | |
424 } | |
425 | |
426 PUGI__NS_END | |
427 #endif | |
428 | |
429 PUGI__NS_BEGIN | |
430 #ifdef PUGIXML_COMPACT | |
431 static const uintptr_t xml_memory_block_alignment = 4; | |
432 #else | |
433 static const uintptr_t xml_memory_block_alignment = sizeof(void*); | |
434 #endif | |
435 | |
// extra metadata bits (one bit per flag, the low four bits hold the node type)
static const uintptr_t xml_memory_page_contents_shared_mask = 0x40;
static const uintptr_t xml_memory_page_name_allocated_mask = 0x20;
static const uintptr_t xml_memory_page_value_allocated_mask = 0x10;
static const uintptr_t xml_memory_page_type_mask = 0x0f;

// combined masks for string uniqueness
static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_contents_shared_mask | xml_memory_page_name_allocated_mask;
static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_contents_shared_mask | xml_memory_page_value_allocated_mask;
445 | |
446 #ifdef PUGIXML_COMPACT | |
447 #define PUGI__GETHEADER_IMPL(object, page, flags) // unused | |
448 #define PUGI__GETPAGE_IMPL(header) (header).get_page() | |
449 #else | |
450 #define PUGI__GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags)) | |
451 // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings | |
452 #define PUGI__GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8)))) | |
453 #endif | |
454 | |
455 #define PUGI__GETPAGE(n) PUGI__GETPAGE_IMPL((n)->header) | |
456 #define PUGI__NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask) | |
457 | |
458 struct xml_allocator; | |
459 | |
460 struct xml_memory_page | |
461 { | |
462 static xml_memory_page* construct(void* memory) | |
463 { | |
464 xml_memory_page* result = static_cast<xml_memory_page*>(memory); | |
465 | |
466 result->allocator = 0; | |
467 result->prev = 0; | |
468 result->next = 0; | |
469 result->busy_size = 0; | |
470 result->freed_size = 0; | |
471 | |
472 #ifdef PUGIXML_COMPACT | |
473 result->compact_string_base = 0; | |
474 result->compact_shared_parent = 0; | |
475 result->compact_page_marker = 0; | |
476 #endif | |
477 | |
478 return result; | |
479 } | |
480 | |
481 xml_allocator* allocator; | |
482 | |
483 xml_memory_page* prev; | |
484 xml_memory_page* next; | |
485 | |
486 size_t busy_size; | |
487 size_t freed_size; | |
488 | |
489 #ifdef PUGIXML_COMPACT | |
490 char_t* compact_string_base; | |
491 void* compact_shared_parent; | |
492 uint32_t* compact_page_marker; | |
493 #endif | |
494 }; | |
495 | |
496 static const size_t xml_memory_page_size = | |
497 #ifdef PUGIXML_MEMORY_PAGE_SIZE | |
498 (PUGIXML_MEMORY_PAGE_SIZE) | |
499 #else | |
500 32768 | |
501 #endif | |
502 - sizeof(xml_memory_page); | |
503 | |
// Record stored right in front of every string returned by xml_allocator::allocate_string.
// Both fields are expressed in units of xml_memory_block_alignment.
struct xml_memory_string_header
{
	uint16_t page_offset; // offset of this header from the first byte after the page header
	uint16_t full_size; // size of the allocation including this header; 0 if string occupies whole page
};
509 | |
510 struct xml_allocator | |
511 { | |
512 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) | |
513 { | |
514 #ifdef PUGIXML_COMPACT | |
515 _hash = 0; | |
516 #endif | |
517 } | |
518 | |
519 xml_memory_page* allocate_page(size_t data_size) | |
520 { | |
521 size_t size = sizeof(xml_memory_page) + data_size; | |
522 | |
523 // allocate block with some alignment, leaving memory for worst-case padding | |
524 void* memory = xml_memory::allocate(size); | |
525 if (!memory) return 0; | |
526 | |
527 // prepare page structure | |
528 xml_memory_page* page = xml_memory_page::construct(memory); | |
529 assert(page); | |
530 | |
531 assert(this == _root->allocator); | |
532 page->allocator = this; | |
533 | |
534 return page; | |
535 } | |
536 | |
537 static void deallocate_page(xml_memory_page* page) | |
538 { | |
539 xml_memory::deallocate(page); | |
540 } | |
541 | |
542 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); | |
543 | |
544 void* allocate_memory(size_t size, xml_memory_page*& out_page) | |
545 { | |
546 if (PUGI__UNLIKELY(_busy_size + size > xml_memory_page_size)) | |
547 return allocate_memory_oob(size, out_page); | |
548 | |
549 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size; | |
550 | |
551 _busy_size += size; | |
552 | |
553 out_page = _root; | |
554 | |
555 return buf; | |
556 } | |
557 | |
558 #ifdef PUGIXML_COMPACT | |
559 void* allocate_object(size_t size, xml_memory_page*& out_page) | |
560 { | |
561 void* result = allocate_memory(size + sizeof(uint32_t), out_page); | |
562 if (!result) return 0; | |
563 | |
564 // adjust for marker | |
565 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker); | |
566 | |
567 if (PUGI__UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) | |
568 { | |
569 // insert new marker | |
570 uint32_t* marker = static_cast<uint32_t*>(result); | |
571 | |
572 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page)); | |
573 out_page->compact_page_marker = marker; | |
574 | |
575 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block | |
576 // this will make sure deallocate_memory correctly tracks the size | |
577 out_page->freed_size += sizeof(uint32_t); | |
578 | |
579 return marker + 1; | |
580 } | |
581 else | |
582 { | |
583 // roll back uint32_t part | |
584 _busy_size -= sizeof(uint32_t); | |
585 | |
586 return result; | |
587 } | |
588 } | |
589 #else | |
590 void* allocate_object(size_t size, xml_memory_page*& out_page) | |
591 { | |
592 return allocate_memory(size, out_page); | |
593 } | |
594 #endif | |
595 | |
596 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) | |
597 { | |
598 if (page == _root) page->busy_size = _busy_size; | |
599 | |
600 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size); | |
601 (void)!ptr; | |
602 | |
603 page->freed_size += size; | |
604 assert(page->freed_size <= page->busy_size); | |
605 | |
606 if (page->freed_size == page->busy_size) | |
607 { | |
608 if (page->next == 0) | |
609 { | |
610 assert(_root == page); | |
611 | |
612 // top page freed, just reset sizes | |
613 page->busy_size = 0; | |
614 page->freed_size = 0; | |
615 | |
616 #ifdef PUGIXML_COMPACT | |
617 // reset compact state to maximize efficiency | |
618 page->compact_string_base = 0; | |
619 page->compact_shared_parent = 0; | |
620 page->compact_page_marker = 0; | |
621 #endif | |
622 | |
623 _busy_size = 0; | |
624 } | |
625 else | |
626 { | |
627 assert(_root != page); | |
628 assert(page->prev); | |
629 | |
630 // remove from the list | |
631 page->prev->next = page->next; | |
632 page->next->prev = page->prev; | |
633 | |
634 // deallocate | |
635 deallocate_page(page); | |
636 } | |
637 } | |
638 } | |
639 | |
640 char_t* allocate_string(size_t length) | |
641 { | |
642 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; | |
643 | |
644 PUGI__STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); | |
645 | |
646 // allocate memory for string and header block | |
647 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); | |
648 | |
649 // round size up to block alignment boundary | |
650 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); | |
651 | |
652 xml_memory_page* page; | |
653 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page)); | |
654 | |
655 if (!header) return 0; | |
656 | |
657 // setup header | |
658 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page); | |
659 | |
660 assert(page_offset % xml_memory_block_alignment == 0); | |
661 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset); | |
662 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment); | |
663 | |
664 // full_size == 0 for large strings that occupy the whole page | |
665 assert(full_size % xml_memory_block_alignment == 0); | |
666 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); | |
667 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0); | |
668 | |
669 // round-trip through void* to avoid 'cast increases required alignment of target type' warning | |
670 // header is guaranteed a pointer-sized alignment, which should be enough for char_t | |
671 return static_cast<char_t*>(static_cast<void*>(header + 1)); | |
672 } | |
673 | |
674 void deallocate_string(char_t* string) | |
675 { | |
676 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings | |
677 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string | |
678 | |
679 // get header | |
680 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1; | |
681 assert(header); | |
682 | |
683 // deallocate | |
684 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; | |
685 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset)); | |
686 | |
687 // if full_size == 0 then this string occupies the whole page | |
688 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment; | |
689 | |
690 deallocate_memory(header, full_size, page); | |
691 } | |
692 | |
693 bool reserve() | |
694 { | |
695 #ifdef PUGIXML_COMPACT | |
696 return _hash->reserve(); | |
697 #else | |
698 return true; | |
699 #endif | |
700 } | |
701 | |
702 xml_memory_page* _root; | |
703 size_t _busy_size; | |
704 | |
705 #ifdef PUGIXML_COMPACT | |
706 compact_hash_table* _hash; | |
707 #endif | |
708 }; | |
709 | |
710 PUGI__FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) | |
711 { | |
712 const size_t large_allocation_threshold = xml_memory_page_size / 4; | |
713 | |
714 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); | |
715 out_page = page; | |
716 | |
717 if (!page) return 0; | |
718 | |
719 if (size <= large_allocation_threshold) | |
720 { | |
721 _root->busy_size = _busy_size; | |
722 | |
723 // insert page at the end of linked list | |
724 page->prev = _root; | |
725 _root->next = page; | |
726 _root = page; | |
727 | |
728 _busy_size = size; | |
729 } | |
730 else | |
731 { | |
732 // insert page before the end of linked list, so that it is deleted as soon as possible | |
733 // the last page is not deleted even if it's empty (see deallocate_memory) | |
734 assert(_root->prev); | |
735 | |
736 page->prev = _root->prev; | |
737 page->next = _root; | |
738 | |
739 _root->prev->next = page; | |
740 _root->prev = page; | |
741 | |
742 page->busy_size = size; | |
743 } | |
744 | |
745 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page); | |
746 } | |
747 PUGI__NS_END | |
748 | |
749 #ifdef PUGIXML_COMPACT | |
750 PUGI__NS_BEGIN | |
751 static const uintptr_t compact_alignment_log2 = 2; | |
752 static const uintptr_t compact_alignment = 1 << compact_alignment_log2; | |
753 | |
754 class compact_header | |
755 { | |
756 public: | |
757 compact_header(xml_memory_page* page, unsigned int flags) | |
758 { | |
759 PUGI__STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); | |
760 | |
761 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker)); | |
762 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment); | |
763 | |
764 _page = static_cast<unsigned char>(offset >> compact_alignment_log2); | |
765 _flags = static_cast<unsigned char>(flags); | |
766 } | |
767 | |
768 void operator&=(uintptr_t mod) | |
769 { | |
770 _flags &= static_cast<unsigned char>(mod); | |
771 } | |
772 | |
773 void operator|=(uintptr_t mod) | |
774 { | |
775 _flags |= static_cast<unsigned char>(mod); | |
776 } | |
777 | |
778 uintptr_t operator&(uintptr_t mod) const | |
779 { | |
780 return _flags & mod; | |
781 } | |
782 | |
783 xml_memory_page* get_page() const | |
784 { | |
785 // round-trip through void* to silence 'cast increases required alignment of target type' warnings | |
786 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2); | |
787 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker)); | |
788 | |
789 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page))); | |
790 } | |
791 | |
792 private: | |
793 unsigned char _page; | |
794 unsigned char _flags; | |
795 }; | |
796 | |
797 PUGI__FN xml_memory_page* compact_get_page(const void* object, int header_offset) | |
798 { | |
799 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset); | |
800 | |
801 return header->get_page(); | |
802 } | |
803 | |
804 template <int header_offset, typename T> PUGI__FN_NO_INLINE T* compact_get_value(const void* object) | |
805 { | |
806 return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object)); | |
807 } | |
808 | |
809 template <int header_offset, typename T> PUGI__FN_NO_INLINE void compact_set_value(const void* object, T* value) | |
810 { | |
811 compact_get_page(object, header_offset)->allocator->_hash->insert(object, value); | |
812 } | |
813 | |
814 template <typename T, int header_offset, int start = -126> class compact_pointer | |
815 { | |
816 public: | |
817 compact_pointer(): _data(0) | |
818 { | |
819 } | |
820 | |
821 void operator=(const compact_pointer& rhs) | |
822 { | |
823 *this = rhs + 0; | |
824 } | |
825 | |
826 void operator=(T* value) | |
827 { | |
828 if (value) | |
829 { | |
830 // value is guaranteed to be compact-aligned; 'this' is not | |
831 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) | |
832 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to | |
833 // compensate for arithmetic shift rounding for negative values | |
834 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); | |
835 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; | |
836 | |
837 if (static_cast<uintptr_t>(offset) <= 253) | |
838 _data = static_cast<unsigned char>(offset + 1); | |
839 else | |
840 { | |
841 compact_set_value<header_offset>(this, value); | |
842 | |
843 _data = 255; | |
844 } | |
845 } | |
846 else | |
847 _data = 0; | |
848 } | |
849 | |
850 operator T*() const | |
851 { | |
852 if (_data) | |
853 { | |
854 if (_data < 255) | |
855 { | |
856 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); | |
857 | |
858 return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment); | |
859 } | |
860 else | |
861 return compact_get_value<header_offset, T>(this); | |
862 } | |
863 else | |
864 return 0; | |
865 } | |
866 | |
867 T* operator->() const | |
868 { | |
869 return *this; | |
870 } | |
871 | |
872 private: | |
873 unsigned char _data; | |
874 }; | |
875 | |
876 template <typename T, int header_offset> class compact_pointer_parent | |
877 { | |
878 public: | |
879 compact_pointer_parent(): _data(0) | |
880 { | |
881 } | |
882 | |
883 void operator=(const compact_pointer_parent& rhs) | |
884 { | |
885 *this = rhs + 0; | |
886 } | |
887 | |
888 void operator=(T* value) | |
889 { | |
890 if (value) | |
891 { | |
892 // value is guaranteed to be compact-aligned; 'this' is not | |
893 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) | |
894 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to | |
895 // compensate for arithmetic shift behavior for negative values | |
896 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); | |
897 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; | |
898 | |
899 if (static_cast<uintptr_t>(offset) <= 65533) | |
900 { | |
901 _data = static_cast<unsigned short>(offset + 1); | |
902 } | |
903 else | |
904 { | |
905 xml_memory_page* page = compact_get_page(this, header_offset); | |
906 | |
907 if (PUGI__UNLIKELY(page->compact_shared_parent == 0)) | |
908 page->compact_shared_parent = value; | |
909 | |
910 if (page->compact_shared_parent == value) | |
911 { | |
912 _data = 65534; | |
913 } | |
914 else | |
915 { | |
916 compact_set_value<header_offset>(this, value); | |
917 | |
918 _data = 65535; | |
919 } | |
920 } | |
921 } | |
922 else | |
923 { | |
924 _data = 0; | |
925 } | |
926 } | |
927 | |
928 operator T*() const | |
929 { | |
930 if (_data) | |
931 { | |
932 if (_data < 65534) | |
933 { | |
934 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); | |
935 | |
936 return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment); | |
937 } | |
938 else if (_data == 65534) | |
939 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent); | |
940 else | |
941 return compact_get_value<header_offset, T>(this); | |
942 } | |
943 else | |
944 return 0; | |
945 } | |
946 | |
947 T* operator->() const | |
948 { | |
949 return *this; | |
950 } | |
951 | |
952 private: | |
953 uint16_t _data; | |
954 }; | |
955 | |
956 template <int header_offset, int base_offset> class compact_string | |
957 { | |
958 public: | |
959 compact_string(): _data(0) | |
960 { | |
961 } | |
962 | |
963 void operator=(const compact_string& rhs) | |
964 { | |
965 *this = rhs + 0; | |
966 } | |
967 | |
968 void operator=(char_t* value) | |
969 { | |
970 if (value) | |
971 { | |
972 xml_memory_page* page = compact_get_page(this, header_offset); | |
973 | |
974 if (PUGI__UNLIKELY(page->compact_string_base == 0)) | |
975 page->compact_string_base = value; | |
976 | |
977 ptrdiff_t offset = value - page->compact_string_base; | |
978 | |
979 if (static_cast<uintptr_t>(offset) < (65535 << 7)) | |
980 { | |
981 // round-trip through void* to silence 'cast increases required alignment of target type' warnings | |
982 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset)); | |
983 | |
984 if (*base == 0) | |
985 { | |
986 *base = static_cast<uint16_t>((offset >> 7) + 1); | |
987 _data = static_cast<unsigned char>((offset & 127) + 1); | |
988 } | |
989 else | |
990 { | |
991 ptrdiff_t remainder = offset - ((*base - 1) << 7); | |
992 | |
993 if (static_cast<uintptr_t>(remainder) <= 253) | |
994 { | |
995 _data = static_cast<unsigned char>(remainder + 1); | |
996 } | |
997 else | |
998 { | |
999 compact_set_value<header_offset>(this, value); | |
1000 | |
1001 _data = 255; | |
1002 } | |
1003 } | |
1004 } | |
1005 else | |
1006 { | |
1007 compact_set_value<header_offset>(this, value); | |
1008 | |
1009 _data = 255; | |
1010 } | |
1011 } | |
1012 else | |
1013 { | |
1014 _data = 0; | |
1015 } | |
1016 } | |
1017 | |
1018 operator char_t*() const | |
1019 { | |
1020 if (_data) | |
1021 { | |
1022 if (_data < 255) | |
1023 { | |
1024 xml_memory_page* page = compact_get_page(this, header_offset); | |
1025 | |
1026 // round-trip through void* to silence 'cast increases required alignment of target type' warnings | |
1027 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset)); | |
1028 assert(*base); | |
1029 | |
1030 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); | |
1031 | |
1032 return page->compact_string_base + offset; | |
1033 } | |
1034 else | |
1035 { | |
1036 return compact_get_value<header_offset, char_t>(this); | |
1037 } | |
1038 } | |
1039 else | |
1040 return 0; | |
1041 } | |
1042 | |
1043 private: | |
1044 unsigned char _data; | |
1045 }; | |
1046 PUGI__NS_END | |
1047 #endif | |
1048 | |
1049 #ifdef PUGIXML_COMPACT | |
1050 namespace pugi | |
1051 { | |
1052 struct xml_attribute_struct | |
1053 { | |
1054 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) | |
1055 { | |
1056 PUGI__STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); | |
1057 } | |
1058 | |
1059 impl::compact_header header; | |
1060 | |
1061 uint16_t namevalue_base; | |
1062 | |
1063 impl::compact_string<4, 2> name; | |
1064 impl::compact_string<5, 3> value; | |
1065 | |
1066 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; | |
1067 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; | |
1068 }; | |
1069 | |
1070 struct xml_node_struct | |
1071 { | |
1072 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0) | |
1073 { | |
1074 PUGI__STATIC_ASSERT(sizeof(xml_node_struct) == 12); | |
1075 } | |
1076 | |
1077 impl::compact_header header; | |
1078 | |
1079 uint16_t namevalue_base; | |
1080 | |
1081 impl::compact_string<4, 2> name; | |
1082 impl::compact_string<5, 3> value; | |
1083 | |
1084 impl::compact_pointer_parent<xml_node_struct, 6> parent; | |
1085 | |
1086 impl::compact_pointer<xml_node_struct, 8, 0> first_child; | |
1087 | |
1088 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; | |
1089 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; | |
1090 | |
1091 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; | |
1092 }; | |
1093 } | |
1094 #else | |
1095 namespace pugi | |
1096 { | |
1097 struct xml_attribute_struct | |
1098 { | |
1099 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0) | |
1100 { | |
1101 header = PUGI__GETHEADER_IMPL(this, page, 0); | |
1102 } | |
1103 | |
1104 uintptr_t header; | |
1105 | |
1106 char_t* name; | |
1107 char_t* value; | |
1108 | |
1109 xml_attribute_struct* prev_attribute_c; | |
1110 xml_attribute_struct* next_attribute; | |
1111 }; | |
1112 | |
1113 struct xml_node_struct | |
1114 { | |
1115 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) | |
1116 { | |
1117 header = PUGI__GETHEADER_IMPL(this, page, type); | |
1118 } | |
1119 | |
1120 uintptr_t header; | |
1121 | |
1122 char_t* name; | |
1123 char_t* value; | |
1124 | |
1125 xml_node_struct* parent; | |
1126 | |
1127 xml_node_struct* first_child; | |
1128 | |
1129 xml_node_struct* prev_sibling_c; | |
1130 xml_node_struct* next_sibling; | |
1131 | |
1132 xml_attribute_struct* first_attribute; | |
1133 }; | |
1134 } | |
1135 #endif | |
1136 | |
1137 PUGI__NS_BEGIN | |
1138 struct xml_extra_buffer | |
1139 { | |
1140 char_t* buffer; | |
1141 xml_extra_buffer* next; | |
1142 }; | |
1143 | |
1144 struct xml_document_struct: public xml_node_struct, public xml_allocator | |
1145 { | |
1146 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) | |
1147 { | |
1148 } | |
1149 | |
1150 const char_t* buffer; | |
1151 | |
1152 xml_extra_buffer* extra_buffers; | |
1153 | |
1154 #ifdef PUGIXML_COMPACT | |
1155 compact_hash_table hash; | |
1156 #endif | |
1157 }; | |
1158 | |
1159 template <typename Object> inline xml_allocator& get_allocator(const Object* object) | |
1160 { | |
1161 assert(object); | |
1162 | |
1163 return *PUGI__GETPAGE(object)->allocator; | |
1164 } | |
1165 | |
1166 template <typename Object> inline xml_document_struct& get_document(const Object* object) | |
1167 { | |
1168 assert(object); | |
1169 | |
1170 return *static_cast<xml_document_struct*>(PUGI__GETPAGE(object)->allocator); | |
1171 } | |
1172 PUGI__NS_END | |
1173 | |
1174 // Low-level DOM operations | |
1175 PUGI__NS_BEGIN | |
1176 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) | |
1177 { | |
1178 xml_memory_page* page; | |
1179 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); | |
1180 if (!memory) return 0; | |
1181 | |
1182 return new (memory) xml_attribute_struct(page); | |
1183 } | |
1184 | |
1185 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) | |
1186 { | |
1187 xml_memory_page* page; | |
1188 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); | |
1189 if (!memory) return 0; | |
1190 | |
1191 return new (memory) xml_node_struct(page, type); | |
1192 } | |
1193 | |
1194 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) | |
1195 { | |
1196 if (a->header & impl::xml_memory_page_name_allocated_mask) | |
1197 alloc.deallocate_string(a->name); | |
1198 | |
1199 if (a->header & impl::xml_memory_page_value_allocated_mask) | |
1200 alloc.deallocate_string(a->value); | |
1201 | |
1202 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI__GETPAGE(a)); | |
1203 } | |
1204 | |
1205 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) | |
1206 { | |
1207 if (n->header & impl::xml_memory_page_name_allocated_mask) | |
1208 alloc.deallocate_string(n->name); | |
1209 | |
1210 if (n->header & impl::xml_memory_page_value_allocated_mask) | |
1211 alloc.deallocate_string(n->value); | |
1212 | |
1213 for (xml_attribute_struct* attr = n->first_attribute; attr; ) | |
1214 { | |
1215 xml_attribute_struct* next = attr->next_attribute; | |
1216 | |
1217 destroy_attribute(attr, alloc); | |
1218 | |
1219 attr = next; | |
1220 } | |
1221 | |
1222 for (xml_node_struct* child = n->first_child; child; ) | |
1223 { | |
1224 xml_node_struct* next = child->next_sibling; | |
1225 | |
1226 destroy_node(child, alloc); | |
1227 | |
1228 child = next; | |
1229 } | |
1230 | |
1231 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI__GETPAGE(n)); | |
1232 } | |
1233 | |
1234 inline void append_node(xml_node_struct* child, xml_node_struct* node) | |
1235 { | |
1236 child->parent = node; | |
1237 | |
1238 xml_node_struct* head = node->first_child; | |
1239 | |
1240 if (head) | |
1241 { | |
1242 xml_node_struct* tail = head->prev_sibling_c; | |
1243 | |
1244 tail->next_sibling = child; | |
1245 child->prev_sibling_c = tail; | |
1246 head->prev_sibling_c = child; | |
1247 } | |
1248 else | |
1249 { | |
1250 node->first_child = child; | |
1251 child->prev_sibling_c = child; | |
1252 } | |
1253 } | |
1254 | |
1255 inline void prepend_node(xml_node_struct* child, xml_node_struct* node) | |
1256 { | |
1257 child->parent = node; | |
1258 | |
1259 xml_node_struct* head = node->first_child; | |
1260 | |
1261 if (head) | |
1262 { | |
1263 child->prev_sibling_c = head->prev_sibling_c; | |
1264 head->prev_sibling_c = child; | |
1265 } | |
1266 else | |
1267 child->prev_sibling_c = child; | |
1268 | |
1269 child->next_sibling = head; | |
1270 node->first_child = child; | |
1271 } | |
1272 | |
1273 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) | |
1274 { | |
1275 xml_node_struct* parent = node->parent; | |
1276 | |
1277 child->parent = parent; | |
1278 | |
1279 xml_node_struct* next = node->next_sibling; | |
1280 | |
1281 if (next) | |
1282 next->prev_sibling_c = child; | |
1283 else | |
1284 parent->first_child->prev_sibling_c = child; | |
1285 | |
1286 child->next_sibling = next; | |
1287 child->prev_sibling_c = node; | |
1288 | |
1289 node->next_sibling = child; | |
1290 } | |
1291 | |
1292 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) | |
1293 { | |
1294 xml_node_struct* parent = node->parent; | |
1295 | |
1296 child->parent = parent; | |
1297 | |
1298 xml_node_struct* prev = node->prev_sibling_c; | |
1299 | |
1300 if (prev->next_sibling) | |
1301 prev->next_sibling = child; | |
1302 else | |
1303 parent->first_child = child; | |
1304 | |
1305 child->prev_sibling_c = prev; | |
1306 child->next_sibling = node; | |
1307 | |
1308 node->prev_sibling_c = child; | |
1309 } | |
1310 | |
1311 inline void remove_node(xml_node_struct* node) | |
1312 { | |
1313 xml_node_struct* parent = node->parent; | |
1314 | |
1315 xml_node_struct* next = node->next_sibling; | |
1316 xml_node_struct* prev = node->prev_sibling_c; | |
1317 | |
1318 if (next) | |
1319 next->prev_sibling_c = prev; | |
1320 else | |
1321 parent->first_child->prev_sibling_c = prev; | |
1322 | |
1323 if (prev->next_sibling) | |
1324 prev->next_sibling = next; | |
1325 else | |
1326 parent->first_child = next; | |
1327 | |
1328 node->parent = 0; | |
1329 node->prev_sibling_c = 0; | |
1330 node->next_sibling = 0; | |
1331 } | |
1332 | |
1333 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node) | |
1334 { | |
1335 xml_attribute_struct* head = node->first_attribute; | |
1336 | |
1337 if (head) | |
1338 { | |
1339 xml_attribute_struct* tail = head->prev_attribute_c; | |
1340 | |
1341 tail->next_attribute = attr; | |
1342 attr->prev_attribute_c = tail; | |
1343 head->prev_attribute_c = attr; | |
1344 } | |
1345 else | |
1346 { | |
1347 node->first_attribute = attr; | |
1348 attr->prev_attribute_c = attr; | |
1349 } | |
1350 } | |
1351 | |
1352 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) | |
1353 { | |
1354 xml_attribute_struct* head = node->first_attribute; | |
1355 | |
1356 if (head) | |
1357 { | |
1358 attr->prev_attribute_c = head->prev_attribute_c; | |
1359 head->prev_attribute_c = attr; | |
1360 } | |
1361 else | |
1362 attr->prev_attribute_c = attr; | |
1363 | |
1364 attr->next_attribute = head; | |
1365 node->first_attribute = attr; | |
1366 } | |
1367 | |
1368 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) | |
1369 { | |
1370 xml_attribute_struct* next = place->next_attribute; | |
1371 | |
1372 if (next) | |
1373 next->prev_attribute_c = attr; | |
1374 else | |
1375 node->first_attribute->prev_attribute_c = attr; | |
1376 | |
1377 attr->next_attribute = next; | |
1378 attr->prev_attribute_c = place; | |
1379 place->next_attribute = attr; | |
1380 } | |
1381 | |
1382 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) | |
1383 { | |
1384 xml_attribute_struct* prev = place->prev_attribute_c; | |
1385 | |
1386 if (prev->next_attribute) | |
1387 prev->next_attribute = attr; | |
1388 else | |
1389 node->first_attribute = attr; | |
1390 | |
1391 attr->prev_attribute_c = prev; | |
1392 attr->next_attribute = place; | |
1393 place->prev_attribute_c = attr; | |
1394 } | |
1395 | |
1396 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) | |
1397 { | |
1398 xml_attribute_struct* next = attr->next_attribute; | |
1399 xml_attribute_struct* prev = attr->prev_attribute_c; | |
1400 | |
1401 if (next) | |
1402 next->prev_attribute_c = prev; | |
1403 else | |
1404 node->first_attribute->prev_attribute_c = prev; | |
1405 | |
1406 if (prev->next_attribute) | |
1407 prev->next_attribute = next; | |
1408 else | |
1409 node->first_attribute = next; | |
1410 | |
1411 attr->prev_attribute_c = 0; | |
1412 attr->next_attribute = 0; | |
1413 } | |
1414 | |
1415 PUGI__FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) | |
1416 { | |
1417 if (!alloc.reserve()) return 0; | |
1418 | |
1419 xml_node_struct* child = allocate_node(alloc, type); | |
1420 if (!child) return 0; | |
1421 | |
1422 append_node(child, node); | |
1423 | |
1424 return child; | |
1425 } | |
1426 | |
1427 PUGI__FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) | |
1428 { | |
1429 if (!alloc.reserve()) return 0; | |
1430 | |
1431 xml_attribute_struct* attr = allocate_attribute(alloc); | |
1432 if (!attr) return 0; | |
1433 | |
1434 append_attribute(attr, node); | |
1435 | |
1436 return attr; | |
1437 } | |
1438 PUGI__NS_END | |
1439 | |
1440 // Helper classes for code generation | |
1441 PUGI__NS_BEGIN | |
// Compile-time 'false' tag: exposes value == 0 for use as a template argument.
struct opt_false
{
	enum
	{
		value = 0
	};
};
1446 | |
// Compile-time 'true' tag: exposes value == 1 for use as a template argument.
struct opt_true
{
	enum
	{
		value = 1
	};
};
1451 PUGI__NS_END | |
1452 | |
1453 // Unicode utilities | |
1454 PUGI__NS_BEGIN | |
// Swaps the two bytes of a 16-bit value.
inline uint16_t endian_swap(uint16_t value)
{
	unsigned int low_byte = value & 0xff;
	unsigned int high_byte = value >> 8;

	return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
1459 | |
// Reverses the byte order of a 32-bit value.
inline uint32_t endian_swap(uint32_t value)
{
	uint32_t byte0_to_top = (value & 0xff) << 24;
	uint32_t byte1_up = (value & 0xff00) << 8;
	uint32_t byte2_down = (value & 0xff0000) >> 8;
	uint32_t byte3_to_bottom = value >> 24;

	return byte0_to_top | byte1_up | byte2_down | byte3_to_bottom;
}
1464 | |
// Counting sink: accumulates the number of UTF-8 bytes needed for each code point.
struct utf8_counter
{
	typedef size_t value_type;

	// code points below U+10000 take 1, 2 or 3 bytes
	static value_type low(value_type result, uint32_t ch)
	{
		// U+0000..U+007F -> 1, U+0080..U+07FF -> 2, U+0800..U+FFFF -> 3
		size_t width = (ch < 0x80) ? 1 : (ch < 0x800) ? 2 : 3;

		return result + width;
	}

	// U+10000..U+10FFFF always take 4 bytes
	static value_type high(value_type result, uint32_t)
	{
		return result + 4;
	}
};
1485 | |
// Writing sink: encodes each code point as UTF-8 at 'result' and returns the advanced pointer.
// The caller provides enough room; no bounds are checked here.
struct utf8_writer
{
	typedef uint8_t* value_type;

	// encodes a code point below U+10000 (1 to 3 bytes)
	static value_type low(value_type result, uint32_t ch)
	{
		// U+0000..U+007F
		if (ch < 0x80)
		{
			result[0] = static_cast<uint8_t>(ch);
			return result + 1;
		}

		// U+0080..U+07FF
		if (ch < 0x800)
		{
			result[0] = static_cast<uint8_t>(0xC0 | (ch >> 6));
			result[1] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
			return result + 2;
		}

		// U+0800..U+FFFF
		result[0] = static_cast<uint8_t>(0xE0 | (ch >> 12));
		result[1] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
		result[2] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
		return result + 3;
	}

	// encodes U+10000..U+10FFFF (4 bytes)
	static value_type high(value_type result, uint32_t ch)
	{
		result[0] = static_cast<uint8_t>(0xF0 | (ch >> 18));
		result[1] = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
		result[2] = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
		result[3] = static_cast<uint8_t>(0x80 | (ch & 0x3F));
		return result + 4;
	}

	// dispatches on the code point range
	static value_type any(value_type result, uint32_t ch)
	{
		if (ch < 0x10000)
			return low(result, ch);

		return high(result, ch);
	}
};
1530 | |
// Counting sink: accumulates the number of UTF-16 code units needed for each code point.
struct utf16_counter
{
	typedef size_t value_type;

	// code points below U+10000 take one unit
	static value_type low(value_type result, uint32_t)
	{
		return result + 1;
	}

	// code points from U+10000 take two units
	static value_type high(value_type result, uint32_t)
	{
		return result + 2;
	}
};
1545 | |
// Writing sink: encodes each code point as UTF-16 at 'result' and returns the advanced pointer.
// The caller provides enough room; no bounds are checked here.
struct utf16_writer
{
	typedef uint16_t* value_type;

	// code points below U+10000 are stored as a single unit
	static value_type low(value_type result, uint32_t ch)
	{
		result[0] = static_cast<uint16_t>(ch);

		return result + 1;
	}

	// code points from U+10000 are stored as a surrogate pair
	static value_type high(value_type result, uint32_t ch)
	{
		uint32_t scalar = static_cast<uint32_t>(ch - 0x10000);

		uint32_t upper_bits = scalar >> 10;
		uint32_t lower_bits = scalar & 0x3ff;

		result[0] = static_cast<uint16_t>(0xD800 + upper_bits);
		result[1] = static_cast<uint16_t>(0xDC00 + lower_bits);

		return result + 2;
	}

	// dispatches on the code point range
	static value_type any(value_type result, uint32_t ch)
	{
		if (ch < 0x10000)
			return low(result, ch);

		return high(result, ch);
	}
};
1573 | |
// Counting sink: every code point takes exactly one UTF-32 unit, whatever its range.
struct utf32_counter
{
	typedef size_t value_type;

	static value_type low(value_type result, uint32_t)
	{
		return result + 1;
	}

	static value_type high(value_type result, uint32_t)
	{
		return result + 1;
	}
};
1588 | |
// Writing sink: stores each code point unchanged as one UTF-32 unit and returns the advanced pointer.
struct utf32_writer
{
	typedef uint32_t* value_type;

	// same store as any(); the range split exists only to satisfy the sink interface
	static value_type low(value_type result, uint32_t ch)
	{
		return any(result, ch);
	}

	// same store as any(); the range split exists only to satisfy the sink interface
	static value_type high(value_type result, uint32_t ch)
	{
		return any(result, ch);
	}

	static value_type any(value_type result, uint32_t ch)
	{
		result[0] = ch;

		return result + 1;
	}
};
1614 | |
// Writing sink: stores each code point as one Latin-1 byte; anything above 255 becomes '?'.
struct latin1_writer
{
	typedef uint8_t* value_type;

	static value_type low(value_type result, uint32_t ch)
	{
		result[0] = static_cast<uint8_t>(ch > 255 ? '?' : ch);

		return result + 1;
	}

	// code points from U+10000 never fit, so the replacement is unconditional
	static value_type high(value_type result, uint32_t ch)
	{
		(void)ch;

		result[0] = '?';

		return result + 1;
	}
};
1635 | |
// UTF-8 decoder: walks a byte buffer and hands every decoded code point to a Traits sink
// (Traits::low for code points below U+10000, Traits::high for U+10000 and above).
// A byte that does not start a complete, well-formed sequence is skipped on its own and
// nothing is reported for it.
struct utf8_decoder
{
	typedef uint8_t type;

	// data/size: input bytes
	// result: initial sink state; it is threaded through every Traits call and returned at the end
	// the unnamed Traits argument only selects the sink type
	template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
	{
		const uint8_t utf8_byte_mask = 0x3f;

		while (size)
		{
			uint8_t lead = *data;

			// 0xxxxxxx -> U+0000..U+007F
			if (lead < 0x80)
			{
				result = Traits::low(result, lead);
				data += 1;
				size -= 1;

				// process aligned single-byte (ascii) blocks
				if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
				{
					while (size >= 4)
					{
						// load the block with memcpy rather than dereferencing a casted uint32_t pointer:
						// reading uint8_t storage through a uint32_t lvalue violates the aliasing rules
						uint32_t block;
						memcpy(&block, data, sizeof(block));

						// any byte with its top bit set ends the ascii run (the mask is byte-order independent)
						if ((block & 0x80808080) != 0)
							break;

						result = Traits::low(result, data[0]);
						result = Traits::low(result, data[1]);
						result = Traits::low(result, data[2]);
						result = Traits::low(result, data[3]);
						data += 4;
						size -= 4;
					}
				}
			}
			// 110xxxxx -> U+0080..U+07FF
			else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
			{
				result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
				data += 2;
				size -= 2;
			}
			// 1110xxxx -> U+0800-U+FFFF
			else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
			{
				result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
				data += 3;
				size -= 3;
			}
			// 11110xxx -> U+10000..U+10FFFF
			else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
			{
				result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
				data += 4;
				size -= 4;
			}
			// 10xxxxxx or 11111xxx -> invalid; truncated or malformed sequences also land here
			else
			{
				data += 1;
				size -= 1;
			}
		}

		return result;
	}
};
1702 | |
1703 template <typename opt_swap> struct utf16_decoder | |
1704 { | |
1705 typedef uint16_t type; | |
1706 | |
1707 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) | |
1708 { | |
1709 while (size) | |
1710 { | |
1711 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; | |
1712 | |
1713 // U+0000..U+D7FF | |
1714 if (lead < 0xD800) | |
1715 { | |
1716 result = Traits::low(result, lead); | |
1717 data += 1; | |
1718 size -= 1; | |
1719 } | |
1720 // U+E000..U+FFFF | |
1721 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000) | |
1722 { | |
1723 result = Traits::low(result, lead); | |
1724 data += 1; | |
1725 size -= 1; | |
1726 } | |
1727 // surrogate pair lead | |
1728 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2) | |
1729 { | |
1730 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; | |
1731 | |
1732 if (static_cast<unsigned int>(next - 0xDC00) < 0x400) | |
1733 { | |
1734 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); | |
1735 data += 2; | |
1736 size -= 2; | |
1737 } | |
1738 else | |
1739 { | |
1740 data += 1; | |
1741 size -= 1; | |
1742 } | |
1743 } | |
1744 else | |
1745 { | |
1746 data += 1; | |
1747 size -= 1; | |
1748 } | |
1749 } | |
1750 | |
1751 return result; | |
1752 } | |
1753 }; | |
1754 | |
1755 template <typename opt_swap> struct utf32_decoder | |
1756 { | |
1757 typedef uint32_t type; | |
1758 | |
1759 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) | |
1760 { | |
1761 while (size) | |
1762 { | |
1763 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; | |
1764 | |
1765 // U+0000..U+FFFF | |
1766 if (lead < 0x10000) | |
1767 { | |
1768 result = Traits::low(result, lead); | |
1769 data += 1; | |
1770 size -= 1; | |
1771 } | |
1772 // U+10000..U+10FFFF | |
1773 else | |
1774 { | |
1775 result = Traits::high(result, lead); | |
1776 data += 1; | |
1777 size -= 1; | |
1778 } | |
1779 } | |
1780 | |
1781 return result; | |
1782 } | |
1783 }; | |
1784 | |
// Latin-1 decoder: every input byte is reported to the Traits sink as a code point via Traits::low.
struct latin1_decoder
{
	typedef uint8_t type;

	// data/size: input bytes; result: initial sink state, threaded through every Traits call and returned
	template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
	{
		for (size_t i = 0; i < size; ++i)
			result = Traits::low(result, data[i]);

		return result;
	}
};
1801 | |
1802 template <size_t size> struct wchar_selector; | |
1803 | |
1804 template <> struct wchar_selector<2> | |
1805 { | |
1806 typedef uint16_t type; | |
1807 typedef utf16_counter counter; | |
1808 typedef utf16_writer writer; | |
1809 typedef utf16_decoder<opt_false> decoder; | |
1810 }; | |
1811 | |
1812 template <> struct wchar_selector<4> | |
1813 { | |
1814 typedef uint32_t type; | |
1815 typedef utf32_counter counter; | |
1816 typedef utf32_writer writer; | |
1817 typedef utf32_decoder<opt_false> decoder; | |
1818 }; | |
1819 | |
1820 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter; | |
1821 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; | |
1822 | |
1823 struct wchar_decoder | |
1824 { | |
1825 typedef wchar_t type; | |
1826 | |
1827 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) | |
1828 { | |
1829 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder; | |
1830 | |
1831 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits); | |
1832 } | |
1833 }; | |
1834 | |
1835 #ifdef PUGIXML_WCHAR_MODE | |
1836 PUGI__FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) | |
1837 { | |
1838 for (size_t i = 0; i < length; ++i) | |
1839 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i]))); | |
1840 } | |
1841 #endif | |
1842 PUGI__NS_END | |
1843 | |
1844 PUGI__NS_BEGIN | |
// Bit flags stored in chartype_table; each flag names the set of characters that carry it.
enum chartype_t
{
	ct_parse_pcdata = 1 << 0,	// \0, &, \r, <
	ct_parse_attr = 1 << 1,		// \0, &, \r, ', "
	ct_parse_attr_ws = 1 << 2,	// \0, &, \r, ', ", \n, tab
	ct_space = 1 << 3,			// \r, \n, space, tab
	ct_parse_cdata = 1 << 4,	// \0, ], >, \r
	ct_parse_comment = 1 << 5,	// \0, -, >, \r
	ct_symbol = 1 << 6,			// Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
	ct_start_symbol = 1 << 7	// Any symbol > 127, a-z, A-Z, _, :
};
1856 | |
// Per-character bitmask of chartype_t flags, indexed by character code (see PUGI__IS_CHARTYPE).
static const unsigned char chartype_table[256] =
{
	// control characters
	55,  0,   0,   0,   0,   0,   0,   0,      0,   12,  12,  0,   0,   63,  0,   0,   // 0-15
	0,   0,   0,   0,   0,   0,   0,   0,      0,   0,   0,   0,   0,   0,   0,   0,   // 16-31

	// space, punctuation and digits
	8,   0,   6,   0,   0,   0,   7,   6,      0,   0,   0,   0,   0,   96,  64,  0,   // 32-47
	64,  64,  64,  64,  64,  64,  64,  64,     64,  64,  192, 0,   1,   0,   48,  0,   // 48-63

	// upper case letters
	0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 64-79
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   16,  0,   192, // 80-95

	// lower case letters
	0,   192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192, // 96-111
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 0,   0,   0,   0,   0,   // 112-127

	// 128+: every entry is ct_symbol | ct_start_symbol
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192,
	192, 192, 192, 192, 192, 192, 192, 192,    192, 192, 192, 192, 192, 192, 192, 192
};
1877 | |
// Bit flags stored in chartypex_table; each flag names the set of characters that carry it.
enum chartypex_t
{
	ctx_special_pcdata = 1 << 0,	// Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
	ctx_special_attr = 1 << 1,		// Any symbol >= 0 and < 32, &, <, ", '
	ctx_start_symbol = 1 << 2,		// Any symbol > 127, a-z, A-Z, _
	ctx_digit = 1 << 3,				// 0-9
	ctx_symbol = 1 << 4				// Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
1886 | |
// Per-character bitmask of chartypex_t flags, indexed by character code (see PUGI__IS_CHARTYPEX).
static const unsigned char chartypex_table[256] =
{
	// control characters
	3,  3,  3,  3,  3,  3,  3,  3,     3,  2,  2,  3,  3,  2,  3,  3,  // 0-15
	3,  3,  3,  3,  3,  3,  3,  3,     3,  3,  3,  3,  3,  3,  3,  3,  // 16-31

	// space, punctuation and digits
	0,  0,  2,  0,  0,  0,  3,  2,     0,  0,  0,  0,  0,  16, 16, 0,  // 32-47
	24, 24, 24, 24, 24, 24, 24, 24,    24, 24, 0,  0,  3,  0,  1,  0,  // 48-63

	// upper case letters
	0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 64-79
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  20, // 80-95

	// lower case letters
	0,  20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20, // 96-111
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 0,  0,  0,  0,  0,  // 112-127

	// 128+: every entry is ctx_start_symbol | ctx_symbol
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20,
	20, 20, 20, 20, 20, 20, 20, 20,    20, 20, 20, 20, 20, 20, 20, 20
};
1908 | |
// Character classification: evaluates to the bits of 'ct' that are set for character 'c' in 'table'.
#if defined(PUGIXML_WCHAR_MODE)
// wide characters: codes of 128 and above all share the table entry at index 128
#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
#else
// narrow characters: the table is indexed by the character converted to unsigned char
#define PUGI__IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#endif

// lookups against chartype_table (chartype_t flags) and chartypex_table (chartypex_t flags)
#define PUGI__IS_CHARTYPE(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartype_table)
#define PUGI__IS_CHARTYPEX(c, ct) PUGI__IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1917 | |
1918 PUGI__FN bool is_little_endian() | |
1919 { | |
1920 unsigned int ui = 1; | |
1921 | |
1922 return *reinterpret_cast<unsigned char*>(&ui) == 1; | |
1923 } | |
1924 | |
1925 PUGI__FN xml_encoding get_wchar_encoding() | |
1926 { | |
1927 PUGI__STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); | |
1928 | |
1929 if (sizeof(wchar_t) == 2) | |
1930 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
1931 else | |
1932 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
1933 } | |
1934 | |
1935 PUGI__FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length) | |
1936 { | |
1937 #define PUGI__SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; } | |
1938 #define PUGI__SCANCHARTYPE(ct) { while (offset < size && PUGI__IS_CHARTYPE(data[offset], ct)) offset++; } | |
1939 | |
1940 // check if we have a non-empty XML declaration | |
1941 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI__IS_CHARTYPE(data[5], ct_space))) | |
1942 return false; | |
1943 | |
1944 // scan XML declaration until the encoding field | |
1945 for (size_t i = 6; i + 1 < size; ++i) | |
1946 { | |
1947 // declaration can not contain ? in quoted values | |
1948 if (data[i] == '?') | |
1949 return false; | |
1950 | |
1951 if (data[i] == 'e' && data[i + 1] == 'n') | |
1952 { | |
1953 size_t offset = i; | |
1954 | |
1955 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed | |
1956 PUGI__SCANCHAR('e'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('c'); PUGI__SCANCHAR('o'); | |
1957 PUGI__SCANCHAR('d'); PUGI__SCANCHAR('i'); PUGI__SCANCHAR('n'); PUGI__SCANCHAR('g'); | |
1958 | |
1959 // S? = S? | |
1960 PUGI__SCANCHARTYPE(ct_space); | |
1961 PUGI__SCANCHAR('='); | |
1962 PUGI__SCANCHARTYPE(ct_space); | |
1963 | |
1964 // the only two valid delimiters are ' and " | |
1965 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\''; | |
1966 | |
1967 PUGI__SCANCHAR(delimiter); | |
1968 | |
1969 size_t start = offset; | |
1970 | |
1971 out_encoding = data + offset; | |
1972 | |
1973 PUGI__SCANCHARTYPE(ct_symbol); | |
1974 | |
1975 out_length = offset - start; | |
1976 | |
1977 PUGI__SCANCHAR(delimiter); | |
1978 | |
1979 return true; | |
1980 } | |
1981 } | |
1982 | |
1983 return false; | |
1984 | |
1985 #undef PUGI__SCANCHAR | |
1986 #undef PUGI__SCANCHARTYPE | |
1987 } | |
1988 | |
1989 PUGI__FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size) | |
1990 { | |
1991 // skip encoding autodetection if input buffer is too small | |
1992 if (size < 4) return encoding_utf8; | |
1993 | |
1994 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; | |
1995 | |
1996 // look for BOM in first few bytes | |
1997 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; | |
1998 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; | |
1999 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be; | |
2000 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le; | |
2001 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8; | |
2002 | |
2003 // look for <, <? or <?xm in various encodings | |
2004 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be; | |
2005 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le; | |
2006 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be; | |
2007 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le; | |
2008 | |
2009 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) | |
2010 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be; | |
2011 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le; | |
2012 | |
2013 // no known BOM detected; parse declaration | |
2014 const uint8_t* enc = 0; | |
2015 size_t enc_length = 0; | |
2016 | |
2017 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length)) | |
2018 { | |
2019 // iso-8859-1 (case-insensitive) | |
2020 if (enc_length == 10 | |
2021 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o' | |
2022 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9' | |
2023 && enc[8] == '-' && enc[9] == '1') | |
2024 return encoding_latin1; | |
2025 | |
2026 // latin1 (case-insensitive) | |
2027 if (enc_length == 6 | |
2028 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't' | |
2029 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n' | |
2030 && enc[5] == '1') | |
2031 return encoding_latin1; | |
2032 } | |
2033 | |
2034 return encoding_utf8; | |
2035 } | |
2036 | |
2037 PUGI__FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) | |
2038 { | |
2039 // replace wchar encoding with utf implementation | |
2040 if (encoding == encoding_wchar) return get_wchar_encoding(); | |
2041 | |
2042 // replace utf16 encoding with utf16 with specific endianness | |
2043 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
2044 | |
2045 // replace utf32 encoding with utf32 with specific endianness | |
2046 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
2047 | |
2048 // only do autodetection if no explicit encoding is requested | |
2049 if (encoding != encoding_auto) return encoding; | |
2050 | |
2051 // try to guess encoding (based on XML specification, Appendix F.1) | |
2052 const uint8_t* data = static_cast<const uint8_t*>(contents); | |
2053 | |
2054 return guess_buffer_encoding(data, size); | |
2055 } | |
2056 | |
2057 PUGI__FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) | |
2058 { | |
2059 size_t length = size / sizeof(char_t); | |
2060 | |
2061 if (is_mutable) | |
2062 { | |
2063 out_buffer = static_cast<char_t*>(const_cast<void*>(contents)); | |
2064 out_length = length; | |
2065 } | |
2066 else | |
2067 { | |
2068 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
2069 if (!buffer) return false; | |
2070 | |
2071 if (contents) | |
2072 memcpy(buffer, contents, length * sizeof(char_t)); | |
2073 else | |
2074 assert(length == 0); | |
2075 | |
2076 buffer[length] = 0; | |
2077 | |
2078 out_buffer = buffer; | |
2079 out_length = length + 1; | |
2080 } | |
2081 | |
2082 return true; | |
2083 } | |
2084 | |
2085 #ifdef PUGIXML_WCHAR_MODE | |
2086 PUGI__FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) | |
2087 { | |
2088 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || | |
2089 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); | |
2090 } | |
2091 | |
2092 PUGI__FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) | |
2093 { | |
2094 const char_t* data = static_cast<const char_t*>(contents); | |
2095 size_t length = size / sizeof(char_t); | |
2096 | |
2097 if (is_mutable) | |
2098 { | |
2099 char_t* buffer = const_cast<char_t*>(data); | |
2100 | |
2101 convert_wchar_endian_swap(buffer, data, length); | |
2102 | |
2103 out_buffer = buffer; | |
2104 out_length = length; | |
2105 } | |
2106 else | |
2107 { | |
2108 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
2109 if (!buffer) return false; | |
2110 | |
2111 convert_wchar_endian_swap(buffer, data, length); | |
2112 buffer[length] = 0; | |
2113 | |
2114 out_buffer = buffer; | |
2115 out_length = length + 1; | |
2116 } | |
2117 | |
2118 return true; | |
2119 } | |
2120 | |
2121 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) | |
2122 { | |
2123 const typename D::type* data = static_cast<const typename D::type*>(contents); | |
2124 size_t data_length = size / sizeof(typename D::type); | |
2125 | |
2126 // first pass: get length in wchar_t units | |
2127 size_t length = D::process(data, data_length, 0, wchar_counter()); | |
2128 | |
2129 // allocate buffer of suitable length | |
2130 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
2131 if (!buffer) return false; | |
2132 | |
2133 // second pass: convert utf16 input to wchar_t | |
2134 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer); | |
2135 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); | |
2136 | |
2137 assert(oend == obegin + length); | |
2138 *oend = 0; | |
2139 | |
2140 out_buffer = buffer; | |
2141 out_length = length + 1; | |
2142 | |
2143 return true; | |
2144 } | |
2145 | |
2146 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) | |
2147 { | |
2148 // get native encoding | |
2149 xml_encoding wchar_encoding = get_wchar_encoding(); | |
2150 | |
2151 // fast path: no conversion required | |
2152 if (encoding == wchar_encoding) | |
2153 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); | |
2154 | |
2155 // only endian-swapping is required | |
2156 if (need_endian_swap_utf(encoding, wchar_encoding)) | |
2157 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); | |
2158 | |
2159 // source encoding is utf8 | |
2160 if (encoding == encoding_utf8) | |
2161 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder()); | |
2162 | |
2163 // source encoding is utf16 | |
2164 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | |
2165 { | |
2166 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
2167 | |
2168 return (native_encoding == encoding) ? | |
2169 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : | |
2170 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); | |
2171 } | |
2172 | |
2173 // source encoding is utf32 | |
2174 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | |
2175 { | |
2176 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
2177 | |
2178 return (native_encoding == encoding) ? | |
2179 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : | |
2180 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); | |
2181 } | |
2182 | |
2183 // source encoding is latin1 | |
2184 if (encoding == encoding_latin1) | |
2185 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); | |
2186 | |
2187 assert(false && "Invalid encoding"); // unreachable | |
2188 return false; | |
2189 } | |
2190 #else | |
2191 template <typename D> PUGI__FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) | |
2192 { | |
2193 const typename D::type* data = static_cast<const typename D::type*>(contents); | |
2194 size_t data_length = size / sizeof(typename D::type); | |
2195 | |
2196 // first pass: get length in utf8 units | |
2197 size_t length = D::process(data, data_length, 0, utf8_counter()); | |
2198 | |
2199 // allocate buffer of suitable length | |
2200 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
2201 if (!buffer) return false; | |
2202 | |
2203 // second pass: convert utf16 input to utf8 | |
2204 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); | |
2205 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); | |
2206 | |
2207 assert(oend == obegin + length); | |
2208 *oend = 0; | |
2209 | |
2210 out_buffer = buffer; | |
2211 out_length = length + 1; | |
2212 | |
2213 return true; | |
2214 } | |
2215 | |
2216 PUGI__FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size) | |
2217 { | |
2218 for (size_t i = 0; i < size; ++i) | |
2219 if (data[i] > 127) | |
2220 return i; | |
2221 | |
2222 return size; | |
2223 } | |
2224 | |
2225 PUGI__FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) | |
2226 { | |
2227 const uint8_t* data = static_cast<const uint8_t*>(contents); | |
2228 size_t data_length = size; | |
2229 | |
2230 // get size of prefix that does not need utf8 conversion | |
2231 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length); | |
2232 assert(prefix_length <= data_length); | |
2233 | |
2234 const uint8_t* postfix = data + prefix_length; | |
2235 size_t postfix_length = data_length - prefix_length; | |
2236 | |
2237 // if no conversion is needed, just return the original buffer | |
2238 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); | |
2239 | |
2240 // first pass: get length in utf8 units | |
2241 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); | |
2242 | |
2243 // allocate buffer of suitable length | |
2244 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
2245 if (!buffer) return false; | |
2246 | |
2247 // second pass: convert latin1 input to utf8 | |
2248 memcpy(buffer, data, prefix_length); | |
2249 | |
2250 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); | |
2251 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); | |
2252 | |
2253 assert(oend == obegin + length); | |
2254 *oend = 0; | |
2255 | |
2256 out_buffer = buffer; | |
2257 out_length = length + 1; | |
2258 | |
2259 return true; | |
2260 } | |
2261 | |
2262 PUGI__FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) | |
2263 { | |
2264 // fast path: no conversion required | |
2265 if (encoding == encoding_utf8) | |
2266 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); | |
2267 | |
2268 // source encoding is utf16 | |
2269 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | |
2270 { | |
2271 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
2272 | |
2273 return (native_encoding == encoding) ? | |
2274 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : | |
2275 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); | |
2276 } | |
2277 | |
2278 // source encoding is utf32 | |
2279 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | |
2280 { | |
2281 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
2282 | |
2283 return (native_encoding == encoding) ? | |
2284 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : | |
2285 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); | |
2286 } | |
2287 | |
2288 // source encoding is latin1 | |
2289 if (encoding == encoding_latin1) | |
2290 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); | |
2291 | |
2292 assert(false && "Invalid encoding"); // unreachable | |
2293 return false; | |
2294 } | |
2295 #endif | |
2296 | |
2297 PUGI__FN size_t as_utf8_begin(const wchar_t* str, size_t length) | |
2298 { | |
2299 // get length in utf8 characters | |
2300 return wchar_decoder::process(str, length, 0, utf8_counter()); | |
2301 } | |
2302 | |
2303 PUGI__FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) | |
2304 { | |
2305 // convert to utf8 | |
2306 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer); | |
2307 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); | |
2308 | |
2309 assert(begin + size == end); | |
2310 (void)!end; | |
2311 (void)!size; | |
2312 } | |
2313 | |
2314 #ifndef PUGIXML_NO_STL | |
2315 PUGI__FN std::string as_utf8_impl(const wchar_t* str, size_t length) | |
2316 { | |
2317 // first pass: get length in utf8 characters | |
2318 size_t size = as_utf8_begin(str, length); | |
2319 | |
2320 // allocate resulting string | |
2321 std::string result; | |
2322 result.resize(size); | |
2323 | |
2324 // second pass: convert to utf8 | |
2325 if (size > 0) as_utf8_end(&result[0], size, str, length); | |
2326 | |
2327 return result; | |
2328 } | |
2329 | |
2330 PUGI__FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size) | |
2331 { | |
2332 const uint8_t* data = reinterpret_cast<const uint8_t*>(str); | |
2333 | |
2334 // first pass: get length in wchar_t units | |
2335 size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); | |
2336 | |
2337 // allocate resulting string | |
2338 std::basic_string<wchar_t> result; | |
2339 result.resize(length); | |
2340 | |
2341 // second pass: convert to wchar_t | |
2342 if (length > 0) | |
2343 { | |
2344 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]); | |
2345 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer()); | |
2346 | |
2347 assert(begin + length == end); | |
2348 (void)!end; | |
2349 } | |
2350 | |
2351 return result; | |
2352 } | |
2353 #endif | |
2354 | |
2355 template <typename Header> | |
2356 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) | |
2357 { | |
2358 // never reuse shared memory | |
2359 if (header & xml_memory_page_contents_shared_mask) return false; | |
2360 | |
2361 size_t target_length = strlength(target); | |
2362 | |
2363 // always reuse document buffer memory if possible | |
2364 if ((header & header_mask) == 0) return target_length >= length; | |
2365 | |
2366 // reuse heap memory if waste is not too great | |
2367 const size_t reuse_threshold = 32; | |
2368 | |
2369 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); | |
2370 } | |
2371 | |
2372 template <typename String, typename Header> | |
2373 PUGI__FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length) | |
2374 { | |
2375 if (source_length == 0) | |
2376 { | |
2377 // empty string and null pointer are equivalent, so just deallocate old memory | |
2378 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; | |
2379 | |
2380 if (header & header_mask) alloc->deallocate_string(dest); | |
2381 | |
2382 // mark the string as not allocated | |
2383 dest = 0; | |
2384 header &= ~header_mask; | |
2385 | |
2386 return true; | |
2387 } | |
2388 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) | |
2389 { | |
2390 // we can reuse old buffer, so just copy the new data (including zero terminator) | |
2391 memcpy(dest, source, source_length * sizeof(char_t)); | |
2392 dest[source_length] = 0; | |
2393 | |
2394 return true; | |
2395 } | |
2396 else | |
2397 { | |
2398 xml_allocator* alloc = PUGI__GETPAGE_IMPL(header)->allocator; | |
2399 | |
2400 if (!alloc->reserve()) return false; | |
2401 | |
2402 // allocate new buffer | |
2403 char_t* buf = alloc->allocate_string(source_length + 1); | |
2404 if (!buf) return false; | |
2405 | |
2406 // copy the string (including zero terminator) | |
2407 memcpy(buf, source, source_length * sizeof(char_t)); | |
2408 buf[source_length] = 0; | |
2409 | |
2410 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) | |
2411 if (header & header_mask) alloc->deallocate_string(dest); | |
2412 | |
2413 // the string is now allocated, so set the flag | |
2414 dest = buf; | |
2415 header |= header_mask; | |
2416 | |
2417 return true; | |
2418 } | |
2419 } | |
2420 | |
2421 struct gap | |
2422 { | |
2423 char_t* end; | |
2424 size_t size; | |
2425 | |
2426 gap(): end(0), size(0) | |
2427 { | |
2428 } | |
2429 | |
2430 // Push new gap, move s count bytes further (skipping the gap). | |
2431 // Collapse previous gap. | |
2432 void push(char_t*& s, size_t count) | |
2433 { | |
2434 if (end) // there was a gap already; collapse it | |
2435 { | |
2436 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) | |
2437 assert(s >= end); | |
2438 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); | |
2439 } | |
2440 | |
2441 s += count; // end of current gap | |
2442 | |
2443 // "merge" two gaps | |
2444 end = s; | |
2445 size += count; | |
2446 } | |
2447 | |
2448 // Collapse all gaps, return past-the-end pointer | |
2449 char_t* flush(char_t* s) | |
2450 { | |
2451 if (end) | |
2452 { | |
2453 // Move [old_gap_end, current_pos) to [old_gap_start, ...) | |
2454 assert(s >= end); | |
2455 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); | |
2456 | |
2457 return s - size; | |
2458 } | |
2459 else return s; | |
2460 } | |
2461 }; | |
2462 | |
	// Decodes, in place, one reference that starts at s (the callers in this file invoke it
	// with *s == '&'). Recognized forms are numeric references (&#NNN; and &#xHHH;) and the
	// five predefined entities &amp; &apos; &gt; &lt; &quot;.
	//
	// On success the decoded character is stored at s, the now unused remainder of the
	// reference is registered with g as a gap, and the position just past the ';' is returned.
	// When the text is not a recognized/complete reference nothing is written and the
	// position where matching stopped is returned, so the caller keeps scanning from there.
	PUGI__FN char_t* strconv_escape(char_t* s, gap& g)
	{
		// stre walks the reference text; s stays at the '&' until something is decoded
		char_t* stre = s + 1;

		switch (*stre)
		{
			case '#': // &#...
			{
				// accumulated code point; plain unsigned arithmetic, no range check is done here
				unsigned int ucsc = 0;

				if (stre[1] == 'x') // &#x... (hex code)
				{
					stre += 2;

					char_t ch = *stre;

					// a reference without any digits is left untouched
					if (ch == ';') return stre;

					for (;;)
					{
						if (static_cast<unsigned int>(ch - '0') <= 9)
							ucsc = 16 * ucsc + (ch - '0');
						else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) // (ch | ' ') folds 'A'-'F' onto 'a'-'f'
							ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10);
						else if (ch == ';')
							break;
						else // cancel
							return stre;

						ch = *++stre;
					}

					// step over ';'
					++stre;
				}
				else	// &#... (dec code)
				{
					char_t ch = *++stre;

					// a reference without any digits is left untouched
					if (ch == ';') return stre;

					for (;;)
					{
						if (static_cast<unsigned int>(ch - '0') <= 9)
							ucsc = 10 * ucsc + (ch - '0');
						else if (ch == ';')
							break;
						else // cancel
							return stre;

						ch = *++stre;
					}

					// step over ';'
					++stre;
				}

				// store the character at s using the writer of the active char_t mode;
				// s is replaced by the writer's return value (presumably the position just past the written units)
			#ifdef PUGIXML_WCHAR_MODE
				s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc));
			#else
				s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc));
			#endif

				// everything between the written character and the end of the reference becomes a gap
				g.push(s, stre - s);
				return stre;
			}

			case 'a': // &a
			{
				++stre;

				if (*stre == 'm') // &am
				{
					if (*++stre == 'p' && *++stre == ';') // &amp;
					{
						*s++ = '&';
						++stre;

						g.push(s, stre - s);
						return stre;
					}
				}
				else if (*stre == 'p') // &ap
				{
					if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // &apos;
					{
						*s++ = '\'';
						++stre;

						g.push(s, stre - s);
						return stre;
					}
				}
				break;
			}

			case 'g': // &g
			{
				if (*++stre == 't' && *++stre == ';') // &gt;
				{
					*s++ = '>';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			case 'l': // &l
			{
				if (*++stre == 't' && *++stre == ';') // &lt;
				{
					*s++ = '<';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			case 'q': // &q
			{
				if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // &quot;
				{
					*s++ = '"';
					++stre;

					g.push(s, stre - s);
					return stre;
				}
				break;
			}

			default:
				break;
		}

		// not a recognized reference: resume scanning where matching stopped
		return stre;
	}
2602 | |
	// Parser utilities
	// These macros are written against names expected at the point of use: s (current
	// position), endch, optmsk, cursor, alloc, ch, error_offset and error_status.

	// true when c equals e, or when c is the zero terminator and endch equals e
	#define PUGI__ENDSWITH(c, e)        ((c) == (e) || ((c) == 0 && endch == (e)))
	// advance s past characters classified as ct_space
	#define PUGI__SKIPWS()              { while (PUGI__IS_CHARTYPE(*s, ct_space)) ++s; }
	// non-zero when any of the OPT bits is set in optmsk
	#define PUGI__OPTSET(OPT)           ( optmsk & (OPT) )
	// make cursor the node returned by append_new_node(cursor, *alloc, TYPE); a null result is reported as status_out_of_memory
	#define PUGI__PUSHNODE(TYPE)        { cursor = append_new_node(cursor, *alloc, TYPE); if (!cursor) PUGI__THROW_ERROR(status_out_of_memory, s); }
	// move cursor to its parent
	#define PUGI__POPNODE()             { cursor = cursor->parent; }
	// advance s until X becomes true or the zero terminator is reached
	#define PUGI__SCANFOR(X)            { while (*s != 0 && !(X)) ++s; }
	// advance s while X holds (X itself has to stop at the terminator)
	#define PUGI__SCANWHILE(X)          { while (X) ++s; }
	// same as PUGI__SCANWHILE, but X must test the local `ss` instead of *s; checks four characters per loop iteration
	#define PUGI__SCANWHILE_UNROLL(X)   { for (;;) { char_t ss = s[0]; if (PUGI__UNLIKELY(!(X))) { break; } ss = s[1]; if (PUGI__UNLIKELY(!(X))) { s += 1; break; } ss = s[2]; if (PUGI__UNLIKELY(!(X))) { s += 2; break; } ss = s[3]; if (PUGI__UNLIKELY(!(X))) { s += 3; break; } s += 4; } }
	// remember the character at s in ch, overwrite it with a zero terminator and step past it
	#define PUGI__ENDSEG()              { ch = *s; *s = 0; ++s; }
	// record the error position m and status err, then return a null char_t* from the enclosing function
	#define PUGI__THROW_ERROR(err, m)   return error_offset = m, error_status = err, static_cast<char_t*>(0)
	// report err at m when s has reached the zero terminator
	#define PUGI__CHECK_ERROR(err, m)   { if (*s == 0) PUGI__THROW_ERROR(err, m); }
2615 | |
2616 PUGI__FN char_t* strconv_comment(char_t* s, char_t endch) | |
2617 { | |
2618 gap g; | |
2619 | |
2620 while (true) | |
2621 { | |
2622 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_comment)); | |
2623 | |
2624 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair | |
2625 { | |
2626 *s++ = '\n'; // replace first one with 0x0a | |
2627 | |
2628 if (*s == '\n') g.push(s, 1); | |
2629 } | |
2630 else if (s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')) // comment ends here | |
2631 { | |
2632 *g.flush(s) = 0; | |
2633 | |
2634 return s + (s[2] == '>' ? 3 : 2); | |
2635 } | |
2636 else if (*s == 0) | |
2637 { | |
2638 return 0; | |
2639 } | |
2640 else ++s; | |
2641 } | |
2642 } | |
2643 | |
2644 PUGI__FN char_t* strconv_cdata(char_t* s, char_t endch) | |
2645 { | |
2646 gap g; | |
2647 | |
2648 while (true) | |
2649 { | |
2650 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_cdata)); | |
2651 | |
2652 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair | |
2653 { | |
2654 *s++ = '\n'; // replace first one with 0x0a | |
2655 | |
2656 if (*s == '\n') g.push(s, 1); | |
2657 } | |
2658 else if (s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')) // CDATA ends here | |
2659 { | |
2660 *g.flush(s) = 0; | |
2661 | |
2662 return s + 1; | |
2663 } | |
2664 else if (*s == 0) | |
2665 { | |
2666 return 0; | |
2667 } | |
2668 else ++s; | |
2669 } | |
2670 } | |
2671 | |
2672 typedef char_t* (*strconv_pcdata_t)(char_t*); | |
2673 | |
2674 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl | |
2675 { | |
2676 static char_t* parse(char_t* s) | |
2677 { | |
2678 gap g; | |
2679 | |
2680 char_t* begin = s; | |
2681 | |
2682 while (true) | |
2683 { | |
2684 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_pcdata)); | |
2685 | |
2686 if (*s == '<') // PCDATA ends here | |
2687 { | |
2688 char_t* end = g.flush(s); | |
2689 | |
2690 if (opt_trim::value) | |
2691 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) | |
2692 --end; | |
2693 | |
2694 *end = 0; | |
2695 | |
2696 return s + 1; | |
2697 } | |
2698 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair | |
2699 { | |
2700 *s++ = '\n'; // replace first one with 0x0a | |
2701 | |
2702 if (*s == '\n') g.push(s, 1); | |
2703 } | |
2704 else if (opt_escape::value && *s == '&') | |
2705 { | |
2706 s = strconv_escape(s, g); | |
2707 } | |
2708 else if (*s == 0) | |
2709 { | |
2710 char_t* end = g.flush(s); | |
2711 | |
2712 if (opt_trim::value) | |
2713 while (end > begin && PUGI__IS_CHARTYPE(end[-1], ct_space)) | |
2714 --end; | |
2715 | |
2716 *end = 0; | |
2717 | |
2718 return s; | |
2719 } | |
2720 else ++s; | |
2721 } | |
2722 } | |
2723 }; | |
2724 | |
2725 PUGI__FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) | |
2726 { | |
2727 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); | |
2728 | |
2729 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above | |
2730 { | |
2731 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse; | |
2732 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse; | |
2733 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse; | |
2734 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse; | |
2735 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse; | |
2736 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse; | |
2737 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse; | |
2738 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse; | |
2739 default: assert(false); return 0; // unreachable | |
2740 } | |
2741 } | |
2742 | |
	// Signature shared by all attribute value processors: (start of value, closing quote character)
	typedef char_t* (*strconv_attribute_t)(char_t*, char_t);

	// In-place attribute value processors. All four variants scan from s up to end_quote,
	// zero-terminate the processed value and return the position just past the closing quote,
	// or 0 when the zero terminator is reached first. opt_escape selects at compile time
	// whether references are decoded through strconv_escape. The variants differ only in
	// their whitespace handling.
	template <typename opt_escape> struct strconv_attribute_impl
	{
		// Whitespace normalization: leading and trailing whitespace is removed and every
		// internal run of whitespace is replaced by a single space.
		static char_t* parse_wnorm(char_t* s, char_t end_quote)
		{
			gap g;

			// trim leading whitespaces
			if (PUGI__IS_CHARTYPE(*s, ct_space))
			{
				char_t* str = s;

				do ++str;
				while (PUGI__IS_CHARTYPE(*str, ct_space));

				g.push(s, str - s);
			}

			while (true)
			{
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space));

				if (*s == end_quote)
				{
					char_t* str = g.flush(s);

					// terminate the value, then keep zeroing backwards over trailing whitespace
					// NOTE(review): this reads the character before the value when the value is empty;
					// presumably that is the opening quote, which stops the loop - verify against the caller
					do *str-- = 0;
					while (PUGI__IS_CHARTYPE(*str, ct_space));

					return s + 1;
				}
				else if (PUGI__IS_CHARTYPE(*s, ct_space))
				{
					// first whitespace character of a run becomes a plain space...
					*s++ = ' ';

					// ...and the rest of the run is dropped
					if (PUGI__IS_CHARTYPE(*s, ct_space))
					{
						char_t* str = s + 1;
						while (PUGI__IS_CHARTYPE(*str, ct_space)) ++str;

						g.push(s, str - s);
					}
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					return 0;
				}
				else ++s;
			}
		}

		// Whitespace conversion: every whitespace character becomes a space; a "\r\n" pair
		// yields a single space.
		static char_t* parse_wconv(char_t* s, char_t end_quote)
		{
			gap g;

			while (true)
			{
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr_ws));

				if (*s == end_quote)
				{
					*g.flush(s) = 0;

					return s + 1;
				}
				else if (PUGI__IS_CHARTYPE(*s, ct_space))
				{
					if (*s == '\r')
					{
						*s++ = ' ';

						// drop the '\n' of a "\r\n" pair
						if (*s == '\n') g.push(s, 1);
					}
					else *s++ = ' ';
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					return 0;
				}
				else ++s;
			}
		}

		// End-of-line normalization only: "\r\n" pairs and lone '\r' become '\n'.
		static char_t* parse_eol(char_t* s, char_t end_quote)
		{
			gap g;

			while (true)
			{
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));

				if (*s == end_quote)
				{
					*g.flush(s) = 0;

					return s + 1;
				}
				else if (*s == '\r')
				{
					*s++ = '\n';

					// drop the '\n' of a "\r\n" pair
					if (*s == '\n') g.push(s, 1);
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					return 0;
				}
				else ++s;
			}
		}

		// No whitespace processing at all; only references are decoded (when opt_escape is set).
		static char_t* parse_simple(char_t* s, char_t end_quote)
		{
			gap g;

			while (true)
			{
				PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPE(ss, ct_parse_attr));

				if (*s == end_quote)
				{
					*g.flush(s) = 0;

					return s + 1;
				}
				else if (opt_escape::value && *s == '&')
				{
					s = strconv_escape(s, g);
				}
				else if (!*s)
				{
					return 0;
				}
				else ++s;
			}
		}
	};
2893 | |
2894 PUGI__FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) | |
2895 { | |
2896 PUGI__STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); | |
2897 | |
2898 switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above | |
2899 { | |
2900 case 0: return strconv_attribute_impl<opt_false>::parse_simple; | |
2901 case 1: return strconv_attribute_impl<opt_true>::parse_simple; | |
2902 case 2: return strconv_attribute_impl<opt_false>::parse_eol; | |
2903 case 3: return strconv_attribute_impl<opt_true>::parse_eol; | |
2904 case 4: return strconv_attribute_impl<opt_false>::parse_wconv; | |
2905 case 5: return strconv_attribute_impl<opt_true>::parse_wconv; | |
2906 case 6: return strconv_attribute_impl<opt_false>::parse_wconv; | |
2907 case 7: return strconv_attribute_impl<opt_true>::parse_wconv; | |
2908 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm; | |
2909 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm; | |
2910 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm; | |
2911 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm; | |
2912 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm; | |
2913 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm; | |
2914 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm; | |
2915 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm; | |
2916 default: assert(false); return 0; // unreachable | |
2917 } | |
2918 } | |
2919 | |
2920 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) | |
2921 { | |
2922 xml_parse_result result; | |
2923 result.status = status; | |
2924 result.offset = offset; | |
2925 | |
2926 return result; | |
2927 } | |
2928 | |
2929 struct xml_parser | |
2930 { | |
// Allocator used while building the tree (parse_tree passes *alloc to append_new_attribute).
2931 xml_allocator* alloc; | |
// Error state: starts as 0 / status_ok and is read back by parse() once parse_tree has returned.
// NOTE(review): presumably written by the PUGI__THROW_ERROR macro, which is defined outside this chunk.
2932 char_t* error_offset; | |
2933 xml_parse_status error_status; | |
2934 | |
// Stores the allocator and starts in the "no error" state.
2935 xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok) | |
2936 { | |
2937 } | |
2938 | |
2939 // DOCTYPE consists of nested sections of the following possible types: | |
2940 // <!-- ... -->, <? ... ?>, "...", '...' | |
2941 // <![...]]> | |
2942 // <!...> | |
2943 // First group can not contain nested groups | |
2944 // Second group can contain nested groups of the same type | |
2945 // Third group can contain all other groups | |
//
// parse_doctype_primitive skips one section of the first group starting at s: a quoted string,
// a <? ... ?> section or a <!-- ... --> comment. Returns the position just past the section.
// Anything else at s, or reaching the end of the buffer before the section is closed, is reported
// through PUGI__THROW_ERROR(status_bad_doctype, ...).
// NOTE(review): PUGI__THROW_ERROR and PUGI__SCANFOR are defined outside this chunk; callers test the
// returned pointer for null, so the error macro presumably records the error and returns 0.
2946 char_t* parse_doctype_primitive(char_t* s) | |
2947 { | |
2948 if (*s == '"' || *s == '\'') | |
2949 { | |
2950 // quoted string | |
// remember which quote opened the string so that only the same quote closes it
2951 char_t ch = *s++; | |
2952 PUGI__SCANFOR(*s == ch); | |
2953 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); | |
2954 | |
2955 s++; | |
2956 } | |
2957 else if (s[0] == '<' && s[1] == '?') | |
2958 { | |
2959 // <? ... ?> | |
2960 s += 2; | |
2961 PUGI__SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype | |
2962 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); | |
2963 | |
2964 s += 2; | |
2965 } | |
2966 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') | |
2967 { | |
// <!-- ... -->
2968 s += 4; | |
2969 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype | |
2970 if (!*s) PUGI__THROW_ERROR(status_bad_doctype, s); | |
2971 | |
2972 s += 3; | |
2973 } | |
2974 else PUGI__THROW_ERROR(status_bad_doctype, s); | |
2975 | |
2976 return s; | |
2977 } | |
2978 | |
// Skips a <![ ... ]]> section of a DOCTYPE. s must point at the opening "<![" (asserted).
// Nested "<![" openers are counted in depth, and each "]]>" closes one level.
// Returns the position just past the "]]>" that closes the outermost section.
// If the end of the buffer is reached first, the error is reported through
// PUGI__THROW_ERROR(status_bad_doctype, s) (macro defined outside this chunk).
2979 char_t* parse_doctype_ignore(char_t* s) | |
2980 { | |
2981 size_t depth = 0; | |
2982 | |
2983 assert(s[0] == '<' && s[1] == '!' && s[2] == '['); | |
2984 s += 3; | |
2985 | |
2986 while (*s) | |
2987 { | |
2988 if (s[0] == '<' && s[1] == '!' && s[2] == '[') | |
2989 { | |
2990 // nested ignore section | |
2991 s += 3; | |
2992 depth++; | |
2993 } | |
2994 else if (s[0] == ']' && s[1] == ']' && s[2] == '>') | |
2995 { | |
2996 // ignore section end | |
2997 s += 3; | |
2998 | |
// depth 0 means this "]]>" closes the section we were called for
2999 if (depth == 0) | |
3000 return s; | |
3001 | |
3002 depth--; | |
3003 } | |
3004 else s++; | |
3005 } | |
3006 | |
// ran out of input inside the section
3007 PUGI__THROW_ERROR(status_bad_doctype, s); | |
3008 } | |
3009 | |
// Skips a <! ... > group of a DOCTYPE, including everything nested inside it.
//   s     - points at the "<!" that opens the group (asserted; s[0] may also be 0).
//   endch - the original last character of the buffer, which parse() replaced with the terminating zero.
// Returns a pointer AT the '>' that closes the outermost group (it is not stepped over), or at the
// terminating zero when the input ends at depth 0 and endch is '>'.
// A null result from a nested helper is passed straight back to the caller.
// NOTE(review): PUGI__THROW_ERROR is defined outside this chunk.
3010 char_t* parse_doctype_group(char_t* s, char_t endch) | |
3011 { | |
3012 size_t depth = 0; | |
3013 | |
3014 assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); | |
3015 s += 2; | |
3016 | |
3017 while (*s) | |
3018 { | |
// "<!" not followed by '-' is either an ignore section or a nested group; "<!-" is left to the next branch
3019 if (s[0] == '<' && s[1] == '!' && s[2] != '-') | |
3020 { | |
3021 if (s[2] == '[') | |
3022 { | |
3023 // ignore | |
3024 s = parse_doctype_ignore(s); | |
3025 if (!s) return s; | |
3026 } | |
3027 else | |
3028 { | |
3029 // some control group | |
3030 s += 2; | |
3031 depth++; | |
3032 } | |
3033 } | |
3034 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') | |
3035 { | |
3036 // unknown tag (forbidden), or some primitive group | |
3037 s = parse_doctype_primitive(s); | |
3038 if (!s) return s; | |
3039 } | |
3040 else if (*s == '>') | |
3041 { | |
// at depth 0 this '>' closes the outermost group; return without consuming it
3042 if (depth == 0) | |
3043 return s; | |
3044 | |
3045 depth--; | |
3046 s++; | |
3047 } | |
3048 else s++; | |
3049 } | |
3050 | |
// end of buffer: acceptable only if all nested groups are closed and the removed last character was '>'
3051 if (depth != 0 || endch != '>') PUGI__THROW_ERROR(status_bad_doctype, s); | |
3052 | |
3053 return s; | |
3054 } | |
3055 | |
// Parses a construct that starts with "<!": a comment, a CDATA section or a DOCTYPE.
//   s      - points at the '!' that follows '<'.
//   cursor - node that receives the new child node (taken by value; see NOTE below).
//   optmsk - parse options, read through PUGI__OPTSET.
//   endch  - the original last character of the buffer, which parse() replaced with the terminating zero.
// Returns the position after the construct; parse_tree treats a null result as failure.
// NOTE(review): PUGI__PUSHNODE, PUGI__OPTSET, PUGI__SCANFOR, PUGI__ENDSWITH, PUGI__CHECK_ERROR and
// PUGI__THROW_ERROR are defined outside this chunk. PUGI__PUSHNODE presumably re-points the local
// `cursor` at the new node, which would explain why cursor->value is set right after it - confirm.
3056 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) | |
3057 { | |
3058 // parse node contents, starting with exclamation mark | |
3059 ++s; | |
3060 | |
3061 if (*s == '-') // '<!-...' | |
3062 { | |
3063 ++s; | |
3064 | |
3065 if (*s == '-') // '<!--...' | |
3066 { | |
3067 ++s; | |
3068 | |
3069 if (PUGI__OPTSET(parse_comments)) | |
3070 { | |
3071 PUGI__PUSHNODE(node_comment); // Append a new node on the tree. | |
3072 cursor->value = s; // Save the offset. | |
3073 } | |
3074 | |
// line-ending conversion is only needed when the comment text is actually kept
3075 if (PUGI__OPTSET(parse_eol) && PUGI__OPTSET(parse_comments)) | |
3076 { | |
3077 s = strconv_comment(s, endch); | |
3078 | |
3079 if (!s) PUGI__THROW_ERROR(status_bad_comment, cursor->value); | |
3080 } | |
3081 else | |
3082 { | |
3083 // Scan for terminating '-->'. | |
3084 PUGI__SCANFOR(s[0] == '-' && s[1] == '-' && PUGI__ENDSWITH(s[2], '>')); | |
3085 PUGI__CHECK_ERROR(status_bad_comment, s); | |
3086 | |
3087 if (PUGI__OPTSET(parse_comments)) | |
3088 *s = 0; // Zero-terminate this segment at the first terminating '-'. | |
3089 | |
3090 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. | |
3091 } | |
3092 } | |
3093 else PUGI__THROW_ERROR(status_bad_comment, s); | |
3094 } | |
3095 else if (*s == '[') | |
3096 { | |
3097 // '<![CDATA[...' | |
3098 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') | |
3099 { | |
3100 ++s; | |
3101 | |
3102 if (PUGI__OPTSET(parse_cdata)) | |
3103 { | |
3104 PUGI__PUSHNODE(node_cdata); // Append a new node on the tree. | |
3105 cursor->value = s; // Save the offset. | |
3106 | |
3107 if (PUGI__OPTSET(parse_eol)) | |
3108 { | |
3109 s = strconv_cdata(s, endch); | |
3110 | |
3111 if (!s) PUGI__THROW_ERROR(status_bad_cdata, cursor->value); | |
3112 } | |
3113 else | |
3114 { | |
3115 // Scan for terminating ']]>'. | |
3116 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); | |
3117 PUGI__CHECK_ERROR(status_bad_cdata, s); | |
3118 | |
3119 *s++ = 0; // Zero-terminate this segment. | |
3120 } | |
3121 } | |
3122 else // Flagged for discard, but we still have to scan for the terminator. | |
3123 { | |
3124 // Scan for terminating ']]>'. | |
3125 PUGI__SCANFOR(s[0] == ']' && s[1] == ']' && PUGI__ENDSWITH(s[2], '>')); | |
3126 PUGI__CHECK_ERROR(status_bad_cdata, s); | |
3127 | |
3128 ++s; | |
3129 } | |
3130 | |
3131 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. | |
3132 } | |
3133 else PUGI__THROW_ERROR(status_bad_cdata, s); | |
3134 } | |
3135 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI__ENDSWITH(s[6], 'E')) | |
3136 { | |
// step back from the 'D' to the '<' so that parse_doctype_group sees the "<!" it asserts on
3137 s -= 2; | |
3138 | |
// DOCTYPE is rejected when the current node has a parent
3139 if (cursor->parent) PUGI__THROW_ERROR(status_bad_doctype, s); | |
3140 | |
// "<!DOCTYPE" is 9 characters long, so mark points just past the keyword
3141 char_t* mark = s + 9; | |
3142 | |
3143 s = parse_doctype_group(s, endch); | |
3144 if (!s) return s; | |
3145 | |
// parse_doctype_group stops AT the closing '>' (or at the terminator); overwrite it to end the value
3146 assert((*s == 0 && endch == '>') || *s == '>'); | |
3147 if (*s) *s++ = 0; | |
3148 | |
3149 if (PUGI__OPTSET(parse_doctype)) | |
3150 { | |
// drop the whitespace between the keyword and the doctype contents
3151 while (PUGI__IS_CHARTYPE(*mark, ct_space)) ++mark; | |
3152 | |
3153 PUGI__PUSHNODE(node_doctype); | |
3154 | |
3155 cursor->value = mark; | |
3156 } | |
3157 } | |
// input ended right after "<!": use endch to decide which construct was cut off
3158 else if (*s == 0 && endch == '-') PUGI__THROW_ERROR(status_bad_comment, s); | |
3159 else if (*s == 0 && endch == '[') PUGI__THROW_ERROR(status_bad_cdata, s); | |
3160 else PUGI__THROW_ERROR(status_unrecognized_tag, s); | |
3161 | |
3162 return s; | |
3163 } | |
3164 | |
// Parses a construct that starts with "<?": a processing instruction or an XML declaration.
//   s          - points at the '?' that follows '<'.
//   ref_cursor - current node; updated on exit with the local cursor (see "store from registers").
//   optmsk     - parse options, read through PUGI__OPTSET.
//   endch      - the original last character of the buffer, which parse() replaced with the terminating zero.
// Returns the position at which parsing continues; parse_tree treats a null result as failure.
// For a declaration with content, the function returns with the cursor still on the node_declaration
// node and s at the start of that content, so that parse_tree parses it as attributes.
// NOTE(review): PUGI__PUSHNODE, PUGI__POPNODE, PUGI__ENDSEG, PUGI__SKIPWS, PUGI__SCANWHILE, PUGI__SCANFOR,
// PUGI__ENDSWITH, PUGI__CHECK_ERROR and PUGI__THROW_ERROR are defined outside this chunk; several of
// them presumably read or write the locals s, ch and cursor - confirm against the macro definitions.
3165 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) | |
3166 { | |
3167 // load into registers | |
3168 xml_node_struct* cursor = ref_cursor; | |
3169 char_t ch = 0; | |
3170 | |
3171 // parse node contents, starting with question mark | |
3172 ++s; | |
3173 | |
3174 // read PI target | |
3175 char_t* target = s; | |
3176 | |
3177 if (!PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_pi, s); | |
3178 | |
3179 PUGI__SCANWHILE(PUGI__IS_CHARTYPE(*s, ct_symbol)); | |
3180 PUGI__CHECK_ERROR(status_bad_pi, s); | |
3181 | |
3182 // determine node type; stricmp / strcasecmp is not portable | |
// OR-ing with ' ' (0x20) maps ASCII upper-case letters to lower case; the target must be exactly 3 characters long
3183 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; | |
3184 | |
// keep the node only if the option matching its kind is enabled; otherwise just skip it (else branch below)
3185 if (declaration ? PUGI__OPTSET(parse_declaration) : PUGI__OPTSET(parse_pi)) | |
3186 { | |
3187 if (declaration) | |
3188 { | |
3189 // disallow non top-level declarations | |
3190 if (cursor->parent) PUGI__THROW_ERROR(status_bad_pi, s); | |
3191 | |
3192 PUGI__PUSHNODE(node_declaration); | |
3193 } | |
3194 else | |
3195 { | |
3196 PUGI__PUSHNODE(node_pi); | |
3197 } | |
3198 | |
3199 cursor->name = target; | |
3200 | |
3201 PUGI__ENDSEG(); | |
3202 | |
3203 // parse value/attributes | |
3204 if (ch == '?') | |
3205 { | |
3206 // empty node | |
3207 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_pi, s); | |
// advance only when a real '>' is present (it may instead be the removed last character)
3208 s += (*s == '>'); | |
3209 | |
3210 PUGI__POPNODE(); | |
3211 } | |
3212 else if (PUGI__IS_CHARTYPE(ch, ct_space)) | |
3213 { | |
3214 PUGI__SKIPWS(); | |
3215 | |
3216 // scan for tag end | |
3217 char_t* value = s; | |
3218 | |
3219 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); | |
3220 PUGI__CHECK_ERROR(status_bad_pi, s); | |
3221 | |
3222 if (declaration) | |
3223 { | |
3224 // replace ending ? with / so that 'element' terminates properly | |
3225 *s = '/'; | |
3226 | |
3227 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES | |
3228 s = value; | |
3229 } | |
3230 else | |
3231 { | |
3232 // store value and step over > | |
3233 cursor->value = value; | |
3234 | |
3235 PUGI__POPNODE(); | |
3236 | |
3237 PUGI__ENDSEG(); | |
3238 | |
3239 s += (*s == '>'); | |
3240 } | |
3241 } | |
3242 else PUGI__THROW_ERROR(status_bad_pi, s); | |
3243 } | |
3244 else | |
3245 { | |
3246 // scan for tag end | |
3247 PUGI__SCANFOR(s[0] == '?' && PUGI__ENDSWITH(s[1], '>')); | |
3248 PUGI__CHECK_ERROR(status_bad_pi, s); | |
3249 | |
// step over "?>" , or over just '?' when the '>' was the removed last character
3250 s += (s[1] == '>' ? 2 : 1); | |
3251 } | |
3252 | |
3253 // store from registers | |
3254 ref_cursor = cursor; | |
3255 | |
3256 return s; | |
3257 } | |
3258 | |
// Main parsing loop: walks the zero-terminated buffer starting at s and builds the node tree under root.
//   s      - start of the (already BOM-skipped) buffer; it is modified in place.
//   root   - node that receives the parsed top-level nodes.
//   optmsk - parse options, read through PUGI__OPTSET and used to pick the string conversion routines.
//   endch  - the original last character of the buffer, which parse() replaced with the terminating
//            zero; it is consulted wherever the terminator is hit at a position where that character
//            could still complete the construct.
// `cursor` is the node currently being populated and `ch` holds the character saved by PUGI__ENDSEG.
// Returns s once the input is consumed; an unclosed element is reported as status_end_element_mismatch.
// NOTE(review): PUGI__PUSHNODE, PUGI__POPNODE, PUGI__ENDSEG, PUGI__SKIPWS, PUGI__SCANFOR,
// PUGI__SCANWHILE_UNROLL, PUGI__ENDSWITH, PUGI__OPTSET, PUGI__NODETYPE and PUGI__THROW_ERROR are defined
// outside this chunk; several of them presumably read or write the locals s, ch and cursor, so the
// statement order here must not be changed without checking the macro definitions.
3259 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch) | |
3260 { | |
3261 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); | |
3262 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); | |
3263 | |
3264 char_t ch = 0; | |
3265 xml_node_struct* cursor = root; | |
3266 char_t* mark = s; | |
3267 | |
3268 while (*s != 0) | |
3269 { | |
3270 if (*s == '<') | |
3271 { | |
3272 ++s; | |
3273 | |
// LOC_TAG is also reached by goto from the PCDATA branch at the bottom of the loop, with s just past '<'
3274 LOC_TAG: | |
3275 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' | |
3276 { | |
3277 PUGI__PUSHNODE(node_element); // Append a new node to the tree. | |
3278 | |
3279 cursor->name = s; | |
3280 | |
3281 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. | |
3282 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. | |
3283 | |
3284 if (ch == '>') | |
3285 { | |
3286 // end of tag | |
3287 } | |
3288 else if (PUGI__IS_CHARTYPE(ch, ct_space)) | |
3289 { | |
// LOC_ATTRIBUTES is also reached by goto after parse_question returns with the cursor on a declaration node
3290 LOC_ATTRIBUTES: | |
3291 while (true) | |
3292 { | |
3293 PUGI__SKIPWS(); // Eat any whitespace. | |
3294 | |
3295 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) // <... #... | |
3296 { | |
3297 xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute. | |
3298 if (!a) PUGI__THROW_ERROR(status_out_of_memory, s); | |
3299 | |
3300 a->name = s; // Save the offset. | |
3301 | |
3302 PUGI__SCANWHILE_UNROLL(PUGI__IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. | |
3303 PUGI__ENDSEG(); // Save char in 'ch', terminate & step over. | |
3304 | |
// whitespace between the attribute name and '=' : skip it and re-read the delimiter into ch
3305 if (PUGI__IS_CHARTYPE(ch, ct_space)) | |
3306 { | |
3307 PUGI__SKIPWS(); // Eat any whitespace. | |
3308 | |
3309 ch = *s; | |
3310 ++s; | |
3311 } | |
3312 | |
3313 if (ch == '=') // '<... #=...' | |
3314 { | |
3315 PUGI__SKIPWS(); // Eat any whitespace. | |
3316 | |
3317 if (*s == '"' || *s == '\'') // '<... #="...' | |
3318 { | |
3319 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. | |
3320 ++s; // Step over the quote. | |
3321 a->value = s; // Save the offset. | |
3322 | |
3323 s = strconv_attribute(s, ch); | |
3324 | |
3325 if (!s) PUGI__THROW_ERROR(status_bad_attribute, a->value); | |
3326 | |
3327 // After this line the loop continues from the start; | |
3328 // Whitespaces, / and > are ok, symbols and EOF are wrong, | |
3329 // everything else will be detected | |
3330 if (PUGI__IS_CHARTYPE(*s, ct_start_symbol)) PUGI__THROW_ERROR(status_bad_attribute, s); | |
3331 } | |
3332 else PUGI__THROW_ERROR(status_bad_attribute, s); | |
3333 } | |
3334 else PUGI__THROW_ERROR(status_bad_attribute, s); | |
3335 } | |
3336 else if (*s == '/') | |
3337 { | |
3338 ++s; | |
3339 | |
3340 if (*s == '>') | |
3341 { | |
3342 PUGI__POPNODE(); | |
3343 s++; | |
3344 break; | |
3345 } | |
// "/>" split by the end of the buffer: the removed last character supplies the '>'
3346 else if (*s == 0 && endch == '>') | |
3347 { | |
3348 PUGI__POPNODE(); | |
3349 break; | |
3350 } | |
3351 else PUGI__THROW_ERROR(status_bad_start_element, s); | |
3352 } | |
3353 else if (*s == '>') | |
3354 { | |
3355 ++s; | |
3356 | |
3357 break; | |
3358 } | |
3359 else if (*s == 0 && endch == '>') | |
3360 { | |
3361 break; | |
3362 } | |
3363 else PUGI__THROW_ERROR(status_bad_start_element, s); | |
3364 } | |
3365 | |
3366 // !!! | |
3367 } | |
3368 else if (ch == '/') // '<#.../' | |
3369 { | |
3370 if (!PUGI__ENDSWITH(*s, '>')) PUGI__THROW_ERROR(status_bad_start_element, s); | |
3371 | |
3372 PUGI__POPNODE(); // Pop. | |
3373 | |
// advance only when a real '>' is present (it may instead be the removed last character)
3374 s += (*s == '>'); | |
3375 } | |
3376 else if (ch == 0) | |
3377 { | |
3378 // we stepped over null terminator, backtrack & handle closing tag | |
3379 --s; | |
3380 | |
3381 if (endch != '>') PUGI__THROW_ERROR(status_bad_start_element, s); | |
3382 } | |
3383 else PUGI__THROW_ERROR(status_bad_start_element, s); | |
3384 } | |
3385 else if (*s == '/') | |
3386 { | |
// closing tag: its name must match cursor->name character by character
3387 ++s; | |
3388 | |
3389 mark = s; | |
3390 | |
3391 char_t* name = cursor->name; | |
3392 if (!name) PUGI__THROW_ERROR(status_end_element_mismatch, mark); | |
3393 | |
3394 while (PUGI__IS_CHARTYPE(*s, ct_symbol)) | |
3395 { | |
3396 if (*s++ != *name++) PUGI__THROW_ERROR(status_end_element_mismatch, mark); | |
3397 } | |
3398 | |
// the closing name ended before the element name did
3399 if (*name) | |
3400 { | |
// only the removed last character is missing from the name: report a bad end element rather than a mismatch
3401 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI__THROW_ERROR(status_bad_end_element, s); | |
3402 else PUGI__THROW_ERROR(status_end_element_mismatch, mark); | |
3403 } | |
3404 | |
3405 PUGI__POPNODE(); // Pop. | |
3406 | |
3407 PUGI__SKIPWS(); | |
3408 | |
3409 if (*s == 0) | |
3410 { | |
3411 if (endch != '>') PUGI__THROW_ERROR(status_bad_end_element, s); | |
3412 } | |
3413 else | |
3414 { | |
3415 if (*s != '>') PUGI__THROW_ERROR(status_bad_end_element, s); | |
3416 ++s; | |
3417 } | |
3418 } | |
3419 else if (*s == '?') // '<?...' | |
3420 { | |
3421 s = parse_question(s, cursor, optmsk, endch); | |
3422 if (!s) return s; | |
3423 | |
3424 assert(cursor); | |
// a declaration left open by parse_question has its content parsed as attributes
3425 if (PUGI__NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES; | |
3426 } | |
3427 else if (*s == '!') // '<!...' | |
3428 { | |
3429 s = parse_exclamation(s, cursor, optmsk, endch); | |
3430 if (!s) return s; | |
3431 } | |
3432 else if (*s == 0 && endch == '?') PUGI__THROW_ERROR(status_bad_pi, s); | |
3433 else PUGI__THROW_ERROR(status_unrecognized_tag, s); | |
3434 } | |
3435 else | |
3436 { | |
// character data between tags
3437 mark = s; // Save this offset while searching for a terminator. | |
3438 | |
3439 PUGI__SKIPWS(); // Eat whitespace if no genuine PCDATA here. | |
3440 | |
3441 if (*s == '<' || !*s) | |
3442 { | |
3443 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one | |
3444 assert(mark != s); | |
3445 | |
// whitespace-only text is discarded unless one of the parse_ws_pcdata* options asks to keep it
3446 if (!PUGI__OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI__OPTSET(parse_trim_pcdata)) | |
3447 { | |
3448 continue; | |
3449 } | |
3450 else if (PUGI__OPTSET(parse_ws_pcdata_single)) | |
3451 { | |
// keep it only when it is directly followed by a closing tag and the current node has no children yet
3452 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue; | |
3453 } | |
3454 } | |
3455 | |
// without trimming, the text starts at the saved position, leading whitespace included
3456 if (!PUGI__OPTSET(parse_trim_pcdata)) | |
3457 s = mark; | |
3458 | |
// text at document level is stored only in fragment mode; otherwise it is skipped (else branch below)
3459 if (cursor->parent || PUGI__OPTSET(parse_fragment)) | |
3460 { | |
3461 if (PUGI__OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value) | |
3462 { | |
3463 cursor->value = s; // Save the offset. | |
3464 } | |
3465 else | |
3466 { | |
3467 PUGI__PUSHNODE(node_pcdata); // Append a new node on the tree. | |
3468 | |
3469 cursor->value = s; // Save the offset. | |
3470 | |
3471 PUGI__POPNODE(); // Pop since this is a standalone. | |
3472 } | |
3473 | |
3474 s = strconv_pcdata(s); | |
3475 | |
3476 if (!*s) break; | |
3477 } | |
3478 else | |
3479 { | |
3480 PUGI__SCANFOR(*s == '<'); // '...<' | |
3481 if (!*s) break; | |
3482 | |
3483 ++s; | |
3484 } | |
3485 | |
3486 // We're after '<' | |
3487 goto LOC_TAG; | |
3488 } | |
3489 } | |
3490 | |
3491 // check that last tag is closed | |
3492 if (cursor != root) PUGI__THROW_ERROR(status_end_element_mismatch, s); | |
3493 | |
3494 return s; | |
3495 } | |
3496 | |
3497 #ifdef PUGIXML_WCHAR_MODE | |
3498 static char_t* parse_skip_bom(char_t* s) | |
3499 { | |
3500 unsigned int bom = 0xfeff; | |
3501 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s; | |
3502 } | |
3503 #else | |
3504 static char_t* parse_skip_bom(char_t* s) | |
3505 { | |
3506 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s; | |
3507 } | |
3508 #endif | |
3509 | |
3510 static bool has_element_node_siblings(xml_node_struct* node) | |
3511 { | |
3512 while (node) | |
3513 { | |
3514 if (PUGI__NODETYPE(node) == node_element) return true; | |
3515 | |
3516 node = node->next_sibling; | |
3517 } | |
3518 | |
3519 return false; | |
3520 } | |
3521 | |
// Parses `length` characters at `buffer` into the tree under `root` and reports the outcome.
//   buffer - mutable input; its last character is overwritten with 0 and parsing edits it in place.
//   length - number of characters in buffer; 0 yields status_ok in fragment mode, otherwise status_no_document_element.
//   xmldoc - document; cast to xml_allocator* and handed to the parser.
//   root   - node that receives the parsed nodes; children it already has are left out of the
//            "was an element parsed" check below.
//   optmsk - parse options, read through PUGI__OPTSET.
// The returned offset is relative to `buffer`.
// NOTE(review): `if (result)` relies on xml_parse_result's conversion to bool, which is declared outside
// this chunk; it presumably means status == status_ok - confirm.
3522 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) | |
3523 { | |
3524 // early-out for empty documents | |
3525 if (length == 0) | |
3526 return make_parse_result(PUGI__OPTSET(parse_fragment) ? status_ok : status_no_document_element); | |
3527 | |
3528 // get last child of the root before parsing | |
3529 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; | |
3530 | |
3531 // create parser on stack | |
3532 xml_parser parser(static_cast<xml_allocator*>(xmldoc)); | |
3533 | |
3534 // save last character and make buffer zero-terminated (speeds up parsing) | |
3535 char_t endch = buffer[length - 1]; | |
3536 buffer[length - 1] = 0; | |
3537 | |
3538 // skip BOM to make sure it does not end up as part of parse output | |
3539 char_t* buffer_data = parse_skip_bom(buffer); | |
3540 | |
3541 // perform actual parsing | |
// the return value is not used; the outcome is read from parser.error_status / parser.error_offset
3542 parser.parse_tree(buffer_data, root, optmsk, endch); | |
3543 | |
3544 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0); | |
3545 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length); | |
3546 | |
3547 if (result) | |
3548 { | |
3549 // since we removed last character, we have to handle the only possible false positive (stray <) | |
3550 if (endch == '<') | |
3551 return make_parse_result(status_unrecognized_tag, length - 1); | |
3552 | |
3553 // check if there are any element nodes parsed | |
// only nodes added by this call are inspected: start after the previously last child, if there was one
3554 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child+ 0; | |
3555 | |
3556 if (!PUGI__OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) | |
3557 return make_parse_result(status_no_document_element, length - 1); | |
3558 } | |
3559 else | |
3560 { | |
3561 // roll back offset if it occurs on a null terminator in the source buffer | |
3562 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0) | |
3563 result.offset--; | |
3564 } | |
3565 | |
3566 return result; | |
3567 } | |
3568 }; | |
3569 | |
3570 // Output facilities | |
3571 PUGI__FN xml_encoding get_write_native_encoding() | |
3572 { | |
3573 #ifdef PUGIXML_WCHAR_MODE | |
3574 return get_wchar_encoding(); | |
3575 #else | |
3576 return encoding_utf8; | |
3577 #endif | |
3578 } | |
3579 | |
3580 PUGI__FN xml_encoding get_write_encoding(xml_encoding encoding) | |
3581 { | |
3582 // replace wchar encoding with utf implementation | |
3583 if (encoding == encoding_wchar) return get_wchar_encoding(); | |
3584 | |
3585 // replace utf16 encoding with utf16 with specific endianness | |
3586 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
3587 | |
3588 // replace utf32 encoding with utf32 with specific endianness | |
3589 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
3590 | |
3591 // only do autodetection if no explicit encoding is requested | |
3592 if (encoding != encoding_auto) return encoding; | |
3593 | |
3594 // assume utf8 encoding | |
3595 return encoding_utf8; | |
3596 } | |
3597 | |
3598 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) | |
3599 { | |
3600 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); | |
3601 | |
3602 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); | |
3603 | |
3604 return static_cast<size_t>(end - dest) * sizeof(*dest); | |
3605 } | |
3606 | |
3607 template <typename D, typename T> PUGI__FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) | |
3608 { | |
3609 PUGI__STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); | |
3610 | |
3611 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); | |
3612 | |
3613 if (opt_swap) | |
3614 { | |
3615 for (typename T::value_type i = dest; i != end; ++i) | |
3616 *i = endian_swap(*i); | |
3617 } | |
3618 | |
3619 return static_cast<size_t>(end - dest) * sizeof(*dest); | |
3620 } | |
3621 | |
3622 #ifdef PUGIXML_WCHAR_MODE | |
3623 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) | |
3624 { | |
3625 if (length < 1) return 0; | |
3626 | |
3627 // discard last character if it's the lead of a surrogate pair | |
3628 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; | |
3629 } | |
3630 | |
3631 PUGI__FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) | |
3632 { | |
3633 // only endian-swapping is required | |
3634 if (need_endian_swap_utf(encoding, get_wchar_encoding())) | |
3635 { | |
3636 convert_wchar_endian_swap(r_char, data, length); | |
3637 | |
3638 return length * sizeof(char_t); | |
3639 } | |
3640 | |
3641 // convert to utf8 | |
3642 if (encoding == encoding_utf8) | |
3643 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); | |
3644 | |
3645 // convert to utf16 | |
3646 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | |
3647 { | |
3648 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
3649 | |
3650 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); | |
3651 } | |
3652 | |
3653 // convert to utf32 | |
3654 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | |
3655 { | |
3656 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
3657 | |
3658 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); | |
3659 } | |
3660 | |
3661 // convert to latin1 | |
3662 if (encoding == encoding_latin1) | |
3663 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); | |
3664 | |
3665 assert(false && "Invalid encoding"); // unreachable | |
3666 return 0; | |
3667 } | |
3668 #else | |
3669 PUGI__FN size_t get_valid_length(const char_t* data, size_t length) | |
3670 { | |
3671 if (length < 5) return 0; | |
3672 | |
3673 for (size_t i = 1; i <= 4; ++i) | |
3674 { | |
3675 uint8_t ch = static_cast<uint8_t>(data[length - i]); | |
3676 | |
3677 // either a standalone character or a leading one | |
3678 if ((ch & 0xc0) != 0x80) return length - i; | |
3679 } | |
3680 | |
3681 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk | |
3682 return length; | |
3683 } | |
3684 | |
3685 PUGI__FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) | |
3686 { | |
3687 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | |
3688 { | |
3689 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
3690 | |
3691 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); | |
3692 } | |
3693 | |
3694 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | |
3695 { | |
3696 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
3697 | |
3698 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); | |
3699 } | |
3700 | |
3701 if (encoding == encoding_latin1) | |
3702 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); | |
3703 | |
3704 assert(false && "Invalid encoding"); // unreachable | |
3705 return 0; | |
3706 } | |
3707 #endif | |
3708 | |
// Buffers char_t output and forwards it to an xml_writer, converting to the requested encoding.
// Characters are collected in `buffer` (bufcapacity char_t units). When the target encoding equals
// get_write_native_encoding() the buffered data is written as is; otherwise each flushed chunk is
// first converted into `scratch` by convert_buffer_output.
// The copy constructor and assignment operator are declared in the private section and have no
// definition in this chunk (presumably to make the class non-copyable).
3709 class xml_buffered_writer | |
3710 { | |
3711 xml_buffered_writer(const xml_buffered_writer&); | |
3712 xml_buffered_writer& operator=(const xml_buffered_writer&); | |
3713 | |
3714 public: | |
// Binds the writer and resolves user_encoding to a concrete encoding via get_write_encoding.
3715 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) | |
3716 { | |
3717 PUGI__STATIC_ASSERT(bufcapacity >= 8); | |
3718 } | |
3719 | |
// Writes out the buffered characters and empties the buffer.
// Always returns 0, which the write() overloads use as the new buffer offset.
3720 size_t flush() | |
3721 { | |
3722 flush(buffer, bufsize); | |
3723 bufsize = 0; | |
3724 return 0; | |
3725 } | |
3726 | |
// Writes `size` characters at `data` to the underlying writer, converting them if necessary.
// Does not touch bufsize. The converted size is asserted to fit into scratch.
3727 void flush(const char_t* data, size_t size) | |
3728 { | |
3729 if (size == 0) return; | |
3730 | |
3731 // fast path, just write data | |
3732 if (encoding == get_write_native_encoding()) | |
3733 writer.write(data, size * sizeof(char_t)); | |
3734 else | |
3735 { | |
3736 // convert chunk | |
3737 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding); | |
3738 assert(result <= sizeof(scratch)); | |
3739 | |
3740 // write data | |
3741 writer.write(scratch.data_u8, result); | |
3742 } | |
3743 } | |
3744 | |
// Flushes the buffer, then outputs `length` characters at `data`.
// Data longer than bufcapacity is written directly (native encoding) or converted chunk by chunk;
// whatever remains (at most bufcapacity characters) is copied into the now empty buffer.
3745 void write_direct(const char_t* data, size_t length) | |
3746 { | |
3747 // flush the remaining buffer contents | |
3748 flush(); | |
3749 | |
3750 // handle large chunks | |
3751 if (length > bufcapacity) | |
3752 { | |
3753 if (encoding == get_write_native_encoding()) | |
3754 { | |
3755 // fast path, can just write data chunk | |
3756 writer.write(data, length * sizeof(char_t)); | |
3757 return; | |
3758 } | |
3759 | |
3760 // need to convert in suitable chunks | |
3761 while (length > bufcapacity) | |
3762 { | |
3763 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer | |
3764 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) | |
3765 size_t chunk_size = get_valid_length(data, bufcapacity); | |
3766 assert(chunk_size); | |
3767 | |
3768 // convert chunk and write | |
3769 flush(data, chunk_size); | |
3770 | |
3771 // iterate | |
3772 data += chunk_size; | |
3773 length -= chunk_size; | |
3774 } | |
3775 | |
3776 // small tail is copied below | |
3777 bufsize = 0; | |
3778 } | |
3779 | |
3780 memcpy(buffer + bufsize, data, length * sizeof(char_t)); | |
3781 bufsize += length; | |
3782 } | |
3783 | |
// Appends `length` characters to the buffer when they fit; otherwise defers to write_direct.
3784 void write_buffer(const char_t* data, size_t length) | |
3785 { | |
3786 size_t offset = bufsize; | |
3787 | |
3788 if (offset + length <= bufcapacity) | |
3789 { | |
3790 memcpy(buffer + offset, data, length * sizeof(char_t)); | |
3791 bufsize = offset + length; | |
3792 } | |
3793 else | |
3794 { | |
3795 write_direct(data, length); | |
3796 } | |
3797 } | |
3798 | |
// Appends a zero-terminated string. Characters are copied until the string ends or the buffer is
// full; once the buffer has been filled, the rest of the string goes through write_direct.
3799 void write_string(const char_t* data) | |
3800 { | |
3801 // write the part of the string that fits in the buffer | |
3802 size_t offset = bufsize; | |
3803 | |
3804 while (*data && offset < bufcapacity) | |
3805 buffer[offset++] = *data++; | |
3806 | |
3807 // write the rest | |
3808 if (offset < bufcapacity) | |
3809 { | |
3810 bufsize = offset; | |
3811 } | |
3812 else | |
3813 { | |
3814 // backtrack a bit if we have split the codepoint | |
// length = characters copied by the loop above; extra = how many of them are handed back to write_direct
3815 size_t length = offset - bufsize; | |
3816 size_t extra = length - get_valid_length(data - length, length); | |
3817 | |
3818 bufsize = offset - extra; | |
3819 | |
3820 write_direct(data - extra, strlength(data) + extra); | |
3821 } | |
3822 } | |
3823 | |
// write(d0 ... dN): append N+1 characters; the buffer is flushed first when they would not fit.
3824 void write(char_t d0) | |
3825 { | |
3826 size_t offset = bufsize; | |
3827 if (offset > bufcapacity - 1) offset = flush(); | |
3828 | |
3829 buffer[offset + 0] = d0; | |
3830 bufsize = offset + 1; | |
3831 } | |
3832 | |
3833 void write(char_t d0, char_t d1) | |
3834 { | |
3835 size_t offset = bufsize; | |
3836 if (offset > bufcapacity - 2) offset = flush(); | |
3837 | |
3838 buffer[offset + 0] = d0; | |
3839 buffer[offset + 1] = d1; | |
3840 bufsize = offset + 2; | |
3841 } | |
3842 | |
3843 void write(char_t d0, char_t d1, char_t d2) | |
3844 { | |
3845 size_t offset = bufsize; | |
3846 if (offset > bufcapacity - 3) offset = flush(); | |
3847 | |
3848 buffer[offset + 0] = d0; | |
3849 buffer[offset + 1] = d1; | |
3850 buffer[offset + 2] = d2; | |
3851 bufsize = offset + 3; | |
3852 } | |
3853 | |
3854 void write(char_t d0, char_t d1, char_t d2, char_t d3) | |
3855 { | |
3856 size_t offset = bufsize; | |
3857 if (offset > bufcapacity - 4) offset = flush(); | |
3858 | |
3859 buffer[offset + 0] = d0; | |
3860 buffer[offset + 1] = d1; | |
3861 buffer[offset + 2] = d2; | |
3862 buffer[offset + 3] = d3; | |
3863 bufsize = offset + 4; | |
3864 } | |
3865 | |
3866 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) | |
3867 { | |
3868 size_t offset = bufsize; | |
3869 if (offset > bufcapacity - 5) offset = flush(); | |
3870 | |
3871 buffer[offset + 0] = d0; | |
3872 buffer[offset + 1] = d1; | |
3873 buffer[offset + 2] = d2; | |
3874 buffer[offset + 3] = d3; | |
3875 buffer[offset + 4] = d4; | |
3876 bufsize = offset + 5; | |
3877 } | |
3878 | |
3879 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) | |
3880 { | |
3881 size_t offset = bufsize; | |
3882 if (offset > bufcapacity - 6) offset = flush(); | |
3883 | |
3884 buffer[offset + 0] = d0; | |
3885 buffer[offset + 1] = d1; | |
3886 buffer[offset + 2] = d2; | |
3887 buffer[offset + 3] = d3; | |
3888 buffer[offset + 4] = d4; | |
3889 buffer[offset + 5] = d5; | |
3890 bufsize = offset + 6; | |
3891 } | |
3892 | |
3893 // utf8 maximum expansion: x4 (-> utf32) | |
3894 // utf16 maximum expansion: x2 (-> utf32) | |
3895 // utf32 maximum expansion: x1 | |
// bufcapacitybytes is the byte budget (PUGIXML_MEMORY_OUTPUT_STACK if defined, else 10240).
// Each buffered character costs sizeof(char_t) bytes in `buffer` plus 4 bytes in `scratch`,
// hence the division by (sizeof(char_t) + 4).
3896 enum | |
3897 { | |
3898 bufcapacitybytes = | |
3899 #ifdef PUGIXML_MEMORY_OUTPUT_STACK | |
3900 PUGIXML_MEMORY_OUTPUT_STACK | |
3901 #else | |
3902 10240 | |
3903 #endif | |
3904 , | |
3905 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4) | |
3906 }; | |
3907 | |
// pending characters in native (char_t) form; the first bufsize entries are valid
3908 char_t buffer[bufcapacity]; | |
3909 | |
// conversion target, viewed through the element type that convert_buffer_output needs
3910 union | |
3911 { | |
3912 uint8_t data_u8[4 * bufcapacity]; | |
3913 uint16_t data_u16[2 * bufcapacity]; | |
3914 uint32_t data_u32[bufcapacity]; | |
3915 char_t data_char[bufcapacity]; | |
3916 } scratch; | |
3917 | |
// destination writer, number of buffered characters, and the resolved output encoding
3918 xml_writer& writer; | |
3919 size_t bufsize; | |
3920 xml_encoding encoding; | |
3921 }; | |
3922 | |
3923 PUGI__FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) | |
3924 { | |
3925 while (*s) | |
3926 { | |
3927 const char_t* prev = s; | |
3928 | |
3929 // While *s is a usual symbol | |
3930 PUGI__SCANWHILE_UNROLL(!PUGI__IS_CHARTYPEX(ss, type)); | |
3931 | |
3932 writer.write_buffer(prev, static_cast<size_t>(s - prev)); | |
3933 | |
3934 switch (*s) | |
3935 { | |
3936 case 0: break; | |
3937 case '&': | |
3938 writer.write('&', 'a', 'm', 'p', ';'); | |
3939 ++s; | |
3940 break; | |
3941 case '<': | |
3942 writer.write('&', 'l', 't', ';'); | |
3943 ++s; | |
3944 break; | |
3945 case '>': | |
3946 writer.write('&', 'g', 't', ';'); | |
3947 ++s; | |
3948 break; | |
3949 case '"': | |
3950 if (flags & format_attribute_single_quote) | |
3951 writer.write('"'); | |
3952 else | |
3953 writer.write('&', 'q', 'u', 'o', 't', ';'); | |
3954 ++s; | |
3955 break; | |
3956 case '\'': | |
3957 if (flags & format_attribute_single_quote) | |
3958 writer.write('&', 'a', 'p', 'o', 's', ';'); | |
3959 else | |
3960 writer.write('\''); | |
3961 ++s; | |
3962 break; | |
3963 default: // s is not a usual symbol | |
3964 { | |
3965 unsigned int ch = static_cast<unsigned int>(*s++); | |
3966 assert(ch < 32); | |
3967 | |
3968 if (!(flags & format_skip_control_chars)) | |
3969 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';'); | |
3970 } | |
3971 } | |
3972 } | |
3973 } | |
3974 | |
3975 PUGI__FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) | |
3976 { | |
3977 if (flags & format_no_escapes) | |
3978 writer.write_string(s); | |
3979 else | |
3980 text_output_escaped(writer, s, type, flags); | |
3981 } | |
3982 | |
3983 PUGI__FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s) | |
3984 { | |
3985 do | |
3986 { | |
3987 writer.write('<', '!', '[', 'C', 'D'); | |
3988 writer.write('A', 'T', 'A', '['); | |
3989 | |
3990 const char_t* prev = s; | |
3991 | |
3992 // look for ]]> sequence - we can't output it as is since it terminates CDATA | |
3993 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s; | |
3994 | |
3995 // skip ]] if we stopped at ]]>, > will go to the next CDATA section | |
3996 if (*s) s += 2; | |
3997 | |
3998 writer.write_buffer(prev, static_cast<size_t>(s - prev)); | |
3999 | |
4000 writer.write(']', ']', '>'); | |
4001 } | |
4002 while (*s); | |
4003 } | |
4004 | |
4005 PUGI__FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth) | |
4006 { | |
4007 switch (indent_length) | |
4008 { | |
4009 case 1: | |
4010 { | |
4011 for (unsigned int i = 0; i < depth; ++i) | |
4012 writer.write(indent[0]); | |
4013 break; | |
4014 } | |
4015 | |
4016 case 2: | |
4017 { | |
4018 for (unsigned int i = 0; i < depth; ++i) | |
4019 writer.write(indent[0], indent[1]); | |
4020 break; | |
4021 } | |
4022 | |
4023 case 3: | |
4024 { | |
4025 for (unsigned int i = 0; i < depth; ++i) | |
4026 writer.write(indent[0], indent[1], indent[2]); | |
4027 break; | |
4028 } | |
4029 | |
4030 case 4: | |
4031 { | |
4032 for (unsigned int i = 0; i < depth; ++i) | |
4033 writer.write(indent[0], indent[1], indent[2], indent[3]); | |
4034 break; | |
4035 } | |
4036 | |
4037 default: | |
4038 { | |
4039 for (unsigned int i = 0; i < depth; ++i) | |
4040 writer.write_buffer(indent, indent_length); | |
4041 } | |
4042 } | |
4043 } | |
4044 | |
4045 PUGI__FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) | |
4046 { | |
4047 writer.write('<', '!', '-', '-'); | |
4048 | |
4049 while (*s) | |
4050 { | |
4051 const char_t* prev = s; | |
4052 | |
4053 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body | |
4054 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; | |
4055 | |
4056 writer.write_buffer(prev, static_cast<size_t>(s - prev)); | |
4057 | |
4058 if (*s) | |
4059 { | |
4060 assert(*s == '-'); | |
4061 | |
4062 writer.write('-', ' '); | |
4063 ++s; | |
4064 } | |
4065 } | |
4066 | |
4067 writer.write('-', '-', '>'); | |
4068 } | |
4069 | |
4070 PUGI__FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) | |
4071 { | |
4072 while (*s) | |
4073 { | |
4074 const char_t* prev = s; | |
4075 | |
4076 // look for ?> sequence - we can't output it since ?> terminates PI | |
4077 while (*s && !(s[0] == '?' && s[1] == '>')) ++s; | |
4078 | |
4079 writer.write_buffer(prev, static_cast<size_t>(s - prev)); | |
4080 | |
4081 if (*s) | |
4082 { | |
4083 assert(s[0] == '?' && s[1] == '>'); | |
4084 | |
4085 writer.write('?', ' ', '>'); | |
4086 s += 2; | |
4087 } | |
4088 } | |
4089 } | |
4090 | |
4091 PUGI__FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) | |
4092 { | |
4093 const char_t* default_name = PUGIXML_TEXT(":anonymous"); | |
4094 const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"'; | |
4095 | |
4096 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) | |
4097 { | |
4098 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes) | |
4099 { | |
4100 writer.write('\n'); | |
4101 | |
4102 text_output_indent(writer, indent, indent_length, depth + 1); | |
4103 } | |
4104 else | |
4105 { | |
4106 writer.write(' '); | |
4107 } | |
4108 | |
4109 writer.write_string(a->name ? a->name + 0 : default_name); | |
4110 writer.write('=', enquotation_char); | |
4111 | |
4112 if (a->value) | |
4113 text_output(writer, a->value, ctx_special_attr, flags); | |
4114 | |
4115 writer.write(enquotation_char); | |
4116 } | |
4117 } | |
4118 | |
4119 PUGI__FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) | |
4120 { | |
4121 const char_t* default_name = PUGIXML_TEXT(":anonymous"); | |
4122 const char_t* name = node->name ? node->name + 0 : default_name; | |
4123 | |
4124 writer.write('<'); | |
4125 writer.write_string(name); | |
4126 | |
4127 if (node->first_attribute) | |
4128 node_output_attributes(writer, node, indent, indent_length, flags, depth); | |
4129 | |
4130 // element nodes can have value if parse_embed_pcdata was used | |
4131 if (!node->value) | |
4132 { | |
4133 if (!node->first_child) | |
4134 { | |
4135 if (flags & format_no_empty_element_tags) | |
4136 { | |
4137 writer.write('>', '<', '/'); | |
4138 writer.write_string(name); | |
4139 writer.write('>'); | |
4140 | |
4141 return false; | |
4142 } | |
4143 else | |
4144 { | |
4145 if ((flags & format_raw) == 0) | |
4146 writer.write(' '); | |
4147 | |
4148 writer.write('/', '>'); | |
4149 | |
4150 return false; | |
4151 } | |
4152 } | |
4153 else | |
4154 { | |
4155 writer.write('>'); | |
4156 | |
4157 return true; | |
4158 } | |
4159 } | |
4160 else | |
4161 { | |
4162 writer.write('>'); | |
4163 | |
4164 text_output(writer, node->value, ctx_special_pcdata, flags); | |
4165 | |
4166 if (!node->first_child) | |
4167 { | |
4168 writer.write('<', '/'); | |
4169 writer.write_string(name); | |
4170 writer.write('>'); | |
4171 | |
4172 return false; | |
4173 } | |
4174 else | |
4175 { | |
4176 return true; | |
4177 } | |
4178 } | |
4179 } | |
4180 | |
4181 PUGI__FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) | |
4182 { | |
4183 const char_t* default_name = PUGIXML_TEXT(":anonymous"); | |
4184 const char_t* name = node->name ? node->name + 0 : default_name; | |
4185 | |
4186 writer.write('<', '/'); | |
4187 writer.write_string(name); | |
4188 writer.write('>'); | |
4189 } | |
4190 | |
4191 PUGI__FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) | |
4192 { | |
4193 const char_t* default_name = PUGIXML_TEXT(":anonymous"); | |
4194 | |
4195 switch (PUGI__NODETYPE(node)) | |
4196 { | |
4197 case node_pcdata: | |
4198 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags); | |
4199 break; | |
4200 | |
4201 case node_cdata: | |
4202 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); | |
4203 break; | |
4204 | |
4205 case node_comment: | |
4206 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); | |
4207 break; | |
4208 | |
4209 case node_pi: | |
4210 writer.write('<', '?'); | |
4211 writer.write_string(node->name ? node->name + 0 : default_name); | |
4212 | |
4213 if (node->value) | |
4214 { | |
4215 writer.write(' '); | |
4216 node_output_pi_value(writer, node->value); | |
4217 } | |
4218 | |
4219 writer.write('?', '>'); | |
4220 break; | |
4221 | |
4222 case node_declaration: | |
4223 writer.write('<', '?'); | |
4224 writer.write_string(node->name ? node->name + 0 : default_name); | |
4225 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0); | |
4226 writer.write('?', '>'); | |
4227 break; | |
4228 | |
4229 case node_doctype: | |
4230 writer.write('<', '!', 'D', 'O', 'C'); | |
4231 writer.write('T', 'Y', 'P', 'E'); | |
4232 | |
4233 if (node->value) | |
4234 { | |
4235 writer.write(' '); | |
4236 writer.write_string(node->value); | |
4237 } | |
4238 | |
4239 writer.write('>'); | |
4240 break; | |
4241 | |
4242 default: | |
4243 assert(false && "Invalid node type"); // unreachable | |
4244 } | |
4245 } | |
4246 | |
// Bit flags describing which whitespace node_output has to write before the next node.
enum indent_flags_t
{
	indent_newline = 1 << 0, // a line break is due
	indent_indent = 1 << 1   // indentation is due
};
4252 | |
4253 PUGI__FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) | |
4254 { | |
4255 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0; | |
4256 unsigned int indent_flags = indent_indent; | |
4257 | |
4258 xml_node_struct* node = root; | |
4259 | |
4260 do | |
4261 { | |
4262 assert(node); | |
4263 | |
4264 // begin writing current node | |
4265 if (PUGI__NODETYPE(node) == node_pcdata || PUGI__NODETYPE(node) == node_cdata) | |
4266 { | |
4267 node_output_simple(writer, node, flags); | |
4268 | |
4269 indent_flags = 0; | |
4270 } | |
4271 else | |
4272 { | |
4273 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) | |
4274 writer.write('\n'); | |
4275 | |
4276 if ((indent_flags & indent_indent) && indent_length) | |
4277 text_output_indent(writer, indent, indent_length, depth); | |
4278 | |
4279 if (PUGI__NODETYPE(node) == node_element) | |
4280 { | |
4281 indent_flags = indent_newline | indent_indent; | |
4282 | |
4283 if (node_output_start(writer, node, indent, indent_length, flags, depth)) | |
4284 { | |
4285 // element nodes can have value if parse_embed_pcdata was used | |
4286 if (node->value) | |
4287 indent_flags = 0; | |
4288 | |
4289 node = node->first_child; | |
4290 depth++; | |
4291 continue; | |
4292 } | |
4293 } | |
4294 else if (PUGI__NODETYPE(node) == node_document) | |
4295 { | |
4296 indent_flags = indent_indent; | |
4297 | |
4298 if (node->first_child) | |
4299 { | |
4300 node = node->first_child; | |
4301 continue; | |
4302 } | |
4303 } | |
4304 else | |
4305 { | |
4306 node_output_simple(writer, node, flags); | |
4307 | |
4308 indent_flags = indent_newline | indent_indent; | |
4309 } | |
4310 } | |
4311 | |
4312 // continue to the next node | |
4313 while (node != root) | |
4314 { | |
4315 if (node->next_sibling) | |
4316 { | |
4317 node = node->next_sibling; | |
4318 break; | |
4319 } | |
4320 | |
4321 node = node->parent; | |
4322 | |
4323 // write closing node | |
4324 if (PUGI__NODETYPE(node) == node_element) | |
4325 { | |
4326 depth--; | |
4327 | |
4328 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) | |
4329 writer.write('\n'); | |
4330 | |
4331 if ((indent_flags & indent_indent) && indent_length) | |
4332 text_output_indent(writer, indent, indent_length, depth); | |
4333 | |
4334 node_output_end(writer, node); | |
4335 | |
4336 indent_flags = indent_newline | indent_indent; | |
4337 } | |
4338 } | |
4339 } | |
4340 while (node != root); | |
4341 | |
4342 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) | |
4343 writer.write('\n'); | |
4344 } | |
4345 | |
4346 PUGI__FN bool has_declaration(xml_node_struct* node) | |
4347 { | |
4348 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling) | |
4349 { | |
4350 xml_node_type type = PUGI__NODETYPE(child); | |
4351 | |
4352 if (type == node_declaration) return true; | |
4353 if (type == node_element) return false; | |
4354 } | |
4355 | |
4356 return false; | |
4357 } | |
4358 | |
4359 PUGI__FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node) | |
4360 { | |
4361 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) | |
4362 if (a == attr) | |
4363 return true; | |
4364 | |
4365 return false; | |
4366 } | |
4367 | |
4368 PUGI__FN bool allow_insert_attribute(xml_node_type parent) | |
4369 { | |
4370 return parent == node_element || parent == node_declaration; | |
4371 } | |
4372 | |
4373 PUGI__FN bool allow_insert_child(xml_node_type parent, xml_node_type child) | |
4374 { | |
4375 if (parent != node_document && parent != node_element) return false; | |
4376 if (child == node_document || child == node_null) return false; | |
4377 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false; | |
4378 | |
4379 return true; | |
4380 } | |
4381 | |
4382 PUGI__FN bool allow_move(xml_node parent, xml_node child) | |
4383 { | |
4384 // check that child can be a child of parent | |
4385 if (!allow_insert_child(parent.type(), child.type())) | |
4386 return false; | |
4387 | |
4388 // check that node is not moved between documents | |
4389 if (parent.root() != child.root()) | |
4390 return false; | |
4391 | |
4392 // check that new parent is not in the child subtree | |
4393 xml_node cur = parent; | |
4394 | |
4395 while (cur) | |
4396 { | |
4397 if (cur == child) | |
4398 return false; | |
4399 | |
4400 cur = cur.parent(); | |
4401 } | |
4402 | |
4403 return true; | |
4404 } | |
4405 | |
4406 template <typename String, typename Header> | |
4407 PUGI__FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) | |
4408 { | |
4409 assert(!dest && (header & header_mask) == 0); | |
4410 | |
4411 if (source) | |
4412 { | |
4413 if (alloc && (source_header & header_mask) == 0) | |
4414 { | |
4415 dest = source; | |
4416 | |
4417 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared | |
4418 header |= xml_memory_page_contents_shared_mask; | |
4419 source_header |= xml_memory_page_contents_shared_mask; | |
4420 } | |
4421 else | |
4422 strcpy_insitu(dest, header, header_mask, source, strlength(source)); | |
4423 } | |
4424 } | |
4425 | |
4426 PUGI__FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) | |
4427 { | |
4428 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); | |
4429 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); | |
4430 | |
4431 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) | |
4432 { | |
4433 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); | |
4434 | |
4435 if (da) | |
4436 { | |
4437 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); | |
4438 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); | |
4439 } | |
4440 } | |
4441 } | |
4442 | |
4443 PUGI__FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) | |
4444 { | |
4445 xml_allocator& alloc = get_allocator(dn); | |
4446 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0; | |
4447 | |
4448 node_copy_contents(dn, sn, shared_alloc); | |
4449 | |
4450 xml_node_struct* dit = dn; | |
4451 xml_node_struct* sit = sn->first_child; | |
4452 | |
4453 while (sit && sit != sn) | |
4454 { | |
4455 // loop invariant: dit is inside the subtree rooted at dn | |
4456 assert(dit); | |
4457 | |
4458 // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop | |
4459 if (sit != dn) | |
4460 { | |
4461 xml_node_struct* copy = append_new_node(dit, alloc, PUGI__NODETYPE(sit)); | |
4462 | |
4463 if (copy) | |
4464 { | |
4465 node_copy_contents(copy, sit, shared_alloc); | |
4466 | |
4467 if (sit->first_child) | |
4468 { | |
4469 dit = copy; | |
4470 sit = sit->first_child; | |
4471 continue; | |
4472 } | |
4473 } | |
4474 } | |
4475 | |
4476 // continue to the next node | |
4477 do | |
4478 { | |
4479 if (sit->next_sibling) | |
4480 { | |
4481 sit = sit->next_sibling; | |
4482 break; | |
4483 } | |
4484 | |
4485 sit = sit->parent; | |
4486 dit = dit->parent; | |
4487 | |
4488 // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn | |
4489 assert(sit == sn || dit); | |
4490 } | |
4491 while (sit != sn); | |
4492 } | |
4493 | |
4494 assert(!sit || dit == dn->parent); | |
4495 } | |
4496 | |
4497 PUGI__FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) | |
4498 { | |
4499 xml_allocator& alloc = get_allocator(da); | |
4500 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0; | |
4501 | |
4502 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); | |
4503 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); | |
4504 } | |
4505 | |
4506 inline bool is_text_node(xml_node_struct* node) | |
4507 { | |
4508 xml_node_type type = PUGI__NODETYPE(node); | |
4509 | |
4510 return type == node_pcdata || type == node_cdata; | |
4511 } | |
4512 | |
4513 // get value with conversion functions | |
4514 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv) | |
4515 { | |
4516 U result = 0; | |
4517 const char_t* s = value; | |
4518 | |
4519 while (PUGI__IS_CHARTYPE(*s, ct_space)) | |
4520 s++; | |
4521 | |
4522 bool negative = (*s == '-'); | |
4523 | |
4524 s += (*s == '+' || *s == '-'); | |
4525 | |
4526 bool overflow = false; | |
4527 | |
4528 if (s[0] == '0' && (s[1] | ' ') == 'x') | |
4529 { | |
4530 s += 2; | |
4531 | |
4532 // since overflow detection relies on length of the sequence skip leading zeros | |
4533 while (*s == '0') | |
4534 s++; | |
4535 | |
4536 const char_t* start = s; | |
4537 | |
4538 for (;;) | |
4539 { | |
4540 if (static_cast<unsigned>(*s - '0') < 10) | |
4541 result = result * 16 + (*s - '0'); | |
4542 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6) | |
4543 result = result * 16 + ((*s | ' ') - 'a' + 10); | |
4544 else | |
4545 break; | |
4546 | |
4547 s++; | |
4548 } | |
4549 | |
4550 size_t digits = static_cast<size_t>(s - start); | |
4551 | |
4552 overflow = digits > sizeof(U) * 2; | |
4553 } | |
4554 else | |
4555 { | |
4556 // since overflow detection relies on length of the sequence skip leading zeros | |
4557 while (*s == '0') | |
4558 s++; | |
4559 | |
4560 const char_t* start = s; | |
4561 | |
4562 for (;;) | |
4563 { | |
4564 if (static_cast<unsigned>(*s - '0') < 10) | |
4565 result = result * 10 + (*s - '0'); | |
4566 else | |
4567 break; | |
4568 | |
4569 s++; | |
4570 } | |
4571 | |
4572 size_t digits = static_cast<size_t>(s - start); | |
4573 | |
4574 PUGI__STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2); | |
4575 | |
4576 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; | |
4577 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; | |
4578 const size_t high_bit = sizeof(U) * 8 - 1; | |
4579 | |
4580 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); | |
4581 } | |
4582 | |
4583 if (negative) | |
4584 { | |
4585 // Workaround for crayc++ CC-3059: Expected no overflow in routine. | |
4586 #ifdef _CRAYC | |
4587 return (overflow || result > ~minv + 1) ? minv : ~result + 1; | |
4588 #else | |
4589 return (overflow || result > 0 - minv) ? minv : 0 - result; | |
4590 #endif | |
4591 } | |
4592 else | |
4593 return (overflow || result > maxv) ? maxv : result; | |
4594 } | |
4595 | |
4596 PUGI__FN int get_value_int(const char_t* value) | |
4597 { | |
4598 return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX); | |
4599 } | |
4600 | |
4601 PUGI__FN unsigned int get_value_uint(const char_t* value) | |
4602 { | |
4603 return string_to_integer<unsigned int>(value, 0, UINT_MAX); | |
4604 } | |
4605 | |
4606 PUGI__FN double get_value_double(const char_t* value) | |
4607 { | |
4608 #ifdef PUGIXML_WCHAR_MODE | |
4609 return wcstod(value, 0); | |
4610 #else | |
4611 return strtod(value, 0); | |
4612 #endif | |
4613 } | |
4614 | |
4615 PUGI__FN float get_value_float(const char_t* value) | |
4616 { | |
4617 #ifdef PUGIXML_WCHAR_MODE | |
4618 return static_cast<float>(wcstod(value, 0)); | |
4619 #else | |
4620 return static_cast<float>(strtod(value, 0)); | |
4621 #endif | |
4622 } | |
4623 | |
4624 PUGI__FN bool get_value_bool(const char_t* value) | |
4625 { | |
4626 // only look at first char | |
4627 char_t first = *value; | |
4628 | |
4629 // 1*, t* (true), T* (True), y* (yes), Y* (YES) | |
4630 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); | |
4631 } | |
4632 | |
4633 #ifdef PUGIXML_HAS_LONG_LONG | |
4634 PUGI__FN long long get_value_llong(const char_t* value) | |
4635 { | |
4636 return string_to_integer<unsigned long long>(value, static_cast<unsigned long long>(LLONG_MIN), LLONG_MAX); | |
4637 } | |
4638 | |
4639 PUGI__FN unsigned long long get_value_ullong(const char_t* value) | |
4640 { | |
4641 return string_to_integer<unsigned long long>(value, 0, ULLONG_MAX); | |
4642 } | |
4643 #endif | |
4644 | |
4645 template <typename U> PUGI__FN PUGI__UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative) | |
4646 { | |
4647 char_t* result = end - 1; | |
4648 U rest = negative ? 0 - value : value; | |
4649 | |
4650 do | |
4651 { | |
4652 *result-- = static_cast<char_t>('0' + (rest % 10)); | |
4653 rest /= 10; | |
4654 } | |
4655 while (rest); | |
4656 | |
4657 assert(result >= begin); | |
4658 (void)begin; | |
4659 | |
4660 *result = '-'; | |
4661 | |
4662 return result + !negative; | |
4663 } | |
4664 | |
4665 // set value with conversion functions | |
4666 template <typename String, typename Header> | |
4667 PUGI__FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) | |
4668 { | |
4669 #ifdef PUGIXML_WCHAR_MODE | |
4670 char_t wbuf[128]; | |
4671 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); | |
4672 | |
4673 size_t offset = 0; | |
4674 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; | |
4675 | |
4676 return strcpy_insitu(dest, header, header_mask, wbuf, offset); | |
4677 #else | |
4678 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); | |
4679 #endif | |
4680 } | |
4681 | |
4682 template <typename U, typename String, typename Header> | |
4683 PUGI__FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative) | |
4684 { | |
4685 char_t buf[64]; | |
4686 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); | |
4687 char_t* begin = integer_to_string(buf, end, value, negative); | |
4688 | |
4689 return strcpy_insitu(dest, header, header_mask, begin, end - begin); | |
4690 } | |
4691 | |
4692 template <typename String, typename Header> | |
4693 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision) | |
4694 { | |
4695 char buf[128]; | |
4696 PUGI__SNPRINTF(buf, "%.*g", precision, double(value)); | |
4697 | |
4698 return set_value_ascii(dest, header, header_mask, buf); | |
4699 } | |
4700 | |
4701 template <typename String, typename Header> | |
4702 PUGI__FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision) | |
4703 { | |
4704 char buf[128]; | |
4705 PUGI__SNPRINTF(buf, "%.*g", precision, value); | |
4706 | |
4707 return set_value_ascii(dest, header, header_mask, buf); | |
4708 } | |
4709 | |
4710 template <typename String, typename Header> | |
4711 PUGI__FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value) | |
4712 { | |
4713 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5); | |
4714 } | |
4715 | |
4716 PUGI__FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) | |
4717 { | |
4718 // check input buffer | |
4719 if (!contents && size) return make_parse_result(status_io_error); | |
4720 | |
4721 // get actual encoding | |
4722 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); | |
4723 | |
4724 // if convert_buffer below throws bad_alloc, we still need to deallocate contents if we own it | |
4725 auto_deleter<void> contents_guard(own ? contents : 0, xml_memory::deallocate); | |
4726 | |
4727 // get private buffer | |
4728 char_t* buffer = 0; | |
4729 size_t length = 0; | |
4730 | |
4731 // coverity[var_deref_model] | |
4732 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); | |
4733 | |
4734 // after this we either deallocate contents (below) or hold on to it via doc->buffer, so we don't need to guard it | |
4735 contents_guard.release(); | |
4736 | |
4737 // delete original buffer if we performed a conversion | |
4738 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); | |
4739 | |
4740 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself | |
4741 if (own || buffer != contents) *out_buffer = buffer; | |
4742 | |
4743 // store buffer for offset_debug | |
4744 doc->buffer = buffer; | |
4745 | |
4746 // parse | |
4747 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options); | |
4748 | |
4749 // remember encoding | |
4750 res.encoding = buffer_encoding; | |
4751 | |
4752 return res; | |
4753 } | |
4754 | |
4755 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick | |
4756 PUGI__FN xml_parse_status get_file_size(FILE* file, size_t& out_result) | |
4757 { | |
4758 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 | |
4759 // there are 64-bit versions of fseek/ftell, let's use them | |
4760 typedef __int64 length_type; | |
4761 | |
4762 _fseeki64(file, 0, SEEK_END); | |
4763 length_type length = _ftelli64(file); | |
4764 _fseeki64(file, 0, SEEK_SET); | |
4765 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)) | |
4766 // there are 64-bit versions of fseek/ftell, let's use them | |
4767 typedef off64_t length_type; | |
4768 | |
4769 fseeko64(file, 0, SEEK_END); | |
4770 length_type length = ftello64(file); | |
4771 fseeko64(file, 0, SEEK_SET); | |
4772 #else | |
4773 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway. | |
4774 typedef long length_type; | |
4775 | |
4776 fseek(file, 0, SEEK_END); | |
4777 length_type length = ftell(file); | |
4778 fseek(file, 0, SEEK_SET); | |
4779 #endif | |
4780 | |
4781 // check for I/O errors | |
4782 if (length < 0) return status_io_error; | |
4783 | |
4784 // check for overflow | |
4785 size_t result = static_cast<size_t>(length); | |
4786 | |
4787 if (static_cast<length_type>(result) != length) return status_out_of_memory; | |
4788 | |
4789 // finalize | |
4790 out_result = result; | |
4791 | |
4792 return status_ok; | |
4793 } | |
4794 | |
4795 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size | |
4796 PUGI__FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) | |
4797 { | |
4798 // We only need to zero-terminate if encoding conversion does not do it for us | |
4799 #ifdef PUGIXML_WCHAR_MODE | |
4800 xml_encoding wchar_encoding = get_wchar_encoding(); | |
4801 | |
4802 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding)) | |
4803 { | |
4804 size_t length = size / sizeof(char_t); | |
4805 | |
4806 static_cast<char_t*>(buffer)[length] = 0; | |
4807 return (length + 1) * sizeof(char_t); | |
4808 } | |
4809 #else | |
4810 if (encoding == encoding_utf8) | |
4811 { | |
4812 static_cast<char*>(buffer)[size] = 0; | |
4813 return size + 1; | |
4814 } | |
4815 #endif | |
4816 | |
4817 return size; | |
4818 } | |
4819 | |
4820 PUGI__FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) | |
4821 { | |
4822 if (!file) return make_parse_result(status_file_not_found); | |
4823 | |
4824 // get file size (can result in I/O errors) | |
4825 size_t size = 0; | |
4826 xml_parse_status size_status = get_file_size(file, size); | |
4827 if (size_status != status_ok) return make_parse_result(size_status); | |
4828 | |
4829 size_t max_suffix_size = sizeof(char_t); | |
4830 | |
4831 // allocate buffer for the whole file | |
4832 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size)); | |
4833 if (!contents) return make_parse_result(status_out_of_memory); | |
4834 | |
4835 // read file in memory | |
4836 size_t read_size = fread(contents, 1, size, file); | |
4837 | |
4838 if (read_size != size) | |
4839 { | |
4840 xml_memory::deallocate(contents); | |
4841 return make_parse_result(status_io_error); | |
4842 } | |
4843 | |
4844 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size); | |
4845 | |
4846 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); | |
4847 } | |
4848 | |
4849 PUGI__FN void close_file(FILE* file) | |
4850 { | |
4851 fclose(file); | |
4852 } | |
4853 | |
4854 #ifndef PUGIXML_NO_STL | |
4855 template <typename T> struct xml_stream_chunk | |
4856 { | |
4857 static xml_stream_chunk* create() | |
4858 { | |
4859 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); | |
4860 if (!memory) return 0; | |
4861 | |
4862 return new (memory) xml_stream_chunk(); | |
4863 } | |
4864 | |
4865 static void destroy(xml_stream_chunk* chunk) | |
4866 { | |
4867 // free chunk chain | |
4868 while (chunk) | |
4869 { | |
4870 xml_stream_chunk* next_ = chunk->next; | |
4871 | |
4872 xml_memory::deallocate(chunk); | |
4873 | |
4874 chunk = next_; | |
4875 } | |
4876 } | |
4877 | |
4878 xml_stream_chunk(): next(0), size(0) | |
4879 { | |
4880 } | |
4881 | |
4882 xml_stream_chunk* next; | |
4883 size_t size; | |
4884 | |
4885 T data[xml_memory_page_size / sizeof(T)]; | |
4886 }; | |
4887 | |
4888 template <typename T> PUGI__FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) | |
4889 { | |
4890 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy); | |
4891 | |
4892 // read file to a chunk list | |
4893 size_t total = 0; | |
4894 xml_stream_chunk<T>* last = 0; | |
4895 | |
4896 while (!stream.eof()) | |
4897 { | |
4898 // allocate new chunk | |
4899 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create(); | |
4900 if (!chunk) return status_out_of_memory; | |
4901 | |
4902 // append chunk to list | |
4903 if (last) last = last->next = chunk; | |
4904 else chunks.data = last = chunk; | |
4905 | |
4906 // read data to chunk | |
4907 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T))); | |
4908 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T); | |
4909 | |
4910 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors | |
4911 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; | |
4912 | |
4913 // guard against huge files (chunk size is small enough to make this overflow check work) | |
4914 if (total + chunk->size < total) return status_out_of_memory; | |
4915 total += chunk->size; | |
4916 } | |
4917 | |
4918 size_t max_suffix_size = sizeof(char_t); | |
4919 | |
4920 // copy chunk list to a contiguous buffer | |
4921 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size)); | |
4922 if (!buffer) return status_out_of_memory; | |
4923 | |
4924 char* write = buffer; | |
4925 | |
4926 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next) | |
4927 { | |
4928 assert(write + chunk->size <= buffer + total); | |
4929 memcpy(write, chunk->data, chunk->size); | |
4930 write += chunk->size; | |
4931 } | |
4932 | |
4933 assert(write == buffer + total); | |
4934 | |
4935 // return buffer | |
4936 *out_buffer = buffer; | |
4937 *out_size = total; | |
4938 | |
4939 return status_ok; | |
4940 } | |
4941 | |
4942 template <typename T> PUGI__FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) | |
4943 { | |
4944 // get length of remaining data in stream | |
4945 typename std::basic_istream<T>::pos_type pos = stream.tellg(); | |
4946 stream.seekg(0, std::ios::end); | |
4947 std::streamoff length = stream.tellg() - pos; | |
4948 stream.seekg(pos); | |
4949 | |
4950 if (stream.fail() || pos < 0) return status_io_error; | |
4951 | |
4952 // guard against huge files | |
4953 size_t read_length = static_cast<size_t>(length); | |
4954 | |
4955 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory; | |
4956 | |
4957 size_t max_suffix_size = sizeof(char_t); | |
4958 | |
4959 // read stream data into memory (guard against stream exceptions with buffer holder) | |
4960 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate); | |
4961 if (!buffer.data) return status_out_of_memory; | |
4962 | |
4963 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length)); | |
4964 | |
4965 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors | |
4966 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; | |
4967 | |
4968 // return buffer | |
4969 size_t actual_length = static_cast<size_t>(stream.gcount()); | |
4970 assert(actual_length <= read_length); | |
4971 | |
4972 *out_buffer = buffer.release(); | |
4973 *out_size = actual_length * sizeof(T); | |
4974 | |
4975 return status_ok; | |
4976 } | |
4977 | |
4978 template <typename T> PUGI__FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) | |
4979 { | |
4980 void* buffer = 0; | |
4981 size_t size = 0; | |
4982 xml_parse_status status = status_ok; | |
4983 | |
4984 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits) | |
4985 if (stream.fail()) return make_parse_result(status_io_error); | |
4986 | |
4987 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory) | |
4988 if (stream.tellg() < 0) | |
4989 { | |
4990 stream.clear(); // clear error flags that could be set by a failing tellg | |
4991 status = load_stream_data_noseek(stream, &buffer, &size); | |
4992 } | |
4993 else | |
4994 status = load_stream_data_seek(stream, &buffer, &size); | |
4995 | |
4996 if (status != status_ok) return make_parse_result(status); | |
4997 | |
4998 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size); | |
4999 | |
5000 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); | |
5001 } | |
5002 #endif | |
5003 | |
5004 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) | |
5005 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) | |
5006 { | |
5007 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 | |
5008 FILE* file = 0; | |
5009 return _wfopen_s(&file, path, mode) == 0 ? file : 0; | |
5010 #else | |
5011 return _wfopen(path, mode); | |
5012 #endif | |
5013 } | |
5014 #else | |
5015 PUGI__FN char* convert_path_heap(const wchar_t* str) | |
5016 { | |
5017 assert(str); | |
5018 | |
5019 // first pass: get length in utf8 characters | |
5020 size_t length = strlength_wide(str); | |
5021 size_t size = as_utf8_begin(str, length); | |
5022 | |
5023 // allocate resulting string | |
5024 char* result = static_cast<char*>(xml_memory::allocate(size + 1)); | |
5025 if (!result) return 0; | |
5026 | |
5027 // second pass: convert to utf8 | |
5028 as_utf8_end(result, size, str, length); | |
5029 | |
5030 // zero-terminate | |
5031 result[size] = 0; | |
5032 | |
5033 return result; | |
5034 } | |
5035 | |
5036 PUGI__FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) | |
5037 { | |
5038 // there is no standard function to open wide paths, so our best bet is to try utf8 path | |
5039 char* path_utf8 = convert_path_heap(path); | |
5040 if (!path_utf8) return 0; | |
5041 | |
5042 // convert mode to ASCII (we mirror _wfopen interface) | |
5043 char mode_ascii[4] = {0}; | |
5044 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]); | |
5045 | |
5046 // try to open the utf8 path | |
5047 FILE* result = fopen(path_utf8, mode_ascii); | |
5048 | |
5049 // free dummy buffer | |
5050 xml_memory::deallocate(path_utf8); | |
5051 | |
5052 return result; | |
5053 } | |
5054 #endif | |
5055 | |
5056 PUGI__FN FILE* open_file(const char* path, const char* mode) | |
5057 { | |
5058 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 | |
5059 FILE* file = 0; | |
5060 return fopen_s(&file, path, mode) == 0 ? file : 0; | |
5061 #else | |
5062 return fopen(path, mode); | |
5063 #endif | |
5064 } | |
5065 | |
5066 PUGI__FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) | |
5067 { | |
5068 if (!file) return false; | |
5069 | |
5070 xml_writer_file writer(file); | |
5071 doc.save(writer, indent, flags, encoding); | |
5072 | |
5073 return fflush(file) == 0 && ferror(file) == 0; | |
5074 } | |
5075 | |
5076 struct name_null_sentry | |
5077 { | |
5078 xml_node_struct* node; | |
5079 char_t* name; | |
5080 | |
5081 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) | |
5082 { | |
5083 node->name = 0; | |
5084 } | |
5085 | |
5086 ~name_null_sentry() | |
5087 { | |
5088 node->name = name; | |
5089 } | |
5090 }; | |
5091 PUGI__NS_END | |
5092 | |
5093 namespace pugi | |
5094 { | |
5095 PUGI__FN xml_writer_file::xml_writer_file(void* file_): file(file_) | |
5096 { | |
5097 } | |
5098 | |
5099 PUGI__FN void xml_writer_file::write(const void* data, size_t size) | |
5100 { | |
5101 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file)); | |
5102 (void)!result; // unfortunately we can't do proper error handling here | |
5103 } | |
5104 | |
5105 #ifndef PUGIXML_NO_STL | |
5106 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0) | |
5107 { | |
5108 } | |
5109 | |
5110 PUGI__FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream) | |
5111 { | |
5112 } | |
5113 | |
5114 PUGI__FN void xml_writer_stream::write(const void* data, size_t size) | |
5115 { | |
5116 if (narrow_stream) | |
5117 { | |
5118 assert(!wide_stream); | |
5119 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size)); | |
5120 } | |
5121 else | |
5122 { | |
5123 assert(wide_stream); | |
5124 assert(size % sizeof(wchar_t) == 0); | |
5125 | |
5126 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t))); | |
5127 } | |
5128 } | |
5129 #endif | |
5130 | |
5131 PUGI__FN xml_tree_walker::xml_tree_walker(): _depth(0) | |
5132 { | |
5133 } | |
5134 | |
5135 PUGI__FN xml_tree_walker::~xml_tree_walker() | |
5136 { | |
5137 } | |
5138 | |
5139 PUGI__FN int xml_tree_walker::depth() const | |
5140 { | |
5141 return _depth; | |
5142 } | |
5143 | |
5144 PUGI__FN bool xml_tree_walker::begin(xml_node&) | |
5145 { | |
5146 return true; | |
5147 } | |
5148 | |
5149 PUGI__FN bool xml_tree_walker::end(xml_node&) | |
5150 { | |
5151 return true; | |
5152 } | |
5153 | |
5154 PUGI__FN xml_attribute::xml_attribute(): _attr(0) | |
5155 { | |
5156 } | |
5157 | |
5158 PUGI__FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) | |
5159 { | |
5160 } | |
5161 | |
5162 PUGI__FN static void unspecified_bool_xml_attribute(xml_attribute***) | |
5163 { | |
5164 } | |
5165 | |
5166 PUGI__FN xml_attribute::operator xml_attribute::unspecified_bool_type() const | |
5167 { | |
5168 return _attr ? unspecified_bool_xml_attribute : 0; | |
5169 } | |
5170 | |
5171 PUGI__FN bool xml_attribute::operator!() const | |
5172 { | |
5173 return !_attr; | |
5174 } | |
5175 | |
5176 PUGI__FN bool xml_attribute::operator==(const xml_attribute& r) const | |
5177 { | |
5178 return (_attr == r._attr); | |
5179 } | |
5180 | |
5181 PUGI__FN bool xml_attribute::operator!=(const xml_attribute& r) const | |
5182 { | |
5183 return (_attr != r._attr); | |
5184 } | |
5185 | |
5186 PUGI__FN bool xml_attribute::operator<(const xml_attribute& r) const | |
5187 { | |
5188 return (_attr < r._attr); | |
5189 } | |
5190 | |
5191 PUGI__FN bool xml_attribute::operator>(const xml_attribute& r) const | |
5192 { | |
5193 return (_attr > r._attr); | |
5194 } | |
5195 | |
5196 PUGI__FN bool xml_attribute::operator<=(const xml_attribute& r) const | |
5197 { | |
5198 return (_attr <= r._attr); | |
5199 } | |
5200 | |
5201 PUGI__FN bool xml_attribute::operator>=(const xml_attribute& r) const | |
5202 { | |
5203 return (_attr >= r._attr); | |
5204 } | |
5205 | |
5206 PUGI__FN xml_attribute xml_attribute::next_attribute() const | |
5207 { | |
5208 if (!_attr) return xml_attribute(); | |
5209 return xml_attribute(_attr->next_attribute); | |
5210 } | |
5211 | |
5212 PUGI__FN xml_attribute xml_attribute::previous_attribute() const | |
5213 { | |
5214 if (!_attr) return xml_attribute(); | |
5215 xml_attribute_struct* prev = _attr->prev_attribute_c; | |
5216 return prev->next_attribute ? xml_attribute(prev) : xml_attribute(); | |
5217 } | |
5218 | |
5219 PUGI__FN const char_t* xml_attribute::as_string(const char_t* def) const | |
5220 { | |
5221 if (!_attr) return def; | |
5222 const char_t* value = _attr->value; | |
5223 return value ? value : def; | |
5224 } | |
5225 | |
5226 PUGI__FN int xml_attribute::as_int(int def) const | |
5227 { | |
5228 if (!_attr) return def; | |
5229 const char_t* value = _attr->value; | |
5230 return value ? impl::get_value_int(value) : def; | |
5231 } | |
5232 | |
5233 PUGI__FN unsigned int xml_attribute::as_uint(unsigned int def) const | |
5234 { | |
5235 if (!_attr) return def; | |
5236 const char_t* value = _attr->value; | |
5237 return value ? impl::get_value_uint(value) : def; | |
5238 } | |
5239 | |
5240 PUGI__FN double xml_attribute::as_double(double def) const | |
5241 { | |
5242 if (!_attr) return def; | |
5243 const char_t* value = _attr->value; | |
5244 return value ? impl::get_value_double(value) : def; | |
5245 } | |
5246 | |
5247 PUGI__FN float xml_attribute::as_float(float def) const | |
5248 { | |
5249 if (!_attr) return def; | |
5250 const char_t* value = _attr->value; | |
5251 return value ? impl::get_value_float(value) : def; | |
5252 } | |
5253 | |
5254 PUGI__FN bool xml_attribute::as_bool(bool def) const | |
5255 { | |
5256 if (!_attr) return def; | |
5257 const char_t* value = _attr->value; | |
5258 return value ? impl::get_value_bool(value) : def; | |
5259 } | |
5260 | |
5261 #ifdef PUGIXML_HAS_LONG_LONG | |
5262 PUGI__FN long long xml_attribute::as_llong(long long def) const | |
5263 { | |
5264 if (!_attr) return def; | |
5265 const char_t* value = _attr->value; | |
5266 return value ? impl::get_value_llong(value) : def; | |
5267 } | |
5268 | |
5269 PUGI__FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const | |
5270 { | |
5271 if (!_attr) return def; | |
5272 const char_t* value = _attr->value; | |
5273 return value ? impl::get_value_ullong(value) : def; | |
5274 } | |
5275 #endif | |
5276 | |
5277 PUGI__FN bool xml_attribute::empty() const | |
5278 { | |
5279 return !_attr; | |
5280 } | |
5281 | |
5282 PUGI__FN const char_t* xml_attribute::name() const | |
5283 { | |
5284 if (!_attr) return PUGIXML_TEXT(""); | |
5285 const char_t* name = _attr->name; | |
5286 return name ? name : PUGIXML_TEXT(""); | |
5287 } | |
5288 | |
5289 PUGI__FN const char_t* xml_attribute::value() const | |
5290 { | |
5291 if (!_attr) return PUGIXML_TEXT(""); | |
5292 const char_t* value = _attr->value; | |
5293 return value ? value : PUGIXML_TEXT(""); | |
5294 } | |
5295 | |
5296 PUGI__FN size_t xml_attribute::hash_value() const | |
5297 { | |
5298 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct)); | |
5299 } | |
5300 | |
5301 PUGI__FN xml_attribute_struct* xml_attribute::internal_object() const | |
5302 { | |
5303 return _attr; | |
5304 } | |
5305 | |
5306 PUGI__FN xml_attribute& xml_attribute::operator=(const char_t* rhs) | |
5307 { | |
5308 set_value(rhs); | |
5309 return *this; | |
5310 } | |
5311 | |
5312 PUGI__FN xml_attribute& xml_attribute::operator=(int rhs) | |
5313 { | |
5314 set_value(rhs); | |
5315 return *this; | |
5316 } | |
5317 | |
5318 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned int rhs) | |
5319 { | |
5320 set_value(rhs); | |
5321 return *this; | |
5322 } | |
5323 | |
5324 PUGI__FN xml_attribute& xml_attribute::operator=(long rhs) | |
5325 { | |
5326 set_value(rhs); | |
5327 return *this; | |
5328 } | |
5329 | |
5330 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long rhs) | |
5331 { | |
5332 set_value(rhs); | |
5333 return *this; | |
5334 } | |
5335 | |
5336 PUGI__FN xml_attribute& xml_attribute::operator=(double rhs) | |
5337 { | |
5338 set_value(rhs); | |
5339 return *this; | |
5340 } | |
5341 | |
5342 PUGI__FN xml_attribute& xml_attribute::operator=(float rhs) | |
5343 { | |
5344 set_value(rhs); | |
5345 return *this; | |
5346 } | |
5347 | |
5348 PUGI__FN xml_attribute& xml_attribute::operator=(bool rhs) | |
5349 { | |
5350 set_value(rhs); | |
5351 return *this; | |
5352 } | |
5353 | |
5354 #ifdef PUGIXML_HAS_LONG_LONG | |
5355 PUGI__FN xml_attribute& xml_attribute::operator=(long long rhs) | |
5356 { | |
5357 set_value(rhs); | |
5358 return *this; | |
5359 } | |
5360 | |
5361 PUGI__FN xml_attribute& xml_attribute::operator=(unsigned long long rhs) | |
5362 { | |
5363 set_value(rhs); | |
5364 return *this; | |
5365 } | |
5366 #endif | |
5367 | |
5368 PUGI__FN bool xml_attribute::set_name(const char_t* rhs) | |
5369 { | |
5370 if (!_attr) return false; | |
5371 | |
5372 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); | |
5373 } | |
5374 | |
5375 PUGI__FN bool xml_attribute::set_value(const char_t* rhs, size_t sz) | |
5376 { | |
5377 if (!_attr) return false; | |
5378 | |
5379 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, sz); | |
5380 } | |
5381 | |
5382 PUGI__FN bool xml_attribute::set_value(const char_t* rhs) | |
5383 { | |
5384 if (!_attr) return false; | |
5385 | |
5386 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); | |
5387 } | |
5388 | |
5389 PUGI__FN bool xml_attribute::set_value(int rhs) | |
5390 { | |
5391 if (!_attr) return false; | |
5392 | |
5393 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); | |
5394 } | |
5395 | |
5396 PUGI__FN bool xml_attribute::set_value(unsigned int rhs) | |
5397 { | |
5398 if (!_attr) return false; | |
5399 | |
5400 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); | |
5401 } | |
5402 | |
5403 PUGI__FN bool xml_attribute::set_value(long rhs) | |
5404 { | |
5405 if (!_attr) return false; | |
5406 | |
5407 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); | |
5408 } | |
5409 | |
5410 PUGI__FN bool xml_attribute::set_value(unsigned long rhs) | |
5411 { | |
5412 if (!_attr) return false; | |
5413 | |
5414 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); | |
5415 } | |
5416 | |
5417 PUGI__FN bool xml_attribute::set_value(double rhs) | |
5418 { | |
5419 if (!_attr) return false; | |
5420 | |
5421 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision); | |
5422 } | |
5423 | |
5424 PUGI__FN bool xml_attribute::set_value(double rhs, int precision) | |
5425 { | |
5426 if (!_attr) return false; | |
5427 | |
5428 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision); | |
5429 } | |
5430 | |
5431 PUGI__FN bool xml_attribute::set_value(float rhs) | |
5432 { | |
5433 if (!_attr) return false; | |
5434 | |
5435 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision); | |
5436 } | |
5437 | |
5438 PUGI__FN bool xml_attribute::set_value(float rhs, int precision) | |
5439 { | |
5440 if (!_attr) return false; | |
5441 | |
5442 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision); | |
5443 } | |
5444 | |
5445 PUGI__FN bool xml_attribute::set_value(bool rhs) | |
5446 { | |
5447 if (!_attr) return false; | |
5448 | |
5449 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); | |
5450 } | |
5451 | |
5452 #ifdef PUGIXML_HAS_LONG_LONG | |
5453 PUGI__FN bool xml_attribute::set_value(long long rhs) | |
5454 { | |
5455 if (!_attr) return false; | |
5456 | |
5457 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); | |
5458 } | |
5459 | |
5460 PUGI__FN bool xml_attribute::set_value(unsigned long long rhs) | |
5461 { | |
5462 if (!_attr) return false; | |
5463 | |
5464 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); | |
5465 } | |
5466 #endif | |
5467 | |
5468 #ifdef __BORLANDC__ | |
5469 PUGI__FN bool operator&&(const xml_attribute& lhs, bool rhs) | |
5470 { | |
5471 return (bool)lhs && rhs; | |
5472 } | |
5473 | |
5474 PUGI__FN bool operator||(const xml_attribute& lhs, bool rhs) | |
5475 { | |
5476 return (bool)lhs || rhs; | |
5477 } | |
5478 #endif | |
5479 | |
5480 PUGI__FN xml_node::xml_node(): _root(0) | |
5481 { | |
5482 } | |
5483 | |
5484 PUGI__FN xml_node::xml_node(xml_node_struct* p): _root(p) | |
5485 { | |
5486 } | |
5487 | |
5488 PUGI__FN static void unspecified_bool_xml_node(xml_node***) | |
5489 { | |
5490 } | |
5491 | |
5492 PUGI__FN xml_node::operator xml_node::unspecified_bool_type() const | |
5493 { | |
5494 return _root ? unspecified_bool_xml_node : 0; | |
5495 } | |
5496 | |
5497 PUGI__FN bool xml_node::operator!() const | |
5498 { | |
5499 return !_root; | |
5500 } | |
5501 | |
5502 PUGI__FN xml_node::iterator xml_node::begin() const | |
5503 { | |
5504 return iterator(_root ? _root->first_child + 0 : 0, _root); | |
5505 } | |
5506 | |
5507 PUGI__FN xml_node::iterator xml_node::end() const | |
5508 { | |
5509 return iterator(0, _root); | |
5510 } | |
5511 | |
5512 PUGI__FN xml_node::attribute_iterator xml_node::attributes_begin() const | |
5513 { | |
5514 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root); | |
5515 } | |
5516 | |
5517 PUGI__FN xml_node::attribute_iterator xml_node::attributes_end() const | |
5518 { | |
5519 return attribute_iterator(0, _root); | |
5520 } | |
5521 | |
5522 PUGI__FN xml_object_range<xml_node_iterator> xml_node::children() const | |
5523 { | |
5524 return xml_object_range<xml_node_iterator>(begin(), end()); | |
5525 } | |
5526 | |
5527 PUGI__FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const | |
5528 { | |
5529 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_)); | |
5530 } | |
5531 | |
5532 PUGI__FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const | |
5533 { | |
5534 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end()); | |
5535 } | |
5536 | |
5537 PUGI__FN bool xml_node::operator==(const xml_node& r) const | |
5538 { | |
5539 return (_root == r._root); | |
5540 } | |
5541 | |
5542 PUGI__FN bool xml_node::operator!=(const xml_node& r) const | |
5543 { | |
5544 return (_root != r._root); | |
5545 } | |
5546 | |
5547 PUGI__FN bool xml_node::operator<(const xml_node& r) const | |
5548 { | |
5549 return (_root < r._root); | |
5550 } | |
5551 | |
5552 PUGI__FN bool xml_node::operator>(const xml_node& r) const | |
5553 { | |
5554 return (_root > r._root); | |
5555 } | |
5556 | |
5557 PUGI__FN bool xml_node::operator<=(const xml_node& r) const | |
5558 { | |
5559 return (_root <= r._root); | |
5560 } | |
5561 | |
5562 PUGI__FN bool xml_node::operator>=(const xml_node& r) const | |
5563 { | |
5564 return (_root >= r._root); | |
5565 } | |
5566 | |
5567 PUGI__FN bool xml_node::empty() const | |
5568 { | |
5569 return !_root; | |
5570 } | |
5571 | |
5572 PUGI__FN const char_t* xml_node::name() const | |
5573 { | |
5574 if (!_root) return PUGIXML_TEXT(""); | |
5575 const char_t* name = _root->name; | |
5576 return name ? name : PUGIXML_TEXT(""); | |
5577 } | |
5578 | |
5579 PUGI__FN xml_node_type xml_node::type() const | |
5580 { | |
5581 return _root ? PUGI__NODETYPE(_root) : node_null; | |
5582 } | |
5583 | |
5584 PUGI__FN const char_t* xml_node::value() const | |
5585 { | |
5586 if (!_root) return PUGIXML_TEXT(""); | |
5587 const char_t* value = _root->value; | |
5588 return value ? value : PUGIXML_TEXT(""); | |
5589 } | |
5590 | |
5591 PUGI__FN xml_node xml_node::child(const char_t* name_) const | |
5592 { | |
5593 if (!_root) return xml_node(); | |
5594 | |
5595 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
5596 { | |
5597 const char_t* iname = i->name; | |
5598 if (iname && impl::strequal(name_, iname)) | |
5599 return xml_node(i); | |
5600 } | |
5601 | |
5602 return xml_node(); | |
5603 } | |
5604 | |
5605 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_) const | |
5606 { | |
5607 if (!_root) return xml_attribute(); | |
5608 | |
5609 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) | |
5610 { | |
5611 const char_t* iname = i->name; | |
5612 if (iname && impl::strequal(name_, iname)) | |
5613 return xml_attribute(i); | |
5614 } | |
5615 | |
5616 return xml_attribute(); | |
5617 } | |
5618 | |
5619 PUGI__FN xml_node xml_node::next_sibling(const char_t* name_) const | |
5620 { | |
5621 if (!_root) return xml_node(); | |
5622 | |
5623 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) | |
5624 { | |
5625 const char_t* iname = i->name; | |
5626 if (iname && impl::strequal(name_, iname)) | |
5627 return xml_node(i); | |
5628 } | |
5629 | |
5630 return xml_node(); | |
5631 } | |
5632 | |
5633 PUGI__FN xml_node xml_node::next_sibling() const | |
5634 { | |
5635 return _root ? xml_node(_root->next_sibling) : xml_node(); | |
5636 } | |
5637 | |
5638 PUGI__FN xml_node xml_node::previous_sibling(const char_t* name_) const | |
5639 { | |
5640 if (!_root) return xml_node(); | |
5641 | |
5642 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) | |
5643 { | |
5644 const char_t* iname = i->name; | |
5645 if (iname && impl::strequal(name_, iname)) | |
5646 return xml_node(i); | |
5647 } | |
5648 | |
5649 return xml_node(); | |
5650 } | |
5651 | |
5652 PUGI__FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const | |
5653 { | |
5654 xml_attribute_struct* hint = hint_._attr; | |
5655 | |
5656 // if hint is not an attribute of node, behavior is not defined | |
5657 assert(!hint || (_root && impl::is_attribute_of(hint, _root))); | |
5658 | |
5659 if (!_root) return xml_attribute(); | |
5660 | |
5661 // optimistically search from hint up until the end | |
5662 for (xml_attribute_struct* i = hint; i; i = i->next_attribute) | |
5663 { | |
5664 const char_t* iname = i->name; | |
5665 if (iname && impl::strequal(name_, iname)) | |
5666 { | |
5667 // update hint to maximize efficiency of searching for consecutive attributes | |
5668 hint_._attr = i->next_attribute; | |
5669 | |
5670 return xml_attribute(i); | |
5671 } | |
5672 } | |
5673 | |
5674 // wrap around and search from the first attribute until the hint | |
5675 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails | |
5676 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute) | |
5677 { | |
5678 const char_t* jname = j->name; | |
5679 if (jname && impl::strequal(name_, jname)) | |
5680 { | |
5681 // update hint to maximize efficiency of searching for consecutive attributes | |
5682 hint_._attr = j->next_attribute; | |
5683 | |
5684 return xml_attribute(j); | |
5685 } | |
5686 } | |
5687 | |
5688 return xml_attribute(); | |
5689 } | |
5690 | |
5691 PUGI__FN xml_node xml_node::previous_sibling() const | |
5692 { | |
5693 if (!_root) return xml_node(); | |
5694 xml_node_struct* prev = _root->prev_sibling_c; | |
5695 return prev->next_sibling ? xml_node(prev) : xml_node(); | |
5696 } | |
5697 | |
5698 PUGI__FN xml_node xml_node::parent() const | |
5699 { | |
5700 return _root ? xml_node(_root->parent) : xml_node(); | |
5701 } | |
5702 | |
5703 PUGI__FN xml_node xml_node::root() const | |
5704 { | |
5705 return _root ? xml_node(&impl::get_document(_root)) : xml_node(); | |
5706 } | |
5707 | |
5708 PUGI__FN xml_text xml_node::text() const | |
5709 { | |
5710 return xml_text(_root); | |
5711 } | |
5712 | |
5713 PUGI__FN const char_t* xml_node::child_value() const | |
5714 { | |
5715 if (!_root) return PUGIXML_TEXT(""); | |
5716 | |
5717 // element nodes can have value if parse_embed_pcdata was used | |
5718 if (PUGI__NODETYPE(_root) == node_element && _root->value) | |
5719 return _root->value; | |
5720 | |
5721 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
5722 { | |
5723 const char_t* ivalue = i->value; | |
5724 if (impl::is_text_node(i) && ivalue) | |
5725 return ivalue; | |
5726 } | |
5727 | |
5728 return PUGIXML_TEXT(""); | |
5729 } | |
5730 | |
5731 PUGI__FN const char_t* xml_node::child_value(const char_t* name_) const | |
5732 { | |
5733 return child(name_).child_value(); | |
5734 } | |
5735 | |
5736 PUGI__FN xml_attribute xml_node::first_attribute() const | |
5737 { | |
5738 if (!_root) return xml_attribute(); | |
5739 return xml_attribute(_root->first_attribute); | |
5740 } | |
5741 | |
5742 PUGI__FN xml_attribute xml_node::last_attribute() const | |
5743 { | |
5744 if (!_root) return xml_attribute(); | |
5745 xml_attribute_struct* first = _root->first_attribute; | |
5746 return first ? xml_attribute(first->prev_attribute_c) : xml_attribute(); | |
5747 } | |
5748 | |
5749 PUGI__FN xml_node xml_node::first_child() const | |
5750 { | |
5751 if (!_root) return xml_node(); | |
5752 return xml_node(_root->first_child); | |
5753 } | |
5754 | |
5755 PUGI__FN xml_node xml_node::last_child() const | |
5756 { | |
5757 if (!_root) return xml_node(); | |
5758 xml_node_struct* first = _root->first_child; | |
5759 return first ? xml_node(first->prev_sibling_c) : xml_node(); | |
5760 } | |
5761 | |
5762 PUGI__FN bool xml_node::set_name(const char_t* rhs) | |
5763 { | |
5764 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; | |
5765 | |
5766 if (type_ != node_element && type_ != node_pi && type_ != node_declaration) | |
5767 return false; | |
5768 | |
5769 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); | |
5770 } | |
5771 | |
5772 PUGI__FN bool xml_node::set_value(const char_t* rhs, size_t sz) | |
5773 { | |
5774 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; | |
5775 | |
5776 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) | |
5777 return false; | |
5778 | |
5779 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, sz); | |
5780 } | |
5781 | |
5782 PUGI__FN bool xml_node::set_value(const char_t* rhs) | |
5783 { | |
5784 xml_node_type type_ = _root ? PUGI__NODETYPE(_root) : node_null; | |
5785 | |
5786 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) | |
5787 return false; | |
5788 | |
5789 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); | |
5790 } | |
5791 | |
5792 PUGI__FN xml_attribute xml_node::append_attribute(const char_t* name_) | |
5793 { | |
5794 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5795 | |
5796 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5797 if (!alloc.reserve()) return xml_attribute(); | |
5798 | |
5799 xml_attribute a(impl::allocate_attribute(alloc)); | |
5800 if (!a) return xml_attribute(); | |
5801 | |
5802 impl::append_attribute(a._attr, _root); | |
5803 | |
5804 a.set_name(name_); | |
5805 | |
5806 return a; | |
5807 } | |
5808 | |
5809 PUGI__FN xml_attribute xml_node::prepend_attribute(const char_t* name_) | |
5810 { | |
5811 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5812 | |
5813 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5814 if (!alloc.reserve()) return xml_attribute(); | |
5815 | |
5816 xml_attribute a(impl::allocate_attribute(alloc)); | |
5817 if (!a) return xml_attribute(); | |
5818 | |
5819 impl::prepend_attribute(a._attr, _root); | |
5820 | |
5821 a.set_name(name_); | |
5822 | |
5823 return a; | |
5824 } | |
5825 | |
5826 PUGI__FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) | |
5827 { | |
5828 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5829 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); | |
5830 | |
5831 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5832 if (!alloc.reserve()) return xml_attribute(); | |
5833 | |
5834 xml_attribute a(impl::allocate_attribute(alloc)); | |
5835 if (!a) return xml_attribute(); | |
5836 | |
5837 impl::insert_attribute_after(a._attr, attr._attr, _root); | |
5838 | |
5839 a.set_name(name_); | |
5840 | |
5841 return a; | |
5842 } | |
5843 | |
5844 PUGI__FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr) | |
5845 { | |
5846 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5847 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); | |
5848 | |
5849 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5850 if (!alloc.reserve()) return xml_attribute(); | |
5851 | |
5852 xml_attribute a(impl::allocate_attribute(alloc)); | |
5853 if (!a) return xml_attribute(); | |
5854 | |
5855 impl::insert_attribute_before(a._attr, attr._attr, _root); | |
5856 | |
5857 a.set_name(name_); | |
5858 | |
5859 return a; | |
5860 } | |
5861 | |
5862 PUGI__FN xml_attribute xml_node::append_copy(const xml_attribute& proto) | |
5863 { | |
5864 if (!proto) return xml_attribute(); | |
5865 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5866 | |
5867 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5868 if (!alloc.reserve()) return xml_attribute(); | |
5869 | |
5870 xml_attribute a(impl::allocate_attribute(alloc)); | |
5871 if (!a) return xml_attribute(); | |
5872 | |
5873 impl::append_attribute(a._attr, _root); | |
5874 impl::node_copy_attribute(a._attr, proto._attr); | |
5875 | |
5876 return a; | |
5877 } | |
5878 | |
5879 PUGI__FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) | |
5880 { | |
5881 if (!proto) return xml_attribute(); | |
5882 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5883 | |
5884 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5885 if (!alloc.reserve()) return xml_attribute(); | |
5886 | |
5887 xml_attribute a(impl::allocate_attribute(alloc)); | |
5888 if (!a) return xml_attribute(); | |
5889 | |
5890 impl::prepend_attribute(a._attr, _root); | |
5891 impl::node_copy_attribute(a._attr, proto._attr); | |
5892 | |
5893 return a; | |
5894 } | |
5895 | |
5896 PUGI__FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) | |
5897 { | |
5898 if (!proto) return xml_attribute(); | |
5899 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5900 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); | |
5901 | |
5902 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5903 if (!alloc.reserve()) return xml_attribute(); | |
5904 | |
5905 xml_attribute a(impl::allocate_attribute(alloc)); | |
5906 if (!a) return xml_attribute(); | |
5907 | |
5908 impl::insert_attribute_after(a._attr, attr._attr, _root); | |
5909 impl::node_copy_attribute(a._attr, proto._attr); | |
5910 | |
5911 return a; | |
5912 } | |
5913 | |
5914 PUGI__FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) | |
5915 { | |
5916 if (!proto) return xml_attribute(); | |
5917 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5918 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); | |
5919 | |
5920 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5921 if (!alloc.reserve()) return xml_attribute(); | |
5922 | |
5923 xml_attribute a(impl::allocate_attribute(alloc)); | |
5924 if (!a) return xml_attribute(); | |
5925 | |
5926 impl::insert_attribute_before(a._attr, attr._attr, _root); | |
5927 impl::node_copy_attribute(a._attr, proto._attr); | |
5928 | |
5929 return a; | |
5930 } | |
5931 | |
5932 PUGI__FN xml_node xml_node::append_child(xml_node_type type_) | |
5933 { | |
5934 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
5935 | |
5936 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5937 if (!alloc.reserve()) return xml_node(); | |
5938 | |
5939 xml_node n(impl::allocate_node(alloc, type_)); | |
5940 if (!n) return xml_node(); | |
5941 | |
5942 impl::append_node(n._root, _root); | |
5943 | |
5944 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | |
5945 | |
5946 return n; | |
5947 } | |
5948 | |
5949 PUGI__FN xml_node xml_node::prepend_child(xml_node_type type_) | |
5950 { | |
5951 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
5952 | |
5953 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5954 if (!alloc.reserve()) return xml_node(); | |
5955 | |
5956 xml_node n(impl::allocate_node(alloc, type_)); | |
5957 if (!n) return xml_node(); | |
5958 | |
5959 impl::prepend_node(n._root, _root); | |
5960 | |
5961 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | |
5962 | |
5963 return n; | |
5964 } | |
5965 | |
5966 PUGI__FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node) | |
5967 { | |
5968 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
5969 if (!node._root || node._root->parent != _root) return xml_node(); | |
5970 | |
5971 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5972 if (!alloc.reserve()) return xml_node(); | |
5973 | |
5974 xml_node n(impl::allocate_node(alloc, type_)); | |
5975 if (!n) return xml_node(); | |
5976 | |
5977 impl::insert_node_before(n._root, node._root); | |
5978 | |
5979 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | |
5980 | |
5981 return n; | |
5982 } | |
5983 | |
5984 PUGI__FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) | |
5985 { | |
5986 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
5987 if (!node._root || node._root->parent != _root) return xml_node(); | |
5988 | |
5989 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5990 if (!alloc.reserve()) return xml_node(); | |
5991 | |
5992 xml_node n(impl::allocate_node(alloc, type_)); | |
5993 if (!n) return xml_node(); | |
5994 | |
5995 impl::insert_node_after(n._root, node._root); | |
5996 | |
5997 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | |
5998 | |
5999 return n; | |
6000 } | |
6001 | |
6002 PUGI__FN xml_node xml_node::append_child(const char_t* name_) | |
6003 { | |
6004 xml_node result = append_child(node_element); | |
6005 | |
6006 result.set_name(name_); | |
6007 | |
6008 return result; | |
6009 } | |
6010 | |
6011 PUGI__FN xml_node xml_node::prepend_child(const char_t* name_) | |
6012 { | |
6013 xml_node result = prepend_child(node_element); | |
6014 | |
6015 result.set_name(name_); | |
6016 | |
6017 return result; | |
6018 } | |
6019 | |
6020 PUGI__FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) | |
6021 { | |
6022 xml_node result = insert_child_after(node_element, node); | |
6023 | |
6024 result.set_name(name_); | |
6025 | |
6026 return result; | |
6027 } | |
6028 | |
6029 PUGI__FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node) | |
6030 { | |
6031 xml_node result = insert_child_before(node_element, node); | |
6032 | |
6033 result.set_name(name_); | |
6034 | |
6035 return result; | |
6036 } | |
6037 | |
6038 PUGI__FN xml_node xml_node::append_copy(const xml_node& proto) | |
6039 { | |
6040 xml_node_type type_ = proto.type(); | |
6041 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
6042 | |
6043 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6044 if (!alloc.reserve()) return xml_node(); | |
6045 | |
6046 xml_node n(impl::allocate_node(alloc, type_)); | |
6047 if (!n) return xml_node(); | |
6048 | |
6049 impl::append_node(n._root, _root); | |
6050 impl::node_copy_tree(n._root, proto._root); | |
6051 | |
6052 return n; | |
6053 } | |
6054 | |
6055 PUGI__FN xml_node xml_node::prepend_copy(const xml_node& proto) | |
6056 { | |
6057 xml_node_type type_ = proto.type(); | |
6058 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
6059 | |
6060 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6061 if (!alloc.reserve()) return xml_node(); | |
6062 | |
6063 xml_node n(impl::allocate_node(alloc, type_)); | |
6064 if (!n) return xml_node(); | |
6065 | |
6066 impl::prepend_node(n._root, _root); | |
6067 impl::node_copy_tree(n._root, proto._root); | |
6068 | |
6069 return n; | |
6070 } | |
6071 | |
6072 PUGI__FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) | |
6073 { | |
6074 xml_node_type type_ = proto.type(); | |
6075 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
6076 if (!node._root || node._root->parent != _root) return xml_node(); | |
6077 | |
6078 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6079 if (!alloc.reserve()) return xml_node(); | |
6080 | |
6081 xml_node n(impl::allocate_node(alloc, type_)); | |
6082 if (!n) return xml_node(); | |
6083 | |
6084 impl::insert_node_after(n._root, node._root); | |
6085 impl::node_copy_tree(n._root, proto._root); | |
6086 | |
6087 return n; | |
6088 } | |
6089 | |
6090 PUGI__FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) | |
6091 { | |
6092 xml_node_type type_ = proto.type(); | |
6093 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
6094 if (!node._root || node._root->parent != _root) return xml_node(); | |
6095 | |
6096 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6097 if (!alloc.reserve()) return xml_node(); | |
6098 | |
6099 xml_node n(impl::allocate_node(alloc, type_)); | |
6100 if (!n) return xml_node(); | |
6101 | |
6102 impl::insert_node_before(n._root, node._root); | |
6103 impl::node_copy_tree(n._root, proto._root); | |
6104 | |
6105 return n; | |
6106 } | |
6107 | |
6108 PUGI__FN xml_node xml_node::append_move(const xml_node& moved) | |
6109 { | |
6110 if (!impl::allow_move(*this, moved)) return xml_node(); | |
6111 | |
6112 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6113 if (!alloc.reserve()) return xml_node(); | |
6114 | |
6115 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers | |
6116 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; | |
6117 | |
6118 impl::remove_node(moved._root); | |
6119 impl::append_node(moved._root, _root); | |
6120 | |
6121 return moved; | |
6122 } | |
6123 | |
6124 PUGI__FN xml_node xml_node::prepend_move(const xml_node& moved) | |
6125 { | |
6126 if (!impl::allow_move(*this, moved)) return xml_node(); | |
6127 | |
6128 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6129 if (!alloc.reserve()) return xml_node(); | |
6130 | |
6131 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers | |
6132 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; | |
6133 | |
6134 impl::remove_node(moved._root); | |
6135 impl::prepend_node(moved._root, _root); | |
6136 | |
6137 return moved; | |
6138 } | |
6139 | |
6140 PUGI__FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node) | |
6141 { | |
6142 if (!impl::allow_move(*this, moved)) return xml_node(); | |
6143 if (!node._root || node._root->parent != _root) return xml_node(); | |
6144 if (moved._root == node._root) return xml_node(); | |
6145 | |
6146 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6147 if (!alloc.reserve()) return xml_node(); | |
6148 | |
6149 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers | |
6150 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; | |
6151 | |
6152 impl::remove_node(moved._root); | |
6153 impl::insert_node_after(moved._root, node._root); | |
6154 | |
6155 return moved; | |
6156 } | |
6157 | |
6158 PUGI__FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node) | |
6159 { | |
6160 if (!impl::allow_move(*this, moved)) return xml_node(); | |
6161 if (!node._root || node._root->parent != _root) return xml_node(); | |
6162 if (moved._root == node._root) return xml_node(); | |
6163 | |
6164 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6165 if (!alloc.reserve()) return xml_node(); | |
6166 | |
6167 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers | |
6168 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; | |
6169 | |
6170 impl::remove_node(moved._root); | |
6171 impl::insert_node_before(moved._root, node._root); | |
6172 | |
6173 return moved; | |
6174 } | |
6175 | |
6176 PUGI__FN bool xml_node::remove_attribute(const char_t* name_) | |
6177 { | |
6178 return remove_attribute(attribute(name_)); | |
6179 } | |
6180 | |
6181 PUGI__FN bool xml_node::remove_attribute(const xml_attribute& a) | |
6182 { | |
6183 if (!_root || !a._attr) return false; | |
6184 if (!impl::is_attribute_of(a._attr, _root)) return false; | |
6185 | |
6186 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6187 if (!alloc.reserve()) return false; | |
6188 | |
6189 impl::remove_attribute(a._attr, _root); | |
6190 impl::destroy_attribute(a._attr, alloc); | |
6191 | |
6192 return true; | |
6193 } | |
6194 | |
6195 PUGI__FN bool xml_node::remove_attributes() | |
6196 { | |
6197 if (!_root) return false; | |
6198 | |
6199 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6200 if (!alloc.reserve()) return false; | |
6201 | |
6202 for (xml_attribute_struct* attr = _root->first_attribute; attr; ) | |
6203 { | |
6204 xml_attribute_struct* next = attr->next_attribute; | |
6205 | |
6206 impl::destroy_attribute(attr, alloc); | |
6207 | |
6208 attr = next; | |
6209 } | |
6210 | |
6211 _root->first_attribute = 0; | |
6212 | |
6213 return true; | |
6214 } | |
6215 | |
6216 PUGI__FN bool xml_node::remove_child(const char_t* name_) | |
6217 { | |
6218 return remove_child(child(name_)); | |
6219 } | |
6220 | |
6221 PUGI__FN bool xml_node::remove_child(const xml_node& n) | |
6222 { | |
6223 if (!_root || !n._root || n._root->parent != _root) return false; | |
6224 | |
6225 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6226 if (!alloc.reserve()) return false; | |
6227 | |
6228 impl::remove_node(n._root); | |
6229 impl::destroy_node(n._root, alloc); | |
6230 | |
6231 return true; | |
6232 } | |
6233 | |
6234 PUGI__FN bool xml_node::remove_children() | |
6235 { | |
6236 if (!_root) return false; | |
6237 | |
6238 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6239 if (!alloc.reserve()) return false; | |
6240 | |
6241 for (xml_node_struct* cur = _root->first_child; cur; ) | |
6242 { | |
6243 xml_node_struct* next = cur->next_sibling; | |
6244 | |
6245 impl::destroy_node(cur, alloc); | |
6246 | |
6247 cur = next; | |
6248 } | |
6249 | |
6250 _root->first_child = 0; | |
6251 | |
6252 return true; | |
6253 } | |
6254 | |
6255 PUGI__FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) | |
6256 { | |
6257 // append_buffer is only valid for elements/documents | |
6258 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); | |
6259 | |
6260 // get document node | |
6261 impl::xml_document_struct* doc = &impl::get_document(_root); | |
6262 | |
6263 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense | |
6264 doc->header |= impl::xml_memory_page_contents_shared_mask; | |
6265 | |
6266 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) | |
6267 impl::xml_memory_page* page = 0; | |
6268 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page)); | |
6269 (void)page; | |
6270 | |
6271 if (!extra) return impl::make_parse_result(status_out_of_memory); | |
6272 | |
6273 #ifdef PUGIXML_COMPACT | |
6274 // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned | |
6275 // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account | |
6276 extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1)); | |
6277 #endif | |
6278 | |
6279 // add extra buffer to the list | |
6280 extra->buffer = 0; | |
6281 extra->next = doc->extra_buffers; | |
6282 doc->extra_buffers = extra; | |
6283 | |
6284 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level | |
6285 impl::name_null_sentry sentry(_root); | |
6286 | |
6287 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer); | |
6288 } | |
6289 | |
6290 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const | |
6291 { | |
6292 if (!_root) return xml_node(); | |
6293 | |
6294 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
6295 { | |
6296 const char_t* iname = i->name; | |
6297 if (iname && impl::strequal(name_, iname)) | |
6298 { | |
6299 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) | |
6300 { | |
6301 const char_t* aname = a->name; | |
6302 if (aname && impl::strequal(attr_name, aname)) | |
6303 { | |
6304 const char_t* avalue = a->value; | |
6305 if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT(""))) | |
6306 return xml_node(i); | |
6307 } | |
6308 } | |
6309 } | |
6310 } | |
6311 | |
6312 return xml_node(); | |
6313 } | |
6314 | |
6315 PUGI__FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const | |
6316 { | |
6317 if (!_root) return xml_node(); | |
6318 | |
6319 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
6320 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) | |
6321 { | |
6322 const char_t* aname = a->name; | |
6323 if (aname && impl::strequal(attr_name, aname)) | |
6324 { | |
6325 const char_t* avalue = a->value; | |
6326 if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT(""))) | |
6327 return xml_node(i); | |
6328 } | |
6329 } | |
6330 | |
6331 return xml_node(); | |
6332 } | |
6333 | |
6334 #ifndef PUGIXML_NO_STL | |
6335 PUGI__FN string_t xml_node::path(char_t delimiter) const | |
6336 { | |
6337 if (!_root) return string_t(); | |
6338 | |
6339 size_t offset = 0; | |
6340 | |
6341 for (xml_node_struct* i = _root; i; i = i->parent) | |
6342 { | |
6343 const char_t* iname = i->name; | |
6344 offset += (i != _root); | |
6345 offset += iname ? impl::strlength(iname) : 0; | |
6346 } | |
6347 | |
6348 string_t result; | |
6349 result.resize(offset); | |
6350 | |
6351 for (xml_node_struct* j = _root; j; j = j->parent) | |
6352 { | |
6353 if (j != _root) | |
6354 result[--offset] = delimiter; | |
6355 | |
6356 const char_t* jname = j->name; | |
6357 if (jname) | |
6358 { | |
6359 size_t length = impl::strlength(jname); | |
6360 | |
6361 offset -= length; | |
6362 memcpy(&result[offset], jname, length * sizeof(char_t)); | |
6363 } | |
6364 } | |
6365 | |
6366 assert(offset == 0); | |
6367 | |
6368 return result; | |
6369 } | |
6370 #endif | |
6371 | |
6372 PUGI__FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const | |
6373 { | |
6374 xml_node context = path_[0] == delimiter ? root() : *this; | |
6375 | |
6376 if (!context._root) return xml_node(); | |
6377 | |
6378 const char_t* path_segment = path_; | |
6379 | |
6380 while (*path_segment == delimiter) ++path_segment; | |
6381 | |
6382 const char_t* path_segment_end = path_segment; | |
6383 | |
6384 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; | |
6385 | |
6386 if (path_segment == path_segment_end) return context; | |
6387 | |
6388 const char_t* next_segment = path_segment_end; | |
6389 | |
6390 while (*next_segment == delimiter) ++next_segment; | |
6391 | |
6392 if (*path_segment == '.' && path_segment + 1 == path_segment_end) | |
6393 return context.first_element_by_path(next_segment, delimiter); | |
6394 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) | |
6395 return context.parent().first_element_by_path(next_segment, delimiter); | |
6396 else | |
6397 { | |
6398 for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling) | |
6399 { | |
6400 const char_t* jname = j->name; | |
6401 if (jname && impl::strequalrange(jname, path_segment, static_cast<size_t>(path_segment_end - path_segment))) | |
6402 { | |
6403 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); | |
6404 | |
6405 if (subsearch) return subsearch; | |
6406 } | |
6407 } | |
6408 | |
6409 return xml_node(); | |
6410 } | |
6411 } | |
6412 | |
6413 PUGI__FN bool xml_node::traverse(xml_tree_walker& walker) | |
6414 { | |
6415 walker._depth = -1; | |
6416 | |
6417 xml_node arg_begin(_root); | |
6418 if (!walker.begin(arg_begin)) return false; | |
6419 | |
6420 xml_node_struct* cur = _root ? _root->first_child + 0 : 0; | |
6421 | |
6422 if (cur) | |
6423 { | |
6424 ++walker._depth; | |
6425 | |
6426 do | |
6427 { | |
6428 xml_node arg_for_each(cur); | |
6429 if (!walker.for_each(arg_for_each)) | |
6430 return false; | |
6431 | |
6432 if (cur->first_child) | |
6433 { | |
6434 ++walker._depth; | |
6435 cur = cur->first_child; | |
6436 } | |
6437 else if (cur->next_sibling) | |
6438 cur = cur->next_sibling; | |
6439 else | |
6440 { | |
6441 while (!cur->next_sibling && cur != _root && cur->parent) | |
6442 { | |
6443 --walker._depth; | |
6444 cur = cur->parent; | |
6445 } | |
6446 | |
6447 if (cur != _root) | |
6448 cur = cur->next_sibling; | |
6449 } | |
6450 } | |
6451 while (cur && cur != _root); | |
6452 } | |
6453 | |
6454 assert(walker._depth == -1); | |
6455 | |
6456 xml_node arg_end(_root); | |
6457 return walker.end(arg_end); | |
6458 } | |
6459 | |
6460 PUGI__FN size_t xml_node::hash_value() const | |
6461 { | |
6462 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct)); | |
6463 } | |
6464 | |
6465 PUGI__FN xml_node_struct* xml_node::internal_object() const | |
6466 { | |
6467 return _root; | |
6468 } | |
6469 | |
6470 PUGI__FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const | |
6471 { | |
6472 if (!_root) return; | |
6473 | |
6474 impl::xml_buffered_writer buffered_writer(writer, encoding); | |
6475 | |
6476 impl::node_output(buffered_writer, _root, indent, flags, depth); | |
6477 | |
6478 buffered_writer.flush(); | |
6479 } | |
6480 | |
6481 #ifndef PUGIXML_NO_STL | |
6482 PUGI__FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const | |
6483 { | |
6484 xml_writer_stream writer(stream); | |
6485 | |
6486 print(writer, indent, flags, encoding, depth); | |
6487 } | |
6488 | |
6489 PUGI__FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const | |
6490 { | |
6491 xml_writer_stream writer(stream); | |
6492 | |
6493 print(writer, indent, flags, encoding_wchar, depth); | |
6494 } | |
6495 #endif | |
6496 | |
6497 PUGI__FN ptrdiff_t xml_node::offset_debug() const | |
6498 { | |
6499 if (!_root) return -1; | |
6500 | |
6501 impl::xml_document_struct& doc = impl::get_document(_root); | |
6502 | |
6503 // we can determine the offset reliably only if there is exactly once parse buffer | |
6504 if (!doc.buffer || doc.extra_buffers) return -1; | |
6505 | |
6506 switch (type()) | |
6507 { | |
6508 case node_document: | |
6509 return 0; | |
6510 | |
6511 case node_element: | |
6512 case node_declaration: | |
6513 case node_pi: | |
6514 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1; | |
6515 | |
6516 case node_pcdata: | |
6517 case node_cdata: | |
6518 case node_comment: | |
6519 case node_doctype: | |
6520 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1; | |
6521 | |
6522 default: | |
6523 assert(false && "Invalid node type"); // unreachable | |
6524 return -1; | |
6525 } | |
6526 } | |
6527 | |
6528 #ifdef __BORLANDC__ | |
6529 PUGI__FN bool operator&&(const xml_node& lhs, bool rhs) | |
6530 { | |
6531 return (bool)lhs && rhs; | |
6532 } | |
6533 | |
6534 PUGI__FN bool operator||(const xml_node& lhs, bool rhs) | |
6535 { | |
6536 return (bool)lhs || rhs; | |
6537 } | |
6538 #endif | |
6539 | |
6540 PUGI__FN xml_text::xml_text(xml_node_struct* root): _root(root) | |
6541 { | |
6542 } | |
6543 | |
6544 PUGI__FN xml_node_struct* xml_text::_data() const | |
6545 { | |
6546 if (!_root || impl::is_text_node(_root)) return _root; | |
6547 | |
6548 // element nodes can have value if parse_embed_pcdata was used | |
6549 if (PUGI__NODETYPE(_root) == node_element && _root->value) | |
6550 return _root; | |
6551 | |
6552 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) | |
6553 if (impl::is_text_node(node)) | |
6554 return node; | |
6555 | |
6556 return 0; | |
6557 } | |
6558 | |
6559 PUGI__FN xml_node_struct* xml_text::_data_new() | |
6560 { | |
6561 xml_node_struct* d = _data(); | |
6562 if (d) return d; | |
6563 | |
6564 return xml_node(_root).append_child(node_pcdata).internal_object(); | |
6565 } | |
6566 | |
6567 PUGI__FN xml_text::xml_text(): _root(0) | |
6568 { | |
6569 } | |
6570 | |
6571 PUGI__FN static void unspecified_bool_xml_text(xml_text***) | |
6572 { | |
6573 } | |
6574 | |
6575 PUGI__FN xml_text::operator xml_text::unspecified_bool_type() const | |
6576 { | |
6577 return _data() ? unspecified_bool_xml_text : 0; | |
6578 } | |
6579 | |
6580 PUGI__FN bool xml_text::operator!() const | |
6581 { | |
6582 return !_data(); | |
6583 } | |
6584 | |
6585 PUGI__FN bool xml_text::empty() const | |
6586 { | |
6587 return _data() == 0; | |
6588 } | |
6589 | |
6590 PUGI__FN const char_t* xml_text::get() const | |
6591 { | |
6592 xml_node_struct* d = _data(); | |
6593 if (!d) return PUGIXML_TEXT(""); | |
6594 const char_t* value = d->value; | |
6595 return value ? value : PUGIXML_TEXT(""); | |
6596 } | |
6597 | |
6598 PUGI__FN const char_t* xml_text::as_string(const char_t* def) const | |
6599 { | |
6600 xml_node_struct* d = _data(); | |
6601 if (!d) return def; | |
6602 const char_t* value = d->value; | |
6603 return value ? value : def; | |
6604 } | |
6605 | |
6606 PUGI__FN int xml_text::as_int(int def) const | |
6607 { | |
6608 xml_node_struct* d = _data(); | |
6609 if (!d) return def; | |
6610 const char_t* value = d->value; | |
6611 return value ? impl::get_value_int(value) : def; | |
6612 } | |
6613 | |
6614 PUGI__FN unsigned int xml_text::as_uint(unsigned int def) const | |
6615 { | |
6616 xml_node_struct* d = _data(); | |
6617 if (!d) return def; | |
6618 const char_t* value = d->value; | |
6619 return value ? impl::get_value_uint(value) : def; | |
6620 } | |
6621 | |
6622 PUGI__FN double xml_text::as_double(double def) const | |
6623 { | |
6624 xml_node_struct* d = _data(); | |
6625 if (!d) return def; | |
6626 const char_t* value = d->value; | |
6627 return value ? impl::get_value_double(value) : def; | |
6628 } | |
6629 | |
6630 PUGI__FN float xml_text::as_float(float def) const | |
6631 { | |
6632 xml_node_struct* d = _data(); | |
6633 if (!d) return def; | |
6634 const char_t* value = d->value; | |
6635 return value ? impl::get_value_float(value) : def; | |
6636 } | |
6637 | |
6638 PUGI__FN bool xml_text::as_bool(bool def) const | |
6639 { | |
6640 xml_node_struct* d = _data(); | |
6641 if (!d) return def; | |
6642 const char_t* value = d->value; | |
6643 return value ? impl::get_value_bool(value) : def; | |
6644 } | |
6645 | |
6646 #ifdef PUGIXML_HAS_LONG_LONG | |
6647 PUGI__FN long long xml_text::as_llong(long long def) const | |
6648 { | |
6649 xml_node_struct* d = _data(); | |
6650 if (!d) return def; | |
6651 const char_t* value = d->value; | |
6652 return value ? impl::get_value_llong(value) : def; | |
6653 } | |
6654 | |
6655 PUGI__FN unsigned long long xml_text::as_ullong(unsigned long long def) const | |
6656 { | |
6657 xml_node_struct* d = _data(); | |
6658 if (!d) return def; | |
6659 const char_t* value = d->value; | |
6660 return value ? impl::get_value_ullong(value) : def; | |
6661 } | |
6662 #endif | |
6663 | |
6664 PUGI__FN bool xml_text::set(const char_t* rhs, size_t sz) | |
6665 { | |
6666 xml_node_struct* dn = _data_new(); | |
6667 | |
6668 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, sz) : false; | |
6669 } | |
6670 | |
6671 PUGI__FN bool xml_text::set(const char_t* rhs) | |
6672 { | |
6673 xml_node_struct* dn = _data_new(); | |
6674 | |
6675 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; | |
6676 } | |
6677 | |
6678 PUGI__FN bool xml_text::set(int rhs) | |
6679 { | |
6680 xml_node_struct* dn = _data_new(); | |
6681 | |
6682 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; | |
6683 } | |
6684 | |
6685 PUGI__FN bool xml_text::set(unsigned int rhs) | |
6686 { | |
6687 xml_node_struct* dn = _data_new(); | |
6688 | |
6689 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; | |
6690 } | |
6691 | |
6692 PUGI__FN bool xml_text::set(long rhs) | |
6693 { | |
6694 xml_node_struct* dn = _data_new(); | |
6695 | |
6696 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; | |
6697 } | |
6698 | |
6699 PUGI__FN bool xml_text::set(unsigned long rhs) | |
6700 { | |
6701 xml_node_struct* dn = _data_new(); | |
6702 | |
6703 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; | |
6704 } | |
6705 | |
6706 PUGI__FN bool xml_text::set(float rhs) | |
6707 { | |
6708 xml_node_struct* dn = _data_new(); | |
6709 | |
6710 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false; | |
6711 } | |
6712 | |
6713 PUGI__FN bool xml_text::set(float rhs, int precision) | |
6714 { | |
6715 xml_node_struct* dn = _data_new(); | |
6716 | |
6717 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false; | |
6718 } | |
6719 | |
6720 PUGI__FN bool xml_text::set(double rhs) | |
6721 { | |
6722 xml_node_struct* dn = _data_new(); | |
6723 | |
6724 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false; | |
6725 } | |
6726 | |
6727 PUGI__FN bool xml_text::set(double rhs, int precision) | |
6728 { | |
6729 xml_node_struct* dn = _data_new(); | |
6730 | |
6731 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false; | |
6732 } | |
6733 | |
6734 PUGI__FN bool xml_text::set(bool rhs) | |
6735 { | |
6736 xml_node_struct* dn = _data_new(); | |
6737 | |
6738 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; | |
6739 } | |
6740 | |
6741 #ifdef PUGIXML_HAS_LONG_LONG | |
6742 PUGI__FN bool xml_text::set(long long rhs) | |
6743 { | |
6744 xml_node_struct* dn = _data_new(); | |
6745 | |
6746 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; | |
6747 } | |
6748 | |
6749 PUGI__FN bool xml_text::set(unsigned long long rhs) | |
6750 { | |
6751 xml_node_struct* dn = _data_new(); | |
6752 | |
6753 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; | |
6754 } | |
6755 #endif | |
6756 | |
6757 PUGI__FN xml_text& xml_text::operator=(const char_t* rhs) | |
6758 { | |
6759 set(rhs); | |
6760 return *this; | |
6761 } | |
6762 | |
6763 PUGI__FN xml_text& xml_text::operator=(int rhs) | |
6764 { | |
6765 set(rhs); | |
6766 return *this; | |
6767 } | |
6768 | |
6769 PUGI__FN xml_text& xml_text::operator=(unsigned int rhs) | |
6770 { | |
6771 set(rhs); | |
6772 return *this; | |
6773 } | |
6774 | |
6775 PUGI__FN xml_text& xml_text::operator=(long rhs) | |
6776 { | |
6777 set(rhs); | |
6778 return *this; | |
6779 } | |
6780 | |
6781 PUGI__FN xml_text& xml_text::operator=(unsigned long rhs) | |
6782 { | |
6783 set(rhs); | |
6784 return *this; | |
6785 } | |
6786 | |
6787 PUGI__FN xml_text& xml_text::operator=(double rhs) | |
6788 { | |
6789 set(rhs); | |
6790 return *this; | |
6791 } | |
6792 | |
6793 PUGI__FN xml_text& xml_text::operator=(float rhs) | |
6794 { | |
6795 set(rhs); | |
6796 return *this; | |
6797 } | |
6798 | |
6799 PUGI__FN xml_text& xml_text::operator=(bool rhs) | |
6800 { | |
6801 set(rhs); | |
6802 return *this; | |
6803 } | |
6804 | |
6805 #ifdef PUGIXML_HAS_LONG_LONG | |
6806 PUGI__FN xml_text& xml_text::operator=(long long rhs) | |
6807 { | |
6808 set(rhs); | |
6809 return *this; | |
6810 } | |
6811 | |
6812 PUGI__FN xml_text& xml_text::operator=(unsigned long long rhs) | |
6813 { | |
6814 set(rhs); | |
6815 return *this; | |
6816 } | |
6817 #endif | |
6818 | |
6819 PUGI__FN xml_node xml_text::data() const | |
6820 { | |
6821 return xml_node(_data()); | |
6822 } | |
6823 | |
6824 #ifdef __BORLANDC__ | |
6825 PUGI__FN bool operator&&(const xml_text& lhs, bool rhs) | |
6826 { | |
6827 return (bool)lhs && rhs; | |
6828 } | |
6829 | |
6830 PUGI__FN bool operator||(const xml_text& lhs, bool rhs) | |
6831 { | |
6832 return (bool)lhs || rhs; | |
6833 } | |
6834 #endif | |
6835 | |
6836 PUGI__FN xml_node_iterator::xml_node_iterator() | |
6837 { | |
6838 } | |
6839 | |
6840 PUGI__FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) | |
6841 { | |
6842 } | |
6843 | |
6844 PUGI__FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) | |
6845 { | |
6846 } | |
6847 | |
6848 PUGI__FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const | |
6849 { | |
6850 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; | |
6851 } | |
6852 | |
6853 PUGI__FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const | |
6854 { | |
6855 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; | |
6856 } | |
6857 | |
6858 PUGI__FN xml_node& xml_node_iterator::operator*() const | |
6859 { | |
6860 assert(_wrap._root); | |
6861 return _wrap; | |
6862 } | |
6863 | |
6864 PUGI__FN xml_node* xml_node_iterator::operator->() const | |
6865 { | |
6866 assert(_wrap._root); | |
6867 return const_cast<xml_node*>(&_wrap); // BCC5 workaround | |
6868 } | |
6869 | |
6870 PUGI__FN xml_node_iterator& xml_node_iterator::operator++() | |
6871 { | |
6872 assert(_wrap._root); | |
6873 _wrap._root = _wrap._root->next_sibling; | |
6874 return *this; | |
6875 } | |
6876 | |
6877 PUGI__FN xml_node_iterator xml_node_iterator::operator++(int) | |
6878 { | |
6879 xml_node_iterator temp = *this; | |
6880 ++*this; | |
6881 return temp; | |
6882 } | |
6883 | |
6884 PUGI__FN xml_node_iterator& xml_node_iterator::operator--() | |
6885 { | |
6886 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child(); | |
6887 return *this; | |
6888 } | |
6889 | |
6890 PUGI__FN xml_node_iterator xml_node_iterator::operator--(int) | |
6891 { | |
6892 xml_node_iterator temp = *this; | |
6893 --*this; | |
6894 return temp; | |
6895 } | |
6896 | |
6897 PUGI__FN xml_attribute_iterator::xml_attribute_iterator() | |
6898 { | |
6899 } | |
6900 | |
6901 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) | |
6902 { | |
6903 } | |
6904 | |
6905 PUGI__FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) | |
6906 { | |
6907 } | |
6908 | |
6909 PUGI__FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const | |
6910 { | |
6911 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; | |
6912 } | |
6913 | |
6914 PUGI__FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const | |
6915 { | |
6916 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; | |
6917 } | |
6918 | |
6919 PUGI__FN xml_attribute& xml_attribute_iterator::operator*() const | |
6920 { | |
6921 assert(_wrap._attr); | |
6922 return _wrap; | |
6923 } | |
6924 | |
6925 PUGI__FN xml_attribute* xml_attribute_iterator::operator->() const | |
6926 { | |
6927 assert(_wrap._attr); | |
6928 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround | |
6929 } | |
6930 | |
6931 PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator++() | |
6932 { | |
6933 assert(_wrap._attr); | |
6934 _wrap._attr = _wrap._attr->next_attribute; | |
6935 return *this; | |
6936 } | |
6937 | |
6938 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator++(int) | |
6939 { | |
6940 xml_attribute_iterator temp = *this; | |
6941 ++*this; | |
6942 return temp; | |
6943 } | |
6944 | |
6945 PUGI__FN xml_attribute_iterator& xml_attribute_iterator::operator--() | |
6946 { | |
6947 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute(); | |
6948 return *this; | |
6949 } | |
6950 | |
6951 PUGI__FN xml_attribute_iterator xml_attribute_iterator::operator--(int) | |
6952 { | |
6953 xml_attribute_iterator temp = *this; | |
6954 --*this; | |
6955 return temp; | |
6956 } | |
6957 | |
6958 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(): _name(0) | |
6959 { | |
6960 } | |
6961 | |
6962 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name) | |
6963 { | |
6964 } | |
6965 | |
6966 PUGI__FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name) | |
6967 { | |
6968 } | |
6969 | |
6970 PUGI__FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const | |
6971 { | |
6972 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; | |
6973 } | |
6974 | |
6975 PUGI__FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const | |
6976 { | |
6977 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; | |
6978 } | |
6979 | |
6980 PUGI__FN xml_node& xml_named_node_iterator::operator*() const | |
6981 { | |
6982 assert(_wrap._root); | |
6983 return _wrap; | |
6984 } | |
6985 | |
6986 PUGI__FN xml_node* xml_named_node_iterator::operator->() const | |
6987 { | |
6988 assert(_wrap._root); | |
6989 return const_cast<xml_node*>(&_wrap); // BCC5 workaround | |
6990 } | |
6991 | |
6992 PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator++() | |
6993 { | |
6994 assert(_wrap._root); | |
6995 _wrap = _wrap.next_sibling(_name); | |
6996 return *this; | |
6997 } | |
6998 | |
6999 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator++(int) | |
7000 { | |
7001 xml_named_node_iterator temp = *this; | |
7002 ++*this; | |
7003 return temp; | |
7004 } | |
7005 | |
7006 PUGI__FN xml_named_node_iterator& xml_named_node_iterator::operator--() | |
7007 { | |
7008 if (_wrap._root) | |
7009 _wrap = _wrap.previous_sibling(_name); | |
7010 else | |
7011 { | |
7012 _wrap = _parent.last_child(); | |
7013 | |
7014 if (!impl::strequal(_wrap.name(), _name)) | |
7015 _wrap = _wrap.previous_sibling(_name); | |
7016 } | |
7017 | |
7018 return *this; | |
7019 } | |
7020 | |
7021 PUGI__FN xml_named_node_iterator xml_named_node_iterator::operator--(int) | |
7022 { | |
7023 xml_named_node_iterator temp = *this; | |
7024 --*this; | |
7025 return temp; | |
7026 } | |
7027 | |
7028 PUGI__FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto) | |
7029 { | |
7030 } | |
7031 | |
7032 PUGI__FN xml_parse_result::operator bool() const | |
7033 { | |
7034 return status == status_ok; | |
7035 } | |
7036 | |
7037 PUGI__FN const char* xml_parse_result::description() const | |
7038 { | |
7039 switch (status) | |
7040 { | |
7041 case status_ok: return "No error"; | |
7042 | |
7043 case status_file_not_found: return "File was not found"; | |
7044 case status_io_error: return "Error reading from file/stream"; | |
7045 case status_out_of_memory: return "Could not allocate memory"; | |
7046 case status_internal_error: return "Internal error occurred"; | |
7047 | |
7048 case status_unrecognized_tag: return "Could not determine tag type"; | |
7049 | |
7050 case status_bad_pi: return "Error parsing document declaration/processing instruction"; | |
7051 case status_bad_comment: return "Error parsing comment"; | |
7052 case status_bad_cdata: return "Error parsing CDATA section"; | |
7053 case status_bad_doctype: return "Error parsing document type declaration"; | |
7054 case status_bad_pcdata: return "Error parsing PCDATA section"; | |
7055 case status_bad_start_element: return "Error parsing start element tag"; | |
7056 case status_bad_attribute: return "Error parsing element attribute"; | |
7057 case status_bad_end_element: return "Error parsing end element tag"; | |
7058 case status_end_element_mismatch: return "Start-end tags mismatch"; | |
7059 | |
7060 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document"; | |
7061 | |
7062 case status_no_document_element: return "No document element found"; | |
7063 | |
7064 default: return "Unknown error"; | |
7065 } | |
7066 } | |
7067 | |
7068 PUGI__FN xml_document::xml_document(): _buffer(0) | |
7069 { | |
7070 _create(); | |
7071 } | |
7072 | |
7073 PUGI__FN xml_document::~xml_document() | |
7074 { | |
7075 _destroy(); | |
7076 } | |
7077 | |
7078 #ifdef PUGIXML_HAS_MOVE | |
7079 PUGI__FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT: _buffer(0) | |
7080 { | |
7081 _create(); | |
7082 _move(rhs); | |
7083 } | |
7084 | |
7085 PUGI__FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT | |
7086 { | |
7087 if (this == &rhs) return *this; | |
7088 | |
7089 _destroy(); | |
7090 _create(); | |
7091 _move(rhs); | |
7092 | |
7093 return *this; | |
7094 } | |
7095 #endif | |
7096 | |
7097 PUGI__FN void xml_document::reset() | |
7098 { | |
7099 _destroy(); | |
7100 _create(); | |
7101 } | |
7102 | |
7103 PUGI__FN void xml_document::reset(const xml_document& proto) | |
7104 { | |
7105 reset(); | |
7106 | |
7107 impl::node_copy_tree(_root, proto._root); | |
7108 } | |
7109 | |
7110 PUGI__FN void xml_document::_create() | |
7111 { | |
7112 assert(!_root); | |
7113 | |
7114 #ifdef PUGIXML_COMPACT | |
7115 // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit | |
7116 const size_t page_offset = sizeof(void*); | |
7117 #else | |
7118 const size_t page_offset = 0; | |
7119 #endif | |
7120 | |
7121 // initialize sentinel page | |
7122 PUGI__STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory)); | |
7123 | |
7124 // prepare page structure | |
7125 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory); | |
7126 assert(page); | |
7127 | |
7128 page->busy_size = impl::xml_memory_page_size; | |
7129 | |
7130 // setup first page marker | |
7131 #ifdef PUGIXML_COMPACT | |
7132 // round-trip through void* to avoid 'cast increases required alignment of target type' warning | |
7133 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page))); | |
7134 *page->compact_page_marker = sizeof(impl::xml_memory_page); | |
7135 #endif | |
7136 | |
7137 // allocate new root | |
7138 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page); | |
7139 _root->prev_sibling_c = _root; | |
7140 | |
7141 // setup sentinel page | |
7142 page->allocator = static_cast<impl::xml_document_struct*>(_root); | |
7143 | |
7144 // setup hash table pointer in allocator | |
7145 #ifdef PUGIXML_COMPACT | |
7146 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash; | |
7147 #endif | |
7148 | |
7149 // verify the document allocation | |
7150 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); | |
7151 } | |
7152 | |
7153 PUGI__FN void xml_document::_destroy() | |
7154 { | |
7155 assert(_root); | |
7156 | |
7157 // destroy static storage | |
7158 if (_buffer) | |
7159 { | |
7160 impl::xml_memory::deallocate(_buffer); | |
7161 _buffer = 0; | |
7162 } | |
7163 | |
7164 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator) | |
7165 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next) | |
7166 { | |
7167 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer); | |
7168 } | |
7169 | |
7170 // destroy dynamic storage, leave sentinel page (it's in static memory) | |
7171 impl::xml_memory_page* root_page = PUGI__GETPAGE(_root); | |
7172 assert(root_page && !root_page->prev); | |
7173 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory)); | |
7174 | |
7175 for (impl::xml_memory_page* page = root_page->next; page; ) | |
7176 { | |
7177 impl::xml_memory_page* next = page->next; | |
7178 | |
7179 impl::xml_allocator::deallocate_page(page); | |
7180 | |
7181 page = next; | |
7182 } | |
7183 | |
7184 #ifdef PUGIXML_COMPACT | |
7185 // destroy hash table | |
7186 static_cast<impl::xml_document_struct*>(_root)->hash.clear(); | |
7187 #endif | |
7188 | |
7189 _root = 0; | |
7190 } | |
7191 | |
7192 #ifdef PUGIXML_HAS_MOVE | |
// Transfers the contents of rhs (allocator state, buffers, memory pages and top-level nodes)
// into this document, then re-initializes rhs's document structure in place.
// Both callers in this file run _create() first, and the asserts below expect this document
// to have no extra pages and no children when the move starts.
7193 PUGI__FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT | |
7194 { | |
7195 impl::xml_document_struct* doc = static_cast<impl::xml_document_struct*>(_root); | |
7196 impl::xml_document_struct* other = static_cast<impl::xml_document_struct*>(rhs._root); | |
7197 | |
7198 // save first child pointer for later; this needs hash access | |
7199 xml_node_struct* other_first_child = other->first_child; | |
7200 | |
7201 #ifdef PUGIXML_COMPACT | |
7202 // reserve space for the hash table up front; this is the only operation that can fail | |
7203 // if it does, we have no choice but to throw (if we have exceptions) | |
7204 if (other_first_child) | |
7205 { | |
7206 size_t other_children = 0; | |
7207 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) | |
7208 other_children++; | |
7209 | |
7210 // in compact mode, each pointer assignment could result in a hash table request | |
7211 // during move, we have to relocate document first_child and parents of all children | |
7212 // normally there's just one child and its parent has a pointerless encoding but | |
7213 // we assume the worst here | |
7214 if (!other->_hash->reserve(other_children + 1)) | |
7215 { | |
7216 #ifdef PUGIXML_NO_EXCEPTIONS | |
// NOTE(review): without exceptions a failed reservation abandons the move silently;
// no state has been transferred at this point, so rhs keeps its contents.
7217 return; | |
7218 #else | |
7219 throw std::bad_alloc(); | |
7220 #endif | |
7221 } | |
7222 } | |
7223 #endif | |
7224 | |
7225 // move allocation state | |
7226 // note that other->_root may point to the embedded document page, in which case we should keep original (empty) state | |
7227 if (other->_root != PUGI__GETPAGE(other)) | |
7228 { | |
7229 doc->_root = other->_root; | |
7230 doc->_busy_size = other->_busy_size; | |
7231 } | |
7232 | |
7233 // move buffer state | |
7234 doc->buffer = other->buffer; | |
7235 doc->extra_buffers = other->extra_buffers; | |
7236 _buffer = rhs._buffer; | |
7237 | |
7238 #ifdef PUGIXML_COMPACT | |
7239 // move compact hash; note that the hash table can have pointers to other but they will be "inactive", similarly to nodes removed with remove_child | |
7240 doc->hash = other->hash; | |
7241 doc->_hash = &doc->hash; | |
7242 | |
7243 // make sure we don't access other hash up until the end when we reinitialize other document | |
7244 other->_hash = 0; | |
7245 #endif | |
7246 | |
7247 // move page structure | |
7248 impl::xml_memory_page* doc_page = PUGI__GETPAGE(doc); | |
7249 assert(doc_page && !doc_page->prev && !doc_page->next); | |
7250 | |
7251 impl::xml_memory_page* other_page = PUGI__GETPAGE(other); | |
7252 assert(other_page && !other_page->prev); | |
7253 | |
7254 // relink pages since root page is embedded into xml_document | |
7255 if (impl::xml_memory_page* page = other_page->next) | |
7256 { | |
7257 assert(page->prev == other_page); | |
7258 | |
7259 page->prev = doc_page; | |
7260 | |
7261 doc_page->next = page; | |
7262 other_page->next = 0; | |
7263 } | |
7264 | |
7265 // make sure pages point to the correct document state | |
7266 for (impl::xml_memory_page* page = doc_page->next; page; page = page->next) | |
7267 { | |
7268 assert(page->allocator == other); | |
7269 | |
7270 page->allocator = doc; | |
7271 | |
7272 #ifdef PUGIXML_COMPACT | |
7273 // this automatically migrates most children between documents and prevents ->parent assignment from allocating | |
7274 if (page->compact_shared_parent == other) | |
7275 page->compact_shared_parent = doc; | |
7276 #endif | |
7277 } | |
7278 | |
7279 // move tree structure | |
7280 assert(!doc->first_child); | |
7281 | |
7282 doc->first_child = other_first_child; | |
7283 | |
// re-parent every top-level node from the source document to this one
7284 for (xml_node_struct* node = other_first_child; node; node = node->next_sibling) | |
7285 { | |
7286 #ifdef PUGIXML_COMPACT | |
7287 // most children will have migrated when we reassigned compact_shared_parent | |
7288 assert(node->parent == other || node->parent == doc); | |
7289 | |
7290 node->parent = doc; | |
7291 #else | |
7292 assert(node->parent == other); | |
7293 node->parent = doc; | |
7294 #endif | |
7295 } | |
7296 | |
7297 // reset other document | |
// (placement-new rebuilds the source's document structure in its existing storage)
7298 new (other) impl::xml_document_struct(PUGI__GETPAGE(other)); | |
7299 rhs._buffer = 0; | |
7300 } | |
7301 #endif | |
7302 | |
7303 #ifndef PUGIXML_NO_STL | |
7304 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding) | |
7305 { | |
7306 reset(); | |
7307 | |
7308 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer); | |
7309 } | |
7310 | |
7311 PUGI__FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options) | |
7312 { | |
7313 reset(); | |
7314 | |
7315 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer); | |
7316 } | |
7317 #endif | |
7318 | |
7319 PUGI__FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options) | |
7320 { | |
7321 // Force native encoding (skip autodetection) | |
7322 #ifdef PUGIXML_WCHAR_MODE | |
7323 xml_encoding encoding = encoding_wchar; | |
7324 #else | |
7325 xml_encoding encoding = encoding_utf8; | |
7326 #endif | |
7327 | |
7328 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding); | |
7329 } | |
7330 | |
7331 PUGI__FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options) | |
7332 { | |
7333 return load_string(contents, options); | |
7334 } | |
7335 | |
7336 PUGI__FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding) | |
7337 { | |
7338 reset(); | |
7339 | |
7340 using impl::auto_deleter; // MSVC7 workaround | |
7341 auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file); | |
7342 | |
7343 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); | |
7344 } | |
7345 | |
7346 PUGI__FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding) | |
7347 { | |
7348 reset(); | |
7349 | |
7350 using impl::auto_deleter; // MSVC7 workaround | |
7351 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file); | |
7352 | |
7353 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); | |
7354 } | |
7355 | |
7356 PUGI__FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) | |
7357 { | |
7358 reset(); | |
7359 | |
7360 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer); | |
7361 } | |
7362 | |
7363 PUGI__FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) | |
7364 { | |
7365 reset(); | |
7366 | |
7367 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer); | |
7368 } | |
7369 | |
7370 PUGI__FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) | |
7371 { | |
7372 reset(); | |
7373 | |
7374 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer); | |
7375 } | |
7376 | |
7377 PUGI__FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const | |
7378 { | |
7379 impl::xml_buffered_writer buffered_writer(writer, encoding); | |
7380 | |
7381 if ((flags & format_write_bom) && encoding != encoding_latin1) | |
7382 { | |
7383 // BOM always represents the codepoint U+FEFF, so just write it in native encoding | |
7384 #ifdef PUGIXML_WCHAR_MODE | |
7385 unsigned int bom = 0xfeff; | |
7386 buffered_writer.write(static_cast<wchar_t>(bom)); | |
7387 #else | |
7388 buffered_writer.write('\xef', '\xbb', '\xbf'); | |
7389 #endif | |
7390 } | |
7391 | |
7392 if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) | |
7393 { | |
7394 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\"")); | |
7395 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\"")); | |
7396 buffered_writer.write('?', '>'); | |
7397 if (!(flags & format_raw)) buffered_writer.write('\n'); | |
7398 } | |
7399 | |
7400 impl::node_output(buffered_writer, _root, indent, flags, 0); | |
7401 | |
7402 buffered_writer.flush(); | |
7403 } | |
7404 | |
7405 #ifndef PUGIXML_NO_STL | |
7406 PUGI__FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const | |
7407 { | |
7408 xml_writer_stream writer(stream); | |
7409 | |
7410 save(writer, indent, flags, encoding); | |
7411 } | |
7412 | |
7413 PUGI__FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const | |
7414 { | |
7415 xml_writer_stream writer(stream); | |
7416 | |
7417 save(writer, indent, flags, encoding_wchar); | |
7418 } | |
7419 #endif | |
7420 | |
7421 PUGI__FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const | |
7422 { | |
7423 using impl::auto_deleter; // MSVC7 workaround | |
7424 auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file); | |
7425 | |
7426 return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0; | |
7427 } | |
7428 | |
7429 PUGI__FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const | |
7430 { | |
7431 using impl::auto_deleter; // MSVC7 workaround | |
7432 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file); | |
7433 | |
7434 return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0; | |
7435 } | |
7436 | |
7437 PUGI__FN xml_node xml_document::document_element() const | |
7438 { | |
7439 assert(_root); | |
7440 | |
7441 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
7442 if (PUGI__NODETYPE(i) == node_element) | |
7443 return xml_node(i); | |
7444 | |
7445 return xml_node(); | |
7446 } | |
7447 | |
7448 #ifndef PUGIXML_NO_STL | |
7449 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) | |
7450 { | |
7451 assert(str); | |
7452 | |
7453 return impl::as_utf8_impl(str, impl::strlength_wide(str)); | |
7454 } | |
7455 | |
7456 PUGI__FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str) | |
7457 { | |
7458 return impl::as_utf8_impl(str.c_str(), str.size()); | |
7459 } | |
7460 | |
7461 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str) | |
7462 { | |
7463 assert(str); | |
7464 | |
7465 return impl::as_wide_impl(str, strlen(str)); | |
7466 } | |
7467 | |
7468 PUGI__FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str) | |
7469 { | |
7470 return impl::as_wide_impl(str.c_str(), str.size()); | |
7471 } | |
7472 #endif | |
7473 | |
7474 PUGI__FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) | |
7475 { | |
7476 impl::xml_memory::allocate = allocate; | |
7477 impl::xml_memory::deallocate = deallocate; | |
7478 } | |
7479 | |
7480 PUGI__FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function() | |
7481 { | |
7482 return impl::xml_memory::allocate; | |
7483 } | |
7484 | |
7485 PUGI__FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() | |
7486 { | |
7487 return impl::xml_memory::deallocate; | |
7488 } | |
7489 } | |
7490 | |
7491 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) | |
7492 namespace std | |
7493 { | |
7494 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) | |
7495 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) | |
7496 { | |
7497 return std::bidirectional_iterator_tag(); | |
7498 } | |
7499 | |
7500 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) | |
7501 { | |
7502 return std::bidirectional_iterator_tag(); | |
7503 } | |
7504 | |
7505 PUGI__FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&) | |
7506 { | |
7507 return std::bidirectional_iterator_tag(); | |
7508 } | |
7509 } | |
7510 #endif | |
7511 | |
7512 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) | |
7513 namespace std | |
7514 { | |
7515 // Workarounds for (non-standard) iterator category detection | |
7516 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) | |
7517 { | |
7518 return std::bidirectional_iterator_tag(); | |
7519 } | |
7520 | |
7521 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) | |
7522 { | |
7523 return std::bidirectional_iterator_tag(); | |
7524 } | |
7525 | |
7526 PUGI__FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&) | |
7527 { | |
7528 return std::bidirectional_iterator_tag(); | |
7529 } | |
7530 } | |
7531 #endif | |
7532 | |
7533 #ifndef PUGIXML_NO_XPATH | |
7534 // STL replacements | |
7535 PUGI__NS_BEGIN | |
// Function object that compares two values of the same type with operator==.
struct equal_to
{
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        const bool outcome = (lhs == rhs);
        return outcome;
    }
};
7543 | |
// Function object that compares two values of the same type with operator!=.
struct not_equal_to
{
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        const bool outcome = (lhs != rhs);
        return outcome;
    }
};
7551 | |
// Function object that compares two values of the same type with operator<.
struct less
{
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        const bool outcome = (lhs < rhs);
        return outcome;
    }
};
7559 | |
// Function object that compares two values of the same type with operator<=.
struct less_equal
{
    template <typename T>
    bool operator()(const T& lhs, const T& rhs) const
    {
        const bool outcome = (lhs <= rhs);
        return outcome;
    }
};
7567 | |
// Exchanges two values through a temporary copy (one copy construction, two assignments).
template <typename T>
inline void swap(T& lhs, T& rhs)
{
    T held = lhs;

    lhs = rhs;
    rhs = held;
}
7574 | |
// Returns an iterator to the first element of [begin, end) for which no other element
// compares smaller under pred (pred(a, b) means "a orders before b").
// An empty range yields end, instead of stepping past it with begin + 1.
template <typename I, typename Pred> PUGI__FN I min_element(I begin, I end, const Pred& pred)
{
    // empty range: nothing to compare, and begin + 1 would move beyond end
    if (begin == end)
        return end;

    I result = begin;

    for (I it = begin + 1; it != end; ++it)
        if (pred(*it, *result))
            result = it;

    return result;
}
7585 | |
7586 template <typename I> PUGI__FN void reverse(I begin, I end) | |
7587 { | |
7588 while (end - begin > 1) | |
7589 swap(*begin++, *--end); | |
7590 } | |
7591 | |
// Collapses runs of consecutive equal elements in [begin, end) to their first element,
// compacting the survivors towards the front. Returns the new past-the-end position.
// Elements are compared with operator!=.
template <typename I> PUGI__FN I unique(I begin, I end)
{
    // fast skip head: advance while the current element differs from its successor
    while (end - begin > 1)
    {
        if (*begin != *(begin + 1))
            ++begin;
        else
            break;
    }

    if (begin == end)
        return begin;

    // position of the last element kept so far
    I kept = begin;

    // merge unique elements
    for (I scan = begin + 1; scan != end; ++scan)
    {
        if (*scan != *kept)
        {
            ++kept;
            *kept = *scan;
        }
    }

    // past-the-end (kept points to live element)
    return kept + 1;
}
7616 | |
// Sorts [begin, end) in place with insertion sort; pred(a, b) means "a orders before b".
template <typename T, typename Pred> PUGI__FN void insertion_sort(T* begin, T* end, const Pred& pred)
{
    if (begin == end)
        return;

    for (T* current = begin + 1; current != end; ++current)
    {
        T val = *current;
        T* hole = current;

        // shift larger elements one slot right until val's position is found
        for (; hole > begin && pred(val, *(hole - 1)); --hole)
            *hole = *(hole - 1);

        // fill hole with element
        *hole = val;
    }
}
7638 | |
// Returns the iterator (one of first/middle/last) that refers to the median of
// the three referenced elements under pred. Only the local iterator copies are
// exchanged; the elements themselves are not moved.
template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred)
{
    // three compare-exchange steps order the iterators as first <= middle <= last
    if (pred(*middle, *first)) swap(middle, first);
    if (pred(*last, *middle)) swap(last, middle);
    if (pred(*middle, *first)) swap(middle, first);

    return middle;
}
7650 | |
7651 template <typename T, typename Pred> PUGI__FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend) | |
7652 { | |
7653 // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups) | |
7654 T* eq = begin; | |
7655 T* lt = begin; | |
7656 T* gt = end; | |
7657 | |
7658 while (lt < gt) | |
7659 { | |
7660 if (pred(*lt, pivot)) | |
7661 lt++; | |
7662 else if (*lt == pivot) | |
7663 swap(*eq++, *lt++); | |
7664 else | |
7665 swap(*lt, *--gt); | |
7666 } | |
7667 | |
7668 // we now have just 4 groups: = < >; move equal elements to the middle | |
7669 T* eqbeg = gt; | |
7670 | |
7671 for (T* it = begin; it != eq; ++it) | |
7672 swap(*it, *--eqbeg); | |
7673 | |
7674 *out_eqbeg = eqbeg; | |
7675 *out_eqend = gt; | |
7676 } | |
7677 | |
7678 template <typename I, typename Pred> PUGI__FN void sort(I begin, I end, const Pred& pred) | |
7679 { | |
7680 // sort large chunks | |
7681 while (end - begin > 16) | |
7682 { | |
7683 // find median element | |
7684 I middle = begin + (end - begin) / 2; | |
7685 I median = median3(begin, middle, end - 1, pred); | |
7686 | |
7687 // partition in three chunks (< = >) | |
7688 I eqbeg, eqend; | |
7689 partition3(begin, end, *median, pred, &eqbeg, &eqend); | |
7690 | |
7691 // loop on larger half | |
7692 if (eqbeg - begin > end - eqend) | |
7693 { | |
7694 sort(eqend, end, pred); | |
7695 end = eqbeg; | |
7696 } | |
7697 else | |
7698 { | |
7699 sort(begin, eqbeg, pred); | |
7700 begin = eqend; | |
7701 } | |
7702 } | |
7703 | |
7704 // insertion sort small chunk | |
7705 insertion_sort(begin, end, pred); | |
7706 } | |
7707 | |
7708 PUGI__FN bool hash_insert(const void** table, size_t size, const void* key) | |
7709 { | |
7710 assert(key); | |
7711 | |
7712 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key)); | |
7713 | |
7714 // MurmurHash3 32-bit finalizer | |
7715 h ^= h >> 16; | |
7716 h *= 0x85ebca6bu; | |
7717 h ^= h >> 13; | |
7718 h *= 0xc2b2ae35u; | |
7719 h ^= h >> 16; | |
7720 | |
7721 size_t hashmod = size - 1; | |
7722 size_t bucket = h & hashmod; | |
7723 | |
7724 for (size_t probe = 0; probe <= hashmod; ++probe) | |
7725 { | |
7726 if (table[bucket] == 0) | |
7727 { | |
7728 table[bucket] = key; | |
7729 return true; | |
7730 } | |
7731 | |
7732 if (table[bucket] == key) | |
7733 return false; | |
7734 | |
7735 // hash collision, quadratic probing | |
7736 bucket = (bucket + probe + 1) & hashmod; | |
7737 } | |
7738 | |
7739 assert(false && "Hash table is full"); // unreachable | |
7740 return false; | |
7741 } | |
7742 PUGI__NS_END | |
7743 | |
7744 // Allocator used for AST and evaluation stacks | |
7745 PUGI__NS_BEGIN | |
// Size in bytes of the data area of one XPath allocator block; can be
// overridden at build time by defining PUGIXML_MEMORY_XPATH_PAGE_SIZE.
#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
static const size_t xpath_memory_page_size = PUGIXML_MEMORY_XPATH_PAGE_SIZE;
#else
static const size_t xpath_memory_page_size = 4096;
#endif

// Allocation granularity for XPath blocks: the larger of sizeof(double) and sizeof(void*)
static const uintptr_t xpath_memory_block_alignment = (sizeof(void*) < sizeof(double)) ? sizeof(double) : sizeof(void*);
7755 | |
7756 struct xpath_memory_block | |
7757 { | |
7758 xpath_memory_block* next; | |
7759 size_t capacity; | |
7760 | |
7761 union | |
7762 { | |
7763 char data[xpath_memory_page_size]; | |
7764 double alignment; | |
7765 }; | |
7766 }; | |
7767 | |
7768 struct xpath_allocator | |
7769 { | |
7770 xpath_memory_block* _root; | |
7771 size_t _root_size; | |
7772 bool* _error; | |
7773 | |
7774 xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error) | |
7775 { | |
7776 } | |
7777 | |
7778 void* allocate(size_t size) | |
7779 { | |
7780 // round size up to block alignment boundary | |
7781 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); | |
7782 | |
7783 if (_root_size + size <= _root->capacity) | |
7784 { | |
7785 void* buf = &_root->data[0] + _root_size; | |
7786 _root_size += size; | |
7787 return buf; | |
7788 } | |
7789 else | |
7790 { | |
7791 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests | |
7792 size_t block_capacity_base = sizeof(_root->data); | |
7793 size_t block_capacity_req = size + block_capacity_base / 4; | |
7794 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req; | |
7795 | |
7796 size_t block_size = block_capacity + offsetof(xpath_memory_block, data); | |
7797 | |
7798 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size)); | |
7799 if (!block) | |
7800 { | |
7801 if (_error) *_error = true; | |
7802 return 0; | |
7803 } | |
7804 | |
7805 block->next = _root; | |
7806 block->capacity = block_capacity; | |
7807 | |
7808 _root = block; | |
7809 _root_size = size; | |
7810 | |
7811 return block->data; | |
7812 } | |
7813 } | |
7814 | |
7815 void* reallocate(void* ptr, size_t old_size, size_t new_size) | |
7816 { | |
7817 // round size up to block alignment boundary | |
7818 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); | |
7819 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); | |
7820 | |
7821 // we can only reallocate the last object | |
7822 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size); | |
7823 | |
7824 // try to reallocate the object inplace | |
7825 if (ptr && _root_size - old_size + new_size <= _root->capacity) | |
7826 { | |
7827 _root_size = _root_size - old_size + new_size; | |
7828 return ptr; | |
7829 } | |
7830 | |
7831 // allocate a new block | |
7832 void* result = allocate(new_size); | |
7833 if (!result) return 0; | |
7834 | |
7835 // we have a new block | |
7836 if (ptr) | |
7837 { | |
7838 // copy old data (we only support growing) | |
7839 assert(new_size >= old_size); | |
7840 memcpy(result, ptr, old_size); | |
7841 | |
7842 // free the previous page if it had no other objects | |
7843 assert(_root->data == result); | |
7844 assert(_root->next); | |
7845 | |
7846 if (_root->next->data == ptr) | |
7847 { | |
7848 // deallocate the whole page, unless it was the first one | |
7849 xpath_memory_block* next = _root->next->next; | |
7850 | |
7851 if (next) | |
7852 { | |
7853 xml_memory::deallocate(_root->next); | |
7854 _root->next = next; | |
7855 } | |
7856 } | |
7857 } | |
7858 | |
7859 return result; | |
7860 } | |
7861 | |
7862 void revert(const xpath_allocator& state) | |
7863 { | |
7864 // free all new pages | |
7865 xpath_memory_block* cur = _root; | |
7866 | |
7867 while (cur != state._root) | |
7868 { | |
7869 xpath_memory_block* next = cur->next; | |
7870 | |
7871 xml_memory::deallocate(cur); | |
7872 | |
7873 cur = next; | |
7874 } | |
7875 | |
7876 // restore state | |
7877 _root = state._root; | |
7878 _root_size = state._root_size; | |
7879 } | |
7880 | |
7881 void release() | |
7882 { | |
7883 xpath_memory_block* cur = _root; | |
7884 assert(cur); | |
7885 | |
7886 while (cur->next) | |
7887 { | |
7888 xpath_memory_block* next = cur->next; | |
7889 | |
7890 xml_memory::deallocate(cur); | |
7891 | |
7892 cur = next; | |
7893 } | |
7894 } | |
7895 }; | |
7896 | |
7897 struct xpath_allocator_capture | |
7898 { | |
7899 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) | |
7900 { | |
7901 } | |
7902 | |
7903 ~xpath_allocator_capture() | |
7904 { | |
7905 _target->revert(_state); | |
7906 } | |
7907 | |
7908 xpath_allocator* _target; | |
7909 xpath_allocator _state; | |
7910 }; | |
7911 | |
7912 struct xpath_stack | |
7913 { | |
7914 xpath_allocator* result; | |
7915 xpath_allocator* temp; | |
7916 }; | |
7917 | |
7918 struct xpath_stack_data | |
7919 { | |
7920 xpath_memory_block blocks[2]; | |
7921 xpath_allocator result; | |
7922 xpath_allocator temp; | |
7923 xpath_stack stack; | |
7924 bool oom; | |
7925 | |
7926 xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false) | |
7927 { | |
7928 blocks[0].next = blocks[1].next = 0; | |
7929 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); | |
7930 | |
7931 stack.result = &result; | |
7932 stack.temp = &temp; | |
7933 } | |
7934 | |
7935 ~xpath_stack_data() | |
7936 { | |
7937 result.release(); | |
7938 temp.release(); | |
7939 } | |
7940 }; | |
7941 PUGI__NS_END | |
7942 | |
7943 // String class | |
7944 PUGI__NS_BEGIN | |
7945 class xpath_string | |
7946 { | |
7947 const char_t* _buffer; | |
7948 bool _uses_heap; | |
7949 size_t _length_heap; | |
7950 | |
7951 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) | |
7952 { | |
7953 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t))); | |
7954 if (!result) return 0; | |
7955 | |
7956 memcpy(result, string, length * sizeof(char_t)); | |
7957 result[length] = 0; | |
7958 | |
7959 return result; | |
7960 } | |
7961 | |
7962 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) | |
7963 { | |
7964 } | |
7965 | |
7966 public: | |
7967 static xpath_string from_const(const char_t* str) | |
7968 { | |
7969 return xpath_string(str, false, 0); | |
7970 } | |
7971 | |
7972 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) | |
7973 { | |
7974 assert(begin <= end && *end == 0); | |
7975 | |
7976 return xpath_string(begin, true, static_cast<size_t>(end - begin)); | |
7977 } | |
7978 | |
7979 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) | |
7980 { | |
7981 assert(begin <= end); | |
7982 | |
7983 if (begin == end) | |
7984 return xpath_string(); | |
7985 | |
7986 size_t length = static_cast<size_t>(end - begin); | |
7987 const char_t* data = duplicate_string(begin, length, alloc); | |
7988 | |
7989 return data ? xpath_string(data, true, length) : xpath_string(); | |
7990 } | |
7991 | |
7992 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) | |
7993 { | |
7994 } | |
7995 | |
7996 void append(const xpath_string& o, xpath_allocator* alloc) | |
7997 { | |
7998 // skip empty sources | |
7999 if (!*o._buffer) return; | |
8000 | |
8001 // fast append for constant empty target and constant source | |
8002 if (!*_buffer && !_uses_heap && !o._uses_heap) | |
8003 { | |
8004 _buffer = o._buffer; | |
8005 } | |
8006 else | |
8007 { | |
8008 // need to make heap copy | |
8009 size_t target_length = length(); | |
8010 size_t source_length = o.length(); | |
8011 size_t result_length = target_length + source_length; | |
8012 | |
8013 // allocate new buffer | |
8014 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); | |
8015 if (!result) return; | |
8016 | |
8017 // append first string to the new buffer in case there was no reallocation | |
8018 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); | |
8019 | |
8020 // append second string to the new buffer | |
8021 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t)); | |
8022 result[result_length] = 0; | |
8023 | |
8024 // finalize | |
8025 _buffer = result; | |
8026 _uses_heap = true; | |
8027 _length_heap = result_length; | |
8028 } | |
8029 } | |
8030 | |
8031 const char_t* c_str() const | |
8032 { | |
8033 return _buffer; | |
8034 } | |
8035 | |
8036 size_t length() const | |
8037 { | |
8038 return _uses_heap ? _length_heap : strlength(_buffer); | |
8039 } | |
8040 | |
8041 char_t* data(xpath_allocator* alloc) | |
8042 { | |
8043 // make private heap copy | |
8044 if (!_uses_heap) | |
8045 { | |
8046 size_t length_ = strlength(_buffer); | |
8047 const char_t* data_ = duplicate_string(_buffer, length_, alloc); | |
8048 | |
8049 if (!data_) return 0; | |
8050 | |
8051 _buffer = data_; | |
8052 _uses_heap = true; | |
8053 _length_heap = length_; | |
8054 } | |
8055 | |
8056 return const_cast<char_t*>(_buffer); | |
8057 } | |
8058 | |
8059 bool empty() const | |
8060 { | |
8061 return *_buffer == 0; | |
8062 } | |
8063 | |
8064 bool operator==(const xpath_string& o) const | |
8065 { | |
8066 return strequal(_buffer, o._buffer); | |
8067 } | |
8068 | |
8069 bool operator!=(const xpath_string& o) const | |
8070 { | |
8071 return !strequal(_buffer, o._buffer); | |
8072 } | |
8073 | |
8074 bool uses_heap() const | |
8075 { | |
8076 return _uses_heap; | |
8077 } | |
8078 }; | |
8079 PUGI__NS_END | |
8080 | |
8081 PUGI__NS_BEGIN | |
8082 PUGI__FN bool starts_with(const char_t* string, const char_t* pattern) | |
8083 { | |
8084 while (*pattern && *string == *pattern) | |
8085 { | |
8086 string++; | |
8087 pattern++; | |
8088 } | |
8089 | |
8090 return *pattern == 0; | |
8091 } | |
8092 | |
8093 PUGI__FN const char_t* find_char(const char_t* s, char_t c) | |
8094 { | |
8095 #ifdef PUGIXML_WCHAR_MODE | |
8096 return wcschr(s, c); | |
8097 #else | |
8098 return strchr(s, c); | |
8099 #endif | |
8100 } | |
8101 | |
8102 PUGI__FN const char_t* find_substring(const char_t* s, const char_t* p) | |
8103 { | |
8104 #ifdef PUGIXML_WCHAR_MODE | |
8105 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0) | |
8106 return (*p == 0) ? s : wcsstr(s, p); | |
8107 #else | |
8108 return strstr(s, p); | |
8109 #endif | |
8110 } | |
8111 | |
8112 // Converts symbol to lower case, if it is an ASCII one | |
8113 PUGI__FN char_t tolower_ascii(char_t ch) | |
8114 { | |
8115 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch; | |
8116 } | |
8117 | |
8118 PUGI__FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) | |
8119 { | |
8120 if (na.attribute()) | |
8121 return xpath_string::from_const(na.attribute().value()); | |
8122 else | |
8123 { | |
8124 xml_node n = na.node(); | |
8125 | |
8126 switch (n.type()) | |
8127 { | |
8128 case node_pcdata: | |
8129 case node_cdata: | |
8130 case node_comment: | |
8131 case node_pi: | |
8132 return xpath_string::from_const(n.value()); | |
8133 | |
8134 case node_document: | |
8135 case node_element: | |
8136 { | |
8137 xpath_string result; | |
8138 | |
8139 // element nodes can have value if parse_embed_pcdata was used | |
8140 if (n.value()[0]) | |
8141 result.append(xpath_string::from_const(n.value()), alloc); | |
8142 | |
8143 xml_node cur = n.first_child(); | |
8144 | |
8145 while (cur && cur != n) | |
8146 { | |
8147 if (cur.type() == node_pcdata || cur.type() == node_cdata) | |
8148 result.append(xpath_string::from_const(cur.value()), alloc); | |
8149 | |
8150 if (cur.first_child()) | |
8151 cur = cur.first_child(); | |
8152 else if (cur.next_sibling()) | |
8153 cur = cur.next_sibling(); | |
8154 else | |
8155 { | |
8156 while (!cur.next_sibling() && cur != n) | |
8157 cur = cur.parent(); | |
8158 | |
8159 if (cur != n) cur = cur.next_sibling(); | |
8160 } | |
8161 } | |
8162 | |
8163 return result; | |
8164 } | |
8165 | |
8166 default: | |
8167 return xpath_string(); | |
8168 } | |
8169 } | |
8170 } | |
8171 | |
8172 PUGI__FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) | |
8173 { | |
8174 assert(ln->parent == rn->parent); | |
8175 | |
8176 // there is no common ancestor (the shared parent is null), nodes are from different documents | |
8177 if (!ln->parent) return ln < rn; | |
8178 | |
8179 // determine sibling order | |
8180 xml_node_struct* ls = ln; | |
8181 xml_node_struct* rs = rn; | |
8182 | |
8183 while (ls && rs) | |
8184 { | |
8185 if (ls == rn) return true; | |
8186 if (rs == ln) return false; | |
8187 | |
8188 ls = ls->next_sibling; | |
8189 rs = rs->next_sibling; | |
8190 } | |
8191 | |
8192 // if rn sibling chain ended ln must be before rn | |
8193 return !rs; | |
8194 } | |
8195 | |
8196 PUGI__FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) | |
8197 { | |
8198 // find common ancestor at the same depth, if any | |
8199 xml_node_struct* lp = ln; | |
8200 xml_node_struct* rp = rn; | |
8201 | |
8202 while (lp && rp && lp->parent != rp->parent) | |
8203 { | |
8204 lp = lp->parent; | |
8205 rp = rp->parent; | |
8206 } | |
8207 | |
8208 // parents are the same! | |
8209 if (lp && rp) return node_is_before_sibling(lp, rp); | |
8210 | |
8211 // nodes are at different depths, need to normalize heights | |
8212 bool left_higher = !lp; | |
8213 | |
8214 while (lp) | |
8215 { | |
8216 lp = lp->parent; | |
8217 ln = ln->parent; | |
8218 } | |
8219 | |
8220 while (rp) | |
8221 { | |
8222 rp = rp->parent; | |
8223 rn = rn->parent; | |
8224 } | |
8225 | |
8226 // one node is the ancestor of the other | |
8227 if (ln == rn) return left_higher; | |
8228 | |
8229 // find common ancestor... again | |
8230 while (ln->parent != rn->parent) | |
8231 { | |
8232 ln = ln->parent; | |
8233 rn = rn->parent; | |
8234 } | |
8235 | |
8236 return node_is_before_sibling(ln, rn); | |
8237 } | |
8238 | |
8239 PUGI__FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) | |
8240 { | |
8241 while (node && node != parent) node = node->parent; | |
8242 | |
8243 return parent && node == parent; | |
8244 } | |
8245 | |
8246 PUGI__FN const void* document_buffer_order(const xpath_node& xnode) | |
8247 { | |
8248 xml_node_struct* node = xnode.node().internal_object(); | |
8249 | |
8250 if (node) | |
8251 { | |
8252 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) | |
8253 { | |
8254 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; | |
8255 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; | |
8256 } | |
8257 | |
8258 return 0; | |
8259 } | |
8260 | |
8261 xml_attribute_struct* attr = xnode.attribute().internal_object(); | |
8262 | |
8263 if (attr) | |
8264 { | |
8265 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) | |
8266 { | |
8267 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; | |
8268 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value; | |
8269 } | |
8270 | |
8271 return 0; | |
8272 } | |
8273 | |
8274 return 0; | |
8275 } | |
8276 | |
8277 struct document_order_comparator | |
8278 { | |
8279 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const | |
8280 { | |
8281 // optimized document order based check | |
8282 const void* lo = document_buffer_order(lhs); | |
8283 const void* ro = document_buffer_order(rhs); | |
8284 | |
8285 if (lo && ro) return lo < ro; | |
8286 | |
8287 // slow comparison | |
8288 xml_node ln = lhs.node(), rn = rhs.node(); | |
8289 | |
8290 // compare attributes | |
8291 if (lhs.attribute() && rhs.attribute()) | |
8292 { | |
8293 // shared parent | |
8294 if (lhs.parent() == rhs.parent()) | |
8295 { | |
8296 // determine sibling order | |
8297 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) | |
8298 if (a == rhs.attribute()) | |
8299 return true; | |
8300 | |
8301 return false; | |
8302 } | |
8303 | |
8304 // compare attribute parents | |
8305 ln = lhs.parent(); | |
8306 rn = rhs.parent(); | |
8307 } | |
8308 else if (lhs.attribute()) | |
8309 { | |
8310 // attributes go after the parent element | |
8311 if (lhs.parent() == rhs.node()) return false; | |
8312 | |
8313 ln = lhs.parent(); | |
8314 } | |
8315 else if (rhs.attribute()) | |
8316 { | |
8317 // attributes go after the parent element | |
8318 if (rhs.parent() == lhs.node()) return true; | |
8319 | |
8320 rn = rhs.parent(); | |
8321 } | |
8322 | |
8323 if (ln == rn) return false; | |
8324 | |
8325 if (!ln || !rn) return ln < rn; | |
8326 | |
8327 return node_is_before(ln.internal_object(), rn.internal_object()); | |
8328 } | |
8329 }; | |
8330 | |
8331 PUGI__FN double gen_nan() | |
8332 { | |
8333 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) | |
8334 PUGI__STATIC_ASSERT(sizeof(float) == sizeof(uint32_t)); | |
8335 typedef uint32_t UI; // BCC5 workaround | |
8336 union { float f; UI i; } u; | |
8337 u.i = 0x7fc00000; | |
8338 return double(u.f); | |
8339 #else | |
8340 // fallback | |
8341 const volatile double zero = 0.0; | |
8342 return zero / zero; | |
8343 #endif | |
8344 } | |
8345 | |
8346 PUGI__FN bool is_nan(double value) | |
8347 { | |
8348 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) | |
8349 return !!_isnan(value); | |
8350 #elif defined(fpclassify) && defined(FP_NAN) | |
8351 return fpclassify(value) == FP_NAN; | |
8352 #else | |
8353 // fallback | |
8354 const volatile double v = value; | |
8355 return v != v; | |
8356 #endif | |
8357 } | |
8358 | |
8359 PUGI__FN const char_t* convert_number_to_string_special(double value) | |
8360 { | |
8361 #if defined(PUGI__MSVC_CRT_VERSION) || defined(__BORLANDC__) | |
8362 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; | |
8363 if (_isnan(value)) return PUGIXML_TEXT("NaN"); | |
8364 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); | |
8365 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) | |
8366 switch (fpclassify(value)) | |
8367 { | |
8368 case FP_NAN: | |
8369 return PUGIXML_TEXT("NaN"); | |
8370 | |
8371 case FP_INFINITE: | |
8372 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); | |
8373 | |
8374 case FP_ZERO: | |
8375 return PUGIXML_TEXT("0"); | |
8376 | |
8377 default: | |
8378 return 0; | |
8379 } | |
8380 #else | |
8381 // fallback | |
8382 const volatile double v = value; | |
8383 | |
8384 if (v == 0) return PUGIXML_TEXT("0"); | |
8385 if (v != v) return PUGIXML_TEXT("NaN"); | |
8386 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); | |
8387 return 0; | |
8388 #endif | |
8389 } | |
8390 | |
8391 PUGI__FN bool convert_number_to_boolean(double value) | |
8392 { | |
8393 return (value != 0 && !is_nan(value)); | |
8394 } | |
8395 | |
8396 PUGI__FN void truncate_zeros(char* begin, char* end) | |
8397 { | |
8398 while (begin != end && end[-1] == '0') end--; | |
8399 | |
8400 *end = 0; | |
8401 } | |
8402 | |
8403 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent | |
8404 #if defined(PUGI__MSVC_CRT_VERSION) && PUGI__MSVC_CRT_VERSION >= 1400 | |
8405 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) | |
8406 { | |
8407 // get base values | |
8408 int sign, exponent; | |
8409 _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign); | |
8410 | |
8411 // truncate redundant zeros | |
8412 truncate_zeros(buffer, buffer + strlen(buffer)); | |
8413 | |
8414 // fill results | |
8415 *out_mantissa = buffer; | |
8416 *out_exponent = exponent; | |
8417 } | |
8418 #else | |
8419 PUGI__FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) | |
8420 { | |
8421 // get a scientific notation value with IEEE DBL_DIG decimals | |
8422 PUGI__SNPRINTF(buffer, "%.*e", DBL_DIG, value); | |
8423 | |
8424 // get the exponent (possibly negative) | |
8425 char* exponent_string = strchr(buffer, 'e'); | |
8426 assert(exponent_string); | |
8427 | |
8428 int exponent = atoi(exponent_string + 1); | |
8429 | |
8430 // extract mantissa string: skip sign | |
8431 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer; | |
8432 assert(mantissa[0] != '0' && mantissa[1] == '.'); | |
8433 | |
8434 // divide mantissa by 10 to eliminate integer part | |
8435 mantissa[1] = mantissa[0]; | |
8436 mantissa++; | |
8437 exponent++; | |
8438 | |
8439 // remove extra mantissa digits and zero-terminate mantissa | |
8440 truncate_zeros(mantissa, exponent_string); | |
8441 | |
8442 // fill results | |
8443 *out_mantissa = mantissa; | |
8444 *out_exponent = exponent; | |
8445 } | |
8446 #endif | |
8447 | |
8448 PUGI__FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc) | |
8449 { | |
8450 // try special number conversion | |
8451 const char_t* special = convert_number_to_string_special(value); | |
8452 if (special) return xpath_string::from_const(special); | |
8453 | |
8454 // get mantissa + exponent form | |
8455 char mantissa_buffer[32]; | |
8456 | |
8457 char* mantissa; | |
8458 int exponent; | |
8459 convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent); | |
8460 | |
8461 // allocate a buffer of suitable length for the number | |
8462 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; | |
8463 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size)); | |
8464 if (!result) return xpath_string(); | |
8465 | |
8466 // make the number! | |
8467 char_t* s = result; | |
8468 | |
8469 // sign | |
8470 if (value < 0) *s++ = '-'; | |
8471 | |
8472 // integer part | |
8473 if (exponent <= 0) | |
8474 { | |
8475 *s++ = '0'; | |
8476 } | |
8477 else | |
8478 { | |
8479 while (exponent > 0) | |
8480 { | |
8481 assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9); | |
8482 *s++ = *mantissa ? *mantissa++ : '0'; | |
8483 exponent--; | |
8484 } | |
8485 } | |
8486 | |
8487 // fractional part | |
8488 if (*mantissa) | |
8489 { | |
8490 // decimal point | |
8491 *s++ = '.'; | |
8492 | |
8493 // extra zeroes from negative exponent | |
8494 while (exponent < 0) | |
8495 { | |
8496 *s++ = '0'; | |
8497 exponent++; | |
8498 } | |
8499 | |
8500 // extra mantissa digits | |
8501 while (*mantissa) | |
8502 { | |
8503 assert(static_cast<unsigned int>(*mantissa - '0') <= 9); | |
8504 *s++ = *mantissa++; | |
8505 } | |
8506 } | |
8507 | |
8508 // zero-terminate | |
8509 assert(s < result + result_size); | |
8510 *s = 0; | |
8511 | |
8512 return xpath_string::from_heap_preallocated(result, s); | |
8513 } | |
8514 | |
8515 PUGI__FN bool check_string_to_number_format(const char_t* string) | |
8516 { | |
8517 // parse leading whitespace | |
8518 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; | |
8519 | |
8520 // parse sign | |
8521 if (*string == '-') ++string; | |
8522 | |
8523 if (!*string) return false; | |
8524 | |
8525 // if there is no integer part, there should be a decimal part with at least one digit | |
8526 if (!PUGI__IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI__IS_CHARTYPEX(string[1], ctx_digit))) return false; | |
8527 | |
8528 // parse integer part | |
8529 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; | |
8530 | |
8531 // parse decimal part | |
8532 if (*string == '.') | |
8533 { | |
8534 ++string; | |
8535 | |
8536 while (PUGI__IS_CHARTYPEX(*string, ctx_digit)) ++string; | |
8537 } | |
8538 | |
8539 // parse trailing whitespace | |
8540 while (PUGI__IS_CHARTYPE(*string, ct_space)) ++string; | |
8541 | |
8542 return *string == 0; | |
8543 } | |
8544 | |
8545 PUGI__FN double convert_string_to_number(const char_t* string) | |
8546 { | |
8547 // check string format | |
8548 if (!check_string_to_number_format(string)) return gen_nan(); | |
8549 | |
8550 // parse string | |
8551 #ifdef PUGIXML_WCHAR_MODE | |
8552 return wcstod(string, 0); | |
8553 #else | |
8554 return strtod(string, 0); | |
8555 #endif | |
8556 } | |
8557 | |
8558 PUGI__FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result) | |
8559 { | |
8560 size_t length = static_cast<size_t>(end - begin); | |
8561 char_t* scratch = buffer; | |
8562 | |
8563 if (length >= sizeof(buffer) / sizeof(buffer[0])) | |
8564 { | |
8565 // need to make dummy on-heap copy | |
8566 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
8567 if (!scratch) return false; | |
8568 } | |
8569 | |
8570 // copy string to zero-terminated buffer and perform conversion | |
8571 memcpy(scratch, begin, length * sizeof(char_t)); | |
8572 scratch[length] = 0; | |
8573 | |
8574 *out_result = convert_string_to_number(scratch); | |
8575 | |
8576 // free dummy buffer | |
8577 if (scratch != buffer) xml_memory::deallocate(scratch); | |
8578 | |
8579 return true; | |
8580 } | |
8581 | |
8582 PUGI__FN double round_nearest(double value) | |
8583 { | |
8584 return floor(value + 0.5); | |
8585 } | |
8586 | |
8587 PUGI__FN double round_nearest_nzero(double value) | |
8588 { | |
8589 // same as round_nearest, but returns -0 for [-0.5, -0] | |
8590 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) | |
8591 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); | |
8592 } | |
8593 | |
8594 PUGI__FN const char_t* qualified_name(const xpath_node& node) | |
8595 { | |
8596 return node.attribute() ? node.attribute().name() : node.node().name(); | |
8597 } | |
8598 | |
8599 PUGI__FN const char_t* local_name(const xpath_node& node) | |
8600 { | |
8601 const char_t* name = qualified_name(node); | |
8602 const char_t* p = find_char(name, ':'); | |
8603 | |
8604 return p ? p + 1 : name; | |
8605 } | |
8606 | |
8607 struct namespace_uri_predicate | |
8608 { | |
8609 const char_t* prefix; | |
8610 size_t prefix_length; | |
8611 | |
8612 namespace_uri_predicate(const char_t* name) | |
8613 { | |
8614 const char_t* pos = find_char(name, ':'); | |
8615 | |
8616 prefix = pos ? name : 0; | |
8617 prefix_length = pos ? static_cast<size_t>(pos - name) : 0; | |
8618 } | |
8619 | |
8620 bool operator()(xml_attribute a) const | |
8621 { | |
8622 const char_t* name = a.name(); | |
8623 | |
8624 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; | |
8625 | |
8626 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; | |
8627 } | |
8628 }; | |
8629 | |
8630 PUGI__FN const char_t* namespace_uri(xml_node node) | |
8631 { | |
8632 namespace_uri_predicate pred = node.name(); | |
8633 | |
8634 xml_node p = node; | |
8635 | |
8636 while (p) | |
8637 { | |
8638 xml_attribute a = p.find_attribute(pred); | |
8639 | |
8640 if (a) return a.value(); | |
8641 | |
8642 p = p.parent(); | |
8643 } | |
8644 | |
8645 return PUGIXML_TEXT(""); | |
8646 } | |
8647 | |
8648 PUGI__FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) | |
8649 { | |
8650 namespace_uri_predicate pred = attr.name(); | |
8651 | |
8652 // Default namespace does not apply to attributes | |
8653 if (!pred.prefix) return PUGIXML_TEXT(""); | |
8654 | |
8655 xml_node p = parent; | |
8656 | |
8657 while (p) | |
8658 { | |
8659 xml_attribute a = p.find_attribute(pred); | |
8660 | |
8661 if (a) return a.value(); | |
8662 | |
8663 p = p.parent(); | |
8664 } | |
8665 | |
8666 return PUGIXML_TEXT(""); | |
8667 } | |
8668 | |
8669 PUGI__FN const char_t* namespace_uri(const xpath_node& node) | |
8670 { | |
8671 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node()); | |
8672 } | |
8673 | |
8674 PUGI__FN char_t* normalize_space(char_t* buffer) | |
8675 { | |
8676 char_t* write = buffer; | |
8677 | |
8678 for (char_t* it = buffer; *it; ) | |
8679 { | |
8680 char_t ch = *it++; | |
8681 | |
8682 if (PUGI__IS_CHARTYPE(ch, ct_space)) | |
8683 { | |
8684 // replace whitespace sequence with single space | |
8685 while (PUGI__IS_CHARTYPE(*it, ct_space)) it++; | |
8686 | |
8687 // avoid leading spaces | |
8688 if (write != buffer) *write++ = ' '; | |
8689 } | |
8690 else *write++ = ch; | |
8691 } | |
8692 | |
8693 // remove trailing space | |
8694 if (write != buffer && PUGI__IS_CHARTYPE(write[-1], ct_space)) write--; | |
8695 | |
8696 // zero-terminate | |
8697 *write = 0; | |
8698 | |
8699 return write; | |
8700 } | |
8701 | |
8702 PUGI__FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) | |
8703 { | |
8704 char_t* write = buffer; | |
8705 | |
8706 while (*buffer) | |
8707 { | |
8708 PUGI__DMC_VOLATILE char_t ch = *buffer++; | |
8709 | |
8710 const char_t* pos = find_char(from, ch); | |
8711 | |
8712 if (!pos) | |
8713 *write++ = ch; // do not process | |
8714 else if (static_cast<size_t>(pos - from) < to_length) | |
8715 *write++ = to[pos - from]; // replace | |
8716 } | |
8717 | |
8718 // zero-terminate | |
8719 *write = 0; | |
8720 | |
8721 return write; | |
8722 } | |
8723 | |
8724 PUGI__FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) | |
8725 { | |
8726 unsigned char table[128] = {0}; | |
8727 | |
8728 while (*from) | |
8729 { | |
8730 unsigned int fc = static_cast<unsigned int>(*from); | |
8731 unsigned int tc = static_cast<unsigned int>(*to); | |
8732 | |
8733 if (fc >= 128 || tc >= 128) | |
8734 return 0; | |
8735 | |
8736 // code=128 means "skip character" | |
8737 if (!table[fc]) | |
8738 table[fc] = static_cast<unsigned char>(tc ? tc : 128); | |
8739 | |
8740 from++; | |
8741 if (tc) to++; | |
8742 } | |
8743 | |
8744 for (int i = 0; i < 128; ++i) | |
8745 if (!table[i]) | |
8746 table[i] = static_cast<unsigned char>(i); | |
8747 | |
8748 void* result = alloc->allocate(sizeof(table)); | |
8749 if (!result) return 0; | |
8750 | |
8751 memcpy(result, table, sizeof(table)); | |
8752 | |
8753 return static_cast<unsigned char*>(result); | |
8754 } | |
8755 | |
8756 PUGI__FN char_t* translate_table(char_t* buffer, const unsigned char* table) | |
8757 { | |
8758 char_t* write = buffer; | |
8759 | |
8760 while (*buffer) | |
8761 { | |
8762 char_t ch = *buffer++; | |
8763 unsigned int index = static_cast<unsigned int>(ch); | |
8764 | |
8765 if (index < 128) | |
8766 { | |
8767 unsigned char code = table[index]; | |
8768 | |
8769 // code=128 means "skip character" (table size is 128 so 128 can be a special value) | |
8770 // this code skips these characters without extra branches | |
8771 *write = static_cast<char_t>(code); | |
8772 write += 1 - (code >> 7); | |
8773 } | |
8774 else | |
8775 { | |
8776 *write++ = ch; | |
8777 } | |
8778 } | |
8779 | |
8780 // zero-terminate | |
8781 *write = 0; | |
8782 | |
8783 return write; | |
8784 } | |
8785 | |
8786 inline bool is_xpath_attribute(const char_t* name) | |
8787 { | |
8788 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); | |
8789 } | |
8790 | |
8791 struct xpath_variable_boolean: xpath_variable | |
8792 { | |
8793 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) | |
8794 { | |
8795 } | |
8796 | |
8797 bool value; | |
8798 char_t name[1]; | |
8799 }; | |
8800 | |
8801 struct xpath_variable_number: xpath_variable | |
8802 { | |
8803 xpath_variable_number(): xpath_variable(xpath_type_number), value(0) | |
8804 { | |
8805 } | |
8806 | |
8807 double value; | |
8808 char_t name[1]; | |
8809 }; | |
8810 | |
8811 struct xpath_variable_string: xpath_variable | |
8812 { | |
8813 xpath_variable_string(): xpath_variable(xpath_type_string), value(0) | |
8814 { | |
8815 } | |
8816 | |
8817 ~xpath_variable_string() | |
8818 { | |
8819 if (value) xml_memory::deallocate(value); | |
8820 } | |
8821 | |
8822 char_t* value; | |
8823 char_t name[1]; | |
8824 }; | |
8825 | |
8826 struct xpath_variable_node_set: xpath_variable | |
8827 { | |
8828 xpath_variable_node_set(): xpath_variable(xpath_type_node_set) | |
8829 { | |
8830 } | |
8831 | |
8832 xpath_node_set value; | |
8833 char_t name[1]; | |
8834 }; | |
8835 | |
// Default-constructed, immutable node set with internal linkage.
// NOTE(review): presumably handed out as a fallback where a node set reference is needed but none is available - usage is outside this chunk.
8836 static const xpath_node_set dummy_node_set; | |
8837 | |
8838 PUGI__FN PUGI__UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str) | |
8839 { | |
8840 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time) | |
8841 unsigned int result = 0; | |
8842 | |
8843 while (*str) | |
8844 { | |
8845 result += static_cast<unsigned int>(*str++); | |
8846 result += result << 10; | |
8847 result ^= result >> 6; | |
8848 } | |
8849 | |
8850 result += result << 3; | |
8851 result ^= result >> 11; | |
8852 result += result << 15; | |
8853 | |
8854 return result; | |
8855 } | |
8856 | |
8857 template <typename T> PUGI__FN T* new_xpath_variable(const char_t* name) | |
8858 { | |
8859 size_t length = strlength(name); | |
8860 if (length == 0) return 0; // empty variable names are invalid | |
8861 | |
8862 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters | |
8863 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t)); | |
8864 if (!memory) return 0; | |
8865 | |
8866 T* result = new (memory) T(); | |
8867 | |
8868 memcpy(result->name, name, (length + 1) * sizeof(char_t)); | |
8869 | |
8870 return result; | |
8871 } | |
8872 | |
8873 PUGI__FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name) | |
8874 { | |
8875 switch (type) | |
8876 { | |
8877 case xpath_type_node_set: | |
8878 return new_xpath_variable<xpath_variable_node_set>(name); | |
8879 | |
8880 case xpath_type_number: | |
8881 return new_xpath_variable<xpath_variable_number>(name); | |
8882 | |
8883 case xpath_type_string: | |
8884 return new_xpath_variable<xpath_variable_string>(name); | |
8885 | |
8886 case xpath_type_boolean: | |
8887 return new_xpath_variable<xpath_variable_boolean>(name); | |
8888 | |
8889 default: | |
8890 return 0; | |
8891 } | |
8892 } | |
8893 | |
8894 template <typename T> PUGI__FN void delete_xpath_variable(T* var) | |
8895 { | |
8896 var->~T(); | |
8897 xml_memory::deallocate(var); | |
8898 } | |
8899 | |
8900 PUGI__FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var) | |
8901 { | |
8902 switch (type) | |
8903 { | |
8904 case xpath_type_node_set: | |
8905 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var)); | |
8906 break; | |
8907 | |
8908 case xpath_type_number: | |
8909 delete_xpath_variable(static_cast<xpath_variable_number*>(var)); | |
8910 break; | |
8911 | |
8912 case xpath_type_string: | |
8913 delete_xpath_variable(static_cast<xpath_variable_string*>(var)); | |
8914 break; | |
8915 | |
8916 case xpath_type_boolean: | |
8917 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var)); | |
8918 break; | |
8919 | |
8920 default: | |
8921 assert(false && "Invalid variable type"); // unreachable | |
8922 } | |
8923 } | |
8924 | |
8925 PUGI__FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs) | |
8926 { | |
8927 switch (rhs->type()) | |
8928 { | |
8929 case xpath_type_node_set: | |
8930 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value); | |
8931 | |
8932 case xpath_type_number: | |
8933 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value); | |
8934 | |
8935 case xpath_type_string: | |
8936 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value); | |
8937 | |
8938 case xpath_type_boolean: | |
8939 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value); | |
8940 | |
8941 default: | |
8942 assert(false && "Invalid variable type"); // unreachable | |
8943 return false; | |
8944 } | |
8945 } | |
8946 | |
8947 PUGI__FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result) | |
8948 { | |
8949 size_t length = static_cast<size_t>(end - begin); | |
8950 char_t* scratch = buffer; | |
8951 | |
8952 if (length >= sizeof(buffer) / sizeof(buffer[0])) | |
8953 { | |
8954 // need to make dummy on-heap copy | |
8955 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
8956 if (!scratch) return false; | |
8957 } | |
8958 | |
8959 // copy string to zero-terminated buffer and perform lookup | |
8960 memcpy(scratch, begin, length * sizeof(char_t)); | |
8961 scratch[length] = 0; | |
8962 | |
8963 *out_result = set->get(scratch); | |
8964 | |
8965 // free dummy buffer | |
8966 if (scratch != buffer) xml_memory::deallocate(scratch); | |
8967 | |
8968 return true; | |
8969 } | |
8970 PUGI__NS_END | |
8971 | |
8972 // Internal node set class | |
8973 PUGI__NS_BEGIN | |
8974 PUGI__FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end) | |
8975 { | |
8976 if (end - begin < 2) | |
8977 return xpath_node_set::type_sorted; | |
8978 | |
8979 document_order_comparator cmp; | |
8980 | |
8981 bool first = cmp(begin[0], begin[1]); | |
8982 | |
8983 for (const xpath_node* it = begin + 1; it + 1 < end; ++it) | |
8984 if (cmp(it[0], it[1]) != first) | |
8985 return xpath_node_set::type_unsorted; | |
8986 | |
8987 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse; | |
8988 } | |
8989 | |
8990 PUGI__FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev) | |
8991 { | |
8992 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; | |
8993 | |
8994 if (type == xpath_node_set::type_unsorted) | |
8995 { | |
8996 xpath_node_set::type_t sorted = xpath_get_order(begin, end); | |
8997 | |
8998 if (sorted == xpath_node_set::type_unsorted) | |
8999 { | |
9000 sort(begin, end, document_order_comparator()); | |
9001 | |
9002 type = xpath_node_set::type_sorted; | |
9003 } | |
9004 else | |
9005 type = sorted; | |
9006 } | |
9007 | |
9008 if (type != order) reverse(begin, end); | |
9009 | |
9010 return order; | |
9011 } | |
9012 | |
9013 PUGI__FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type) | |
9014 { | |
9015 if (begin == end) return xpath_node(); | |
9016 | |
9017 switch (type) | |
9018 { | |
9019 case xpath_node_set::type_sorted: | |
9020 return *begin; | |
9021 | |
9022 case xpath_node_set::type_sorted_reverse: | |
9023 return *(end - 1); | |
9024 | |
9025 case xpath_node_set::type_unsorted: | |
9026 return *min_element(begin, end, document_order_comparator()); | |
9027 | |
9028 default: | |
9029 assert(false && "Invalid node set type"); // unreachable | |
9030 return xpath_node(); | |
9031 } | |
9032 } | |
9033 | |
9034 class xpath_node_set_raw | |
9035 { | |
9036 xpath_node_set::type_t _type; | |
9037 | |
9038 xpath_node* _begin; | |
9039 xpath_node* _end; | |
9040 xpath_node* _eos; | |
9041 | |
9042 public: | |
9043 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) | |
9044 { | |
9045 } | |
9046 | |
9047 xpath_node* begin() const | |
9048 { | |
9049 return _begin; | |
9050 } | |
9051 | |
9052 xpath_node* end() const | |
9053 { | |
9054 return _end; | |
9055 } | |
9056 | |
9057 bool empty() const | |
9058 { | |
9059 return _begin == _end; | |
9060 } | |
9061 | |
9062 size_t size() const | |
9063 { | |
9064 return static_cast<size_t>(_end - _begin); | |
9065 } | |
9066 | |
9067 xpath_node first() const | |
9068 { | |
9069 return xpath_first(_begin, _end, _type); | |
9070 } | |
9071 | |
9072 void push_back_grow(const xpath_node& node, xpath_allocator* alloc); | |
9073 | |
9074 void push_back(const xpath_node& node, xpath_allocator* alloc) | |
9075 { | |
9076 if (_end != _eos) | |
9077 *_end++ = node; | |
9078 else | |
9079 push_back_grow(node, alloc); | |
9080 } | |
9081 | |
9082 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) | |
9083 { | |
9084 if (begin_ == end_) return; | |
9085 | |
9086 size_t size_ = static_cast<size_t>(_end - _begin); | |
9087 size_t capacity = static_cast<size_t>(_eos - _begin); | |
9088 size_t count = static_cast<size_t>(end_ - begin_); | |
9089 | |
9090 if (size_ + count > capacity) | |
9091 { | |
9092 // reallocate the old array or allocate a new one | |
9093 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); | |
9094 if (!data) return; | |
9095 | |
9096 // finalize | |
9097 _begin = data; | |
9098 _end = data + size_; | |
9099 _eos = data + size_ + count; | |
9100 } | |
9101 | |
9102 memcpy(_end, begin_, count * sizeof(xpath_node)); | |
9103 _end += count; | |
9104 } | |
9105 | |
9106 void sort_do() | |
9107 { | |
9108 _type = xpath_sort(_begin, _end, _type, false); | |
9109 } | |
9110 | |
9111 void truncate(xpath_node* pos) | |
9112 { | |
9113 assert(_begin <= pos && pos <= _end); | |
9114 | |
9115 _end = pos; | |
9116 } | |
9117 | |
9118 void remove_duplicates(xpath_allocator* alloc) | |
9119 { | |
9120 if (_type == xpath_node_set::type_unsorted && _end - _begin > 2) | |
9121 { | |
9122 xpath_allocator_capture cr(alloc); | |
9123 | |
9124 size_t size_ = static_cast<size_t>(_end - _begin); | |
9125 | |
9126 size_t hash_size = 1; | |
9127 while (hash_size < size_ + size_ / 2) hash_size *= 2; | |
9128 | |
9129 const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**))); | |
9130 if (!hash_data) return; | |
9131 | |
9132 memset(hash_data, 0, hash_size * sizeof(const void**)); | |
9133 | |
9134 xpath_node* write = _begin; | |
9135 | |
9136 for (xpath_node* it = _begin; it != _end; ++it) | |
9137 { | |
9138 const void* attr = it->attribute().internal_object(); | |
9139 const void* node = it->node().internal_object(); | |
9140 const void* key = attr ? attr : node; | |
9141 | |
9142 if (key && hash_insert(hash_data, hash_size, key)) | |
9143 { | |
9144 *write++ = *it; | |
9145 } | |
9146 } | |
9147 | |
9148 _end = write; | |
9149 } | |
9150 else | |
9151 { | |
9152 _end = unique(_begin, _end); | |
9153 } | |
9154 } | |
9155 | |
9156 xpath_node_set::type_t type() const | |
9157 { | |
9158 return _type; | |
9159 } | |
9160 | |
9161 void set_type(xpath_node_set::type_t value) | |
9162 { | |
9163 _type = value; | |
9164 } | |
9165 }; | |
9166 | |
9167 PUGI__FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) | |
9168 { | |
9169 size_t capacity = static_cast<size_t>(_eos - _begin); | |
9170 | |
9171 // get new capacity (1.5x rule) | |
9172 size_t new_capacity = capacity + capacity / 2 + 1; | |
9173 | |
9174 // reallocate the old array or allocate a new one | |
9175 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); | |
9176 if (!data) return; | |
9177 | |
9178 // finalize | |
9179 _begin = data; | |
9180 _end = data + capacity; | |
9181 _eos = data + new_capacity; | |
9182 | |
9183 // push | |
9184 *_end++ = node; | |
9185 } | |
9186 PUGI__NS_END | |
9187 | |
9188 PUGI__NS_BEGIN | |
9189 struct xpath_context | |
9190 { | |
9191 xpath_node n; | |
9192 size_t position, size; | |
9193 | |
9194 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) | |
9195 { | |
9196 } | |
9197 }; | |
9198 | |
// Token kinds produced by xpath_lexer::next().
// lex_none is reported for input that does not form a valid token (for example a lone '!' or ':',
// or an unterminated quoted string); lex_eof is reported at the terminating zero of the query.
9199 enum lexeme_t | |
9200 { | |
9201 lex_none = 0, | |
9202 lex_equal, | |
9203 lex_not_equal, | |
9204 lex_less, | |
9205 lex_greater, | |
9206 lex_less_or_equal, | |
9207 lex_greater_or_equal, | |
9208 lex_plus, | |
9209 lex_minus, | |
9210 lex_multiply, | |
9211 lex_union, | |
9212 lex_var_ref, | |
9213 lex_open_brace, | |
9214 lex_close_brace, | |
9215 lex_quoted_string, | |
9216 lex_number, | |
9217 lex_slash, | |
9218 lex_double_slash, | |
9219 lex_open_square_brace, | |
9220 lex_close_square_brace, | |
9221 lex_string, | |
9222 lex_comma, | |
9223 lex_axis_attribute, | |
9224 lex_dot, | |
9225 lex_double_dot, | |
9226 lex_double_colon, | |
9227 lex_eof | |
9228 }; | |
9229 | |
9230 struct xpath_lexer_string | |
9231 { | |
9232 const char_t* begin; | |
9233 const char_t* end; | |
9234 | |
9235 xpath_lexer_string(): begin(0), end(0) | |
9236 { | |
9237 } | |
9238 | |
9239 bool operator==(const char_t* other) const | |
9240 { | |
9241 size_t length = static_cast<size_t>(end - begin); | |
9242 | |
9243 return strequalrange(other, begin, length); | |
9244 } | |
9245 }; | |
9246 | |
9247 class xpath_lexer | |
9248 { | |
9249 const char_t* _cur; | |
9250 const char_t* _cur_lexeme_pos; | |
9251 xpath_lexer_string _cur_lexeme_contents; | |
9252 | |
9253 lexeme_t _cur_lexeme; | |
9254 | |
9255 public: | |
9256 explicit xpath_lexer(const char_t* query): _cur(query) | |
9257 { | |
9258 next(); | |
9259 } | |
9260 | |
9261 const char_t* state() const | |
9262 { | |
9263 return _cur; | |
9264 } | |
9265 | |
9266 void next() | |
9267 { | |
9268 const char_t* cur = _cur; | |
9269 | |
9270 while (PUGI__IS_CHARTYPE(*cur, ct_space)) ++cur; | |
9271 | |
9272 // save lexeme position for error reporting | |
9273 _cur_lexeme_pos = cur; | |
9274 | |
9275 switch (*cur) | |
9276 { | |
9277 case 0: | |
9278 _cur_lexeme = lex_eof; | |
9279 break; | |
9280 | |
9281 case '>': | |
9282 if (*(cur+1) == '=') | |
9283 { | |
9284 cur += 2; | |
9285 _cur_lexeme = lex_greater_or_equal; | |
9286 } | |
9287 else | |
9288 { | |
9289 cur += 1; | |
9290 _cur_lexeme = lex_greater; | |
9291 } | |
9292 break; | |
9293 | |
9294 case '<': | |
9295 if (*(cur+1) == '=') | |
9296 { | |
9297 cur += 2; | |
9298 _cur_lexeme = lex_less_or_equal; | |
9299 } | |
9300 else | |
9301 { | |
9302 cur += 1; | |
9303 _cur_lexeme = lex_less; | |
9304 } | |
9305 break; | |
9306 | |
9307 case '!': | |
9308 if (*(cur+1) == '=') | |
9309 { | |
9310 cur += 2; | |
9311 _cur_lexeme = lex_not_equal; | |
9312 } | |
9313 else | |
9314 { | |
9315 _cur_lexeme = lex_none; | |
9316 } | |
9317 break; | |
9318 | |
9319 case '=': | |
9320 cur += 1; | |
9321 _cur_lexeme = lex_equal; | |
9322 | |
9323 break; | |
9324 | |
9325 case '+': | |
9326 cur += 1; | |
9327 _cur_lexeme = lex_plus; | |
9328 | |
9329 break; | |
9330 | |
9331 case '-': | |
9332 cur += 1; | |
9333 _cur_lexeme = lex_minus; | |
9334 | |
9335 break; | |
9336 | |
9337 case '*': | |
9338 cur += 1; | |
9339 _cur_lexeme = lex_multiply; | |
9340 | |
9341 break; | |
9342 | |
9343 case '|': | |
9344 cur += 1; | |
9345 _cur_lexeme = lex_union; | |
9346 | |
9347 break; | |
9348 | |
9349 case '$': | |
9350 cur += 1; | |
9351 | |
9352 if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) | |
9353 { | |
9354 _cur_lexeme_contents.begin = cur; | |
9355 | |
9356 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | |
9357 | |
9358 if (cur[0] == ':' && PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // qname | |
9359 { | |
9360 cur++; // : | |
9361 | |
9362 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | |
9363 } | |
9364 | |
9365 _cur_lexeme_contents.end = cur; | |
9366 | |
9367 _cur_lexeme = lex_var_ref; | |
9368 } | |
9369 else | |
9370 { | |
9371 _cur_lexeme = lex_none; | |
9372 } | |
9373 | |
9374 break; | |
9375 | |
9376 case '(': | |
9377 cur += 1; | |
9378 _cur_lexeme = lex_open_brace; | |
9379 | |
9380 break; | |
9381 | |
9382 case ')': | |
9383 cur += 1; | |
9384 _cur_lexeme = lex_close_brace; | |
9385 | |
9386 break; | |
9387 | |
9388 case '[': | |
9389 cur += 1; | |
9390 _cur_lexeme = lex_open_square_brace; | |
9391 | |
9392 break; | |
9393 | |
9394 case ']': | |
9395 cur += 1; | |
9396 _cur_lexeme = lex_close_square_brace; | |
9397 | |
9398 break; | |
9399 | |
9400 case ',': | |
9401 cur += 1; | |
9402 _cur_lexeme = lex_comma; | |
9403 | |
9404 break; | |
9405 | |
9406 case '/': | |
9407 if (*(cur+1) == '/') | |
9408 { | |
9409 cur += 2; | |
9410 _cur_lexeme = lex_double_slash; | |
9411 } | |
9412 else | |
9413 { | |
9414 cur += 1; | |
9415 _cur_lexeme = lex_slash; | |
9416 } | |
9417 break; | |
9418 | |
9419 case '.': | |
9420 if (*(cur+1) == '.') | |
9421 { | |
9422 cur += 2; | |
9423 _cur_lexeme = lex_double_dot; | |
9424 } | |
9425 else if (PUGI__IS_CHARTYPEX(*(cur+1), ctx_digit)) | |
9426 { | |
9427 _cur_lexeme_contents.begin = cur; // . | |
9428 | |
9429 ++cur; | |
9430 | |
9431 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; | |
9432 | |
9433 _cur_lexeme_contents.end = cur; | |
9434 | |
9435 _cur_lexeme = lex_number; | |
9436 } | |
9437 else | |
9438 { | |
9439 cur += 1; | |
9440 _cur_lexeme = lex_dot; | |
9441 } | |
9442 break; | |
9443 | |
9444 case '@': | |
9445 cur += 1; | |
9446 _cur_lexeme = lex_axis_attribute; | |
9447 | |
9448 break; | |
9449 | |
9450 case '"': | |
9451 case '\'': | |
9452 { | |
9453 char_t terminator = *cur; | |
9454 | |
9455 ++cur; | |
9456 | |
9457 _cur_lexeme_contents.begin = cur; | |
9458 while (*cur && *cur != terminator) cur++; | |
9459 _cur_lexeme_contents.end = cur; | |
9460 | |
9461 if (!*cur) | |
9462 _cur_lexeme = lex_none; | |
9463 else | |
9464 { | |
9465 cur += 1; | |
9466 _cur_lexeme = lex_quoted_string; | |
9467 } | |
9468 | |
9469 break; | |
9470 } | |
9471 | |
9472 case ':': | |
9473 if (*(cur+1) == ':') | |
9474 { | |
9475 cur += 2; | |
9476 _cur_lexeme = lex_double_colon; | |
9477 } | |
9478 else | |
9479 { | |
9480 _cur_lexeme = lex_none; | |
9481 } | |
9482 break; | |
9483 | |
9484 default: | |
9485 if (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) | |
9486 { | |
9487 _cur_lexeme_contents.begin = cur; | |
9488 | |
9489 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; | |
9490 | |
9491 if (*cur == '.') | |
9492 { | |
9493 cur++; | |
9494 | |
9495 while (PUGI__IS_CHARTYPEX(*cur, ctx_digit)) cur++; | |
9496 } | |
9497 | |
9498 _cur_lexeme_contents.end = cur; | |
9499 | |
9500 _cur_lexeme = lex_number; | |
9501 } | |
9502 else if (PUGI__IS_CHARTYPEX(*cur, ctx_start_symbol)) | |
9503 { | |
9504 _cur_lexeme_contents.begin = cur; | |
9505 | |
9506 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | |
9507 | |
9508 if (cur[0] == ':') | |
9509 { | |
9510 if (cur[1] == '*') // namespace test ncname:* | |
9511 { | |
9512 cur += 2; // :* | |
9513 } | |
9514 else if (PUGI__IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname | |
9515 { | |
9516 cur++; // : | |
9517 | |
9518 while (PUGI__IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | |
9519 } | |
9520 } | |
9521 | |
9522 _cur_lexeme_contents.end = cur; | |
9523 | |
9524 _cur_lexeme = lex_string; | |
9525 } | |
9526 else | |
9527 { | |
9528 _cur_lexeme = lex_none; | |
9529 } | |
9530 } | |
9531 | |
9532 _cur = cur; | |
9533 } | |
9534 | |
9535 lexeme_t current() const | |
9536 { | |
9537 return _cur_lexeme; | |
9538 } | |
9539 | |
9540 const char_t* current_pos() const | |
9541 { | |
9542 return _cur_lexeme_pos; | |
9543 } | |
9544 | |
9545 const xpath_lexer_string& contents() const | |
9546 { | |
9547 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string); | |
9548 | |
9549 return _cur_lexeme_contents; | |
9550 } | |
9551 }; | |
9552 | |
// Node kinds of the XPath expression tree (stored in xpath_ast_node::_type).
// The comments use left/right/third for the operands of a node.
9553 enum ast_type_t | |
9554 { | |
9555 ast_unknown, | |
9556 ast_op_or, // left or right | |
9557 ast_op_and, // left and right | |
9558 ast_op_equal, // left = right | |
9559 ast_op_not_equal, // left != right | |
9560 ast_op_less, // left < right | |
9561 ast_op_greater, // left > right | |
9562 ast_op_less_or_equal, // left <= right | |
9563 ast_op_greater_or_equal, // left >= right | |
9564 ast_op_add, // left + right | |
9565 ast_op_subtract, // left - right | |
9566 ast_op_multiply, // left * right | |
9567 ast_op_divide, // left / right | |
9568 ast_op_mod, // left mod right | |
9569 ast_op_negate, // - left | |
9570 ast_op_union, // left | right | |
9571 ast_predicate, // apply predicate to set; next points to next predicate | |
9572 ast_filter, // select * from left where right | |
9573 ast_string_constant, // string constant | |
9574 ast_number_constant, // number constant | |
9575 ast_variable, // variable | |
9576 ast_func_last, // last() | |
9577 ast_func_position, // position() | |
9578 ast_func_count, // count(left) | |
9579 ast_func_id, // id(left) | |
9580 ast_func_local_name_0, // local-name() | |
9581 ast_func_local_name_1, // local-name(left) | |
9582 ast_func_namespace_uri_0, // namespace-uri() | |
9583 ast_func_namespace_uri_1, // namespace-uri(left) | |
9584 ast_func_name_0, // name() | |
9585 ast_func_name_1, // name(left) | |
9586 ast_func_string_0, // string() | |
9587 ast_func_string_1, // string(left) | |
9588 ast_func_concat, // concat(left, right, siblings) | |
9589 ast_func_starts_with, // starts-with(left, right) | |
9590 ast_func_contains, // contains(left, right) | |
9591 ast_func_substring_before, // substring-before(left, right) | |
9592 ast_func_substring_after, // substring-after(left, right) | |
9593 ast_func_substring_2, // substring(left, right) | |
9594 ast_func_substring_3, // substring(left, right, third) | |
9595 ast_func_string_length_0, // string-length() | |
9596 ast_func_string_length_1, // string-length(left) | |
9597 ast_func_normalize_space_0, // normalize-space() | |
9598 ast_func_normalize_space_1, // normalize-space(left) | |
9599 ast_func_translate, // translate(left, right, third) | |
9600 ast_func_boolean, // boolean(left) | |
9601 ast_func_not, // not(left) | |
9602 ast_func_true, // true() | |
9603 ast_func_false, // false() | |
9604 ast_func_lang, // lang(left) | |
9605 ast_func_number_0, // number() | |
9606 ast_func_number_1, // number(left) | |
9607 ast_func_sum, // sum(left) | |
9608 ast_func_floor, // floor(left) | |
9609 ast_func_ceiling, // ceiling(left) | |
9610 ast_func_round, // round(left) | |
9611 ast_step, // process set left with step | |
9612 ast_step_root, // select root node | |
9613 | |
9614 ast_opt_translate_table, // translate(left, right, third) where right/third are constants | |
9615 ast_opt_compare_attribute // @name = 'string' | |
9616 }; | |
9617 | |
// Axis identifiers for ast_step nodes (stored in xpath_ast_node::_axis).
9618 enum axis_t | |
9619 { | |
9620 axis_ancestor, | |
9621 axis_ancestor_or_self, | |
9622 axis_attribute, | |
9623 axis_child, | |
9624 axis_descendant, | |
9625 axis_descendant_or_self, | |
9626 axis_following, | |
9627 axis_following_sibling, | |
9628 axis_namespace, | |
9629 axis_parent, | |
9630 axis_preceding, | |
9631 axis_preceding_sibling, | |
9632 axis_self | |
9633 }; | |
9634 | |
// Kinds of node test.
// NOTE(review): presumably stored in xpath_ast_node::_test for ast_step nodes, with the name/target kept in _data.nodetest - confirm against the code that builds steps.
9635 enum nodetest_t | |
9636 { | |
9637 nodetest_none, | |
9638 nodetest_name, | |
9639 nodetest_type_node, | |
9640 nodetest_type_comment, | |
9641 nodetest_type_pi, | |
9642 nodetest_type_text, | |
9643 nodetest_pi, | |
9644 nodetest_all, | |
9645 nodetest_all_in_namespace | |
9646 }; | |
9647 | |
// Predicate categories.
// NOTE(review): presumably used to pick a cheaper evaluation strategy for predicates that are position-invariant or constant - usage is outside this chunk, confirm there.
9648 enum predicate_t | |
9649 { | |
9650 predicate_default, | |
9651 predicate_posinv, | |
9652 predicate_constant, | |
9653 predicate_constant_one | |
9654 }; | |
9655 | |
// Mode argument of eval_node_set, also consulted by xpath_ast_node::eval_once.
// NOTE(review): presumably "all" requests the complete set, "any" an arbitrary member and "first" the first member in document order - confirm against eval_node_set.
9656 enum nodeset_eval_t | |
9657 { | |
9658 nodeset_eval_all, | |
9659 nodeset_eval_any, | |
9660 nodeset_eval_first | |
9661 }; | |
9662 | |
// Wraps an axis_t value in a type: axis_to_type<N>::axis is a static constant equal to N.
9663 template <axis_t N> struct axis_to_type | |
9664 { | |
9665 static const axis_t axis; | |
9666 }; | |
9667 | |
// Out-of-class definition of the static member for every instantiation
9668 template <axis_t N> const axis_t axis_to_type<N>::axis = N; | |
9669 | |
9670 class xpath_ast_node | |
9671 { | |
9672 private: | |
9673 // node type | |
9674 char _type; | |
9675 char _rettype; | |
9676 | |
9677 // for ast_step | |
9678 char _axis; | |
9679 | |
9680 // for ast_step/ast_predicate/ast_filter | |
9681 char _test; | |
9682 | |
9683 // tree node structure | |
9684 xpath_ast_node* _left; | |
9685 xpath_ast_node* _right; | |
9686 xpath_ast_node* _next; | |
9687 | |
9688 union | |
9689 { | |
9690 // value for ast_string_constant | |
9691 const char_t* string; | |
9692 // value for ast_number_constant | |
9693 double number; | |
9694 // variable for ast_variable | |
9695 xpath_variable* variable; | |
9696 // node test for ast_step (node name/namespace/node type/pi target) | |
9697 const char_t* nodetest; | |
9698 // table for ast_opt_translate_table | |
9699 const unsigned char* table; | |
9700 } _data; | |
9701 | |
9702 xpath_ast_node(const xpath_ast_node&); | |
9703 xpath_ast_node& operator=(const xpath_ast_node&); | |
9704 | |
9705 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) | |
9706 { | |
9707 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); | |
9708 | |
9709 if (lt != xpath_type_node_set && rt != xpath_type_node_set) | |
9710 { | |
9711 if (lt == xpath_type_boolean || rt == xpath_type_boolean) | |
9712 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); | |
9713 else if (lt == xpath_type_number || rt == xpath_type_number) | |
9714 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); | |
9715 else if (lt == xpath_type_string || rt == xpath_type_string) | |
9716 { | |
9717 xpath_allocator_capture cr(stack.result); | |
9718 | |
9719 xpath_string ls = lhs->eval_string(c, stack); | |
9720 xpath_string rs = rhs->eval_string(c, stack); | |
9721 | |
9722 return comp(ls, rs); | |
9723 } | |
9724 } | |
9725 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) | |
9726 { | |
9727 xpath_allocator_capture cr(stack.result); | |
9728 | |
9729 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); | |
9730 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
9731 | |
9732 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) | |
9733 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
9734 { | |
9735 xpath_allocator_capture cri(stack.result); | |
9736 | |
9737 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result))) | |
9738 return true; | |
9739 } | |
9740 | |
9741 return false; | |
9742 } | |
9743 else | |
9744 { | |
9745 if (lt == xpath_type_node_set) | |
9746 { | |
9747 swap(lhs, rhs); | |
9748 swap(lt, rt); | |
9749 } | |
9750 | |
9751 if (lt == xpath_type_boolean) | |
9752 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); | |
9753 else if (lt == xpath_type_number) | |
9754 { | |
9755 xpath_allocator_capture cr(stack.result); | |
9756 | |
9757 double l = lhs->eval_number(c, stack); | |
9758 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
9759 | |
9760 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
9761 { | |
9762 xpath_allocator_capture cri(stack.result); | |
9763 | |
9764 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) | |
9765 return true; | |
9766 } | |
9767 | |
9768 return false; | |
9769 } | |
9770 else if (lt == xpath_type_string) | |
9771 { | |
9772 xpath_allocator_capture cr(stack.result); | |
9773 | |
9774 xpath_string l = lhs->eval_string(c, stack); | |
9775 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
9776 | |
9777 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
9778 { | |
9779 xpath_allocator_capture cri(stack.result); | |
9780 | |
9781 if (comp(l, string_value(*ri, stack.result))) | |
9782 return true; | |
9783 } | |
9784 | |
9785 return false; | |
9786 } | |
9787 } | |
9788 | |
9789 assert(false && "Wrong types"); // unreachable | |
9790 return false; | |
9791 } | |
9792 | |
9793 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) | |
9794 { | |
9795 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any; | |
9796 } | |
9797 | |
9798 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) | |
9799 { | |
9800 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); | |
9801 | |
9802 if (lt != xpath_type_node_set && rt != xpath_type_node_set) | |
9803 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); | |
9804 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) | |
9805 { | |
9806 xpath_allocator_capture cr(stack.result); | |
9807 | |
9808 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); | |
9809 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
9810 | |
9811 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) | |
9812 { | |
9813 xpath_allocator_capture cri(stack.result); | |
9814 | |
9815 double l = convert_string_to_number(string_value(*li, stack.result).c_str()); | |
9816 | |
9817 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
9818 { | |
9819 xpath_allocator_capture crii(stack.result); | |
9820 | |
9821 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) | |
9822 return true; | |
9823 } | |
9824 } | |
9825 | |
9826 return false; | |
9827 } | |
9828 else if (lt != xpath_type_node_set && rt == xpath_type_node_set) | |
9829 { | |
9830 xpath_allocator_capture cr(stack.result); | |
9831 | |
9832 double l = lhs->eval_number(c, stack); | |
9833 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
9834 | |
9835 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
9836 { | |
9837 xpath_allocator_capture cri(stack.result); | |
9838 | |
9839 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) | |
9840 return true; | |
9841 } | |
9842 | |
9843 return false; | |
9844 } | |
9845 else if (lt == xpath_type_node_set && rt != xpath_type_node_set) | |
9846 { | |
9847 xpath_allocator_capture cr(stack.result); | |
9848 | |
9849 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); | |
9850 double r = rhs->eval_number(c, stack); | |
9851 | |
9852 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) | |
9853 { | |
9854 xpath_allocator_capture cri(stack.result); | |
9855 | |
9856 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r)) | |
9857 return true; | |
9858 } | |
9859 | |
9860 return false; | |
9861 } | |
9862 else | |
9863 { | |
9864 assert(false && "Wrong types"); // unreachable | |
9865 return false; | |
9866 } | |
9867 } | |
9868 | |
9869 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) | |
9870 { | |
9871 assert(ns.size() >= first); | |
9872 assert(expr->rettype() != xpath_type_number); | |
9873 | |
9874 size_t i = 1; | |
9875 size_t size = ns.size() - first; | |
9876 | |
9877 xpath_node* last = ns.begin() + first; | |
9878 | |
9879 // remove_if... or well, sort of | |
9880 for (xpath_node* it = last; it != ns.end(); ++it, ++i) | |
9881 { | |
9882 xpath_context c(*it, i, size); | |
9883 | |
9884 if (expr->eval_boolean(c, stack)) | |
9885 { | |
9886 *last++ = *it; | |
9887 | |
9888 if (once) break; | |
9889 } | |
9890 } | |
9891 | |
9892 ns.truncate(last); | |
9893 } | |
9894 | |
9895 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) | |
9896 { | |
9897 assert(ns.size() >= first); | |
9898 assert(expr->rettype() == xpath_type_number); | |
9899 | |
9900 size_t i = 1; | |
9901 size_t size = ns.size() - first; | |
9902 | |
9903 xpath_node* last = ns.begin() + first; | |
9904 | |
9905 // remove_if... or well, sort of | |
9906 for (xpath_node* it = last; it != ns.end(); ++it, ++i) | |
9907 { | |
9908 xpath_context c(*it, i, size); | |
9909 | |
9910 if (expr->eval_number(c, stack) == static_cast<double>(i)) | |
9911 { | |
9912 *last++ = *it; | |
9913 | |
9914 if (once) break; | |
9915 } | |
9916 } | |
9917 | |
9918 ns.truncate(last); | |
9919 } | |
9920 | |
9921 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) | |
9922 { | |
9923 assert(ns.size() >= first); | |
9924 assert(expr->rettype() == xpath_type_number); | |
9925 | |
9926 size_t size = ns.size() - first; | |
9927 | |
9928 xpath_node* last = ns.begin() + first; | |
9929 | |
9930 xpath_context c(xpath_node(), 1, size); | |
9931 | |
9932 double er = expr->eval_number(c, stack); | |
9933 | |
9934 if (er >= 1.0 && er <= static_cast<double>(size)) | |
9935 { | |
9936 size_t eri = static_cast<size_t>(er); | |
9937 | |
9938 if (er == static_cast<double>(eri)) | |
9939 { | |
9940 xpath_node r = last[eri - 1]; | |
9941 | |
9942 *last++ = r; | |
9943 } | |
9944 } | |
9945 | |
9946 ns.truncate(last); | |
9947 } | |
9948 | |
9949 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) | |
9950 { | |
9951 if (ns.size() == first) return; | |
9952 | |
9953 assert(_type == ast_filter || _type == ast_predicate); | |
9954 | |
9955 if (_test == predicate_constant || _test == predicate_constant_one) | |
9956 apply_predicate_number_const(ns, first, _right, stack); | |
9957 else if (_right->rettype() == xpath_type_number) | |
9958 apply_predicate_number(ns, first, _right, stack, once); | |
9959 else | |
9960 apply_predicate_boolean(ns, first, _right, stack, once); | |
9961 } | |
9962 | |
9963 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) | |
9964 { | |
9965 if (ns.size() == first) return; | |
9966 | |
9967 bool last_once = eval_once(ns.type(), eval); | |
9968 | |
9969 for (xpath_ast_node* pred = _right; pred; pred = pred->_next) | |
9970 pred->apply_predicate(ns, first, stack, !pred->_next && last_once); | |
9971 } | |
9972 | |
9973 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) | |
9974 { | |
9975 assert(a); | |
9976 | |
9977 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT(""); | |
9978 | |
9979 switch (_test) | |
9980 { | |
9981 case nodetest_name: | |
9982 if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) | |
9983 { | |
9984 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); | |
9985 return true; | |
9986 } | |
9987 break; | |
9988 | |
9989 case nodetest_type_node: | |
9990 case nodetest_all: | |
9991 if (is_xpath_attribute(name)) | |
9992 { | |
9993 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); | |
9994 return true; | |
9995 } | |
9996 break; | |
9997 | |
9998 case nodetest_all_in_namespace: | |
9999 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) | |
10000 { | |
10001 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); | |
10002 return true; | |
10003 } | |
10004 break; | |
10005 | |
10006 default: | |
10007 ; | |
10008 } | |
10009 | |
10010 return false; | |
10011 } | |
10012 | |
10013 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) | |
10014 { | |
10015 assert(n); | |
10016 | |
10017 xml_node_type type = PUGI__NODETYPE(n); | |
10018 | |
10019 switch (_test) | |
10020 { | |
10021 case nodetest_name: | |
10022 if (type == node_element && n->name && strequal(n->name, _data.nodetest)) | |
10023 { | |
10024 ns.push_back(xml_node(n), alloc); | |
10025 return true; | |
10026 } | |
10027 break; | |
10028 | |
10029 case nodetest_type_node: | |
10030 ns.push_back(xml_node(n), alloc); | |
10031 return true; | |
10032 | |
10033 case nodetest_type_comment: | |
10034 if (type == node_comment) | |
10035 { | |
10036 ns.push_back(xml_node(n), alloc); | |
10037 return true; | |
10038 } | |
10039 break; | |
10040 | |
10041 case nodetest_type_text: | |
10042 if (type == node_pcdata || type == node_cdata) | |
10043 { | |
10044 ns.push_back(xml_node(n), alloc); | |
10045 return true; | |
10046 } | |
10047 break; | |
10048 | |
10049 case nodetest_type_pi: | |
10050 if (type == node_pi) | |
10051 { | |
10052 ns.push_back(xml_node(n), alloc); | |
10053 return true; | |
10054 } | |
10055 break; | |
10056 | |
10057 case nodetest_pi: | |
10058 if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) | |
10059 { | |
10060 ns.push_back(xml_node(n), alloc); | |
10061 return true; | |
10062 } | |
10063 break; | |
10064 | |
10065 case nodetest_all: | |
10066 if (type == node_element) | |
10067 { | |
10068 ns.push_back(xml_node(n), alloc); | |
10069 return true; | |
10070 } | |
10071 break; | |
10072 | |
10073 case nodetest_all_in_namespace: | |
10074 if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) | |
10075 { | |
10076 ns.push_back(xml_node(n), alloc); | |
10077 return true; | |
10078 } | |
10079 break; | |
10080 | |
10081 default: | |
10082 assert(false && "Unknown axis"); // unreachable | |
10083 } | |
10084 | |
10085 return false; | |
10086 } | |
10087 | |
// Collects into ns the nodes that lie on the axis T::axis relative to node n.
// Every candidate is handed to step_push(), which appends it only if it passes this step's node test.
// When once is true the function returns right after the first candidate that step_push() accepted.
// The trailing unnamed parameter is never read; it only selects the axis through its type.
template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T)
{
	const axis_t axis = T::axis;

	// Note on "step_push(...) & once": both operands are bool and the bitwise form always calls
	// step_push(), so the node is pushed regardless of the value of once.
	switch (axis)
	{
	case axis_attribute:
	{
		// all attributes of n, in list order
		for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute)
			if (step_push(ns, a, n, alloc) & once)
				return;

		break;
	}

	case axis_child:
	{
		// direct children of n, first to last
		for (xml_node_struct* c = n->first_child; c; c = c->next_sibling)
			if (step_push(ns, c, alloc) & once)
				return;

		break;
	}

	case axis_descendant:
	case axis_descendant_or_self:
	{
		// the -or-self variant offers n itself before its descendants
		if (axis == axis_descendant_or_self)
			if (step_push(ns, n, alloc) & once)
				return;

		xml_node_struct* cur = n->first_child;

		// iterative pre-order walk of the subtree below n
		while (cur)
		{
			if (step_push(ns, cur, alloc) & once)
				return;

			if (cur->first_child)
				cur = cur->first_child;
			else
			{
				// no children: climb until a node with a next sibling is found
				while (!cur->next_sibling)
				{
					cur = cur->parent;

					// back at the starting node: the whole subtree has been visited
					if (cur == n) return;
				}

				cur = cur->next_sibling;
			}
		}

		break;
	}

	case axis_following_sibling:
	{
		// siblings after n, nearest first
		for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling)
			if (step_push(ns, c, alloc) & once)
				return;

		break;
	}

	case axis_preceding_sibling:
	{
		// NOTE(review): prev_sibling_c looks like a cyclic link (the first sibling's link leading to the
		// last sibling, whose next_sibling is null); that is what ends this loop - confirm against xml_node_struct
		for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c)
			if (step_push(ns, c, alloc) & once)
				return;

		break;
	}

	case axis_following:
	{
		xml_node_struct* cur = n;

		// exit from this node so that we don't include descendants
		while (!cur->next_sibling)
		{
			cur = cur->parent;

			// ran out of ancestors: nothing follows n
			if (!cur) return;
		}

		cur = cur->next_sibling;

		// pre-order walk over everything after that point; it only ends by running out of ancestors
		while (cur)
		{
			if (step_push(ns, cur, alloc) & once)
				return;

			if (cur->first_child)
				cur = cur->first_child;
			else
			{
				while (!cur->next_sibling)
				{
					cur = cur->parent;

					if (!cur) return;
				}

				cur = cur->next_sibling;
			}
		}

		break;
	}

	case axis_preceding:
	{
		xml_node_struct* cur = n;

		// exit from this node so that we don't include descendants
		// NOTE(review): "!cur->prev_sibling_c->next_sibling" presumably means "cur has no previous sibling"
		// (see the cyclic-link assumption on the preceding-sibling axis) - confirm
		while (!cur->prev_sibling_c->next_sibling)
		{
			cur = cur->parent;

			if (!cur) return;
		}

		cur = cur->prev_sibling_c;

		while (cur)
		{
			// descend before pushing, so a node is offered only after the nodes below it
			// NOTE(review): first_child->prev_sibling_c is presumably the last child - confirm
			if (cur->first_child)
				cur = cur->first_child->prev_sibling_c;
			else
			{
				// leaf node, can't be ancestor
				if (step_push(ns, cur, alloc) & once)
					return;

				while (!cur->prev_sibling_c->next_sibling)
				{
					cur = cur->parent;

					if (!cur) return;

					// a parent reached while climbing is offered unless it is an ancestor of n
					if (!node_is_ancestor(cur, n))
						if (step_push(ns, cur, alloc) & once)
							return;
				}

				cur = cur->prev_sibling_c;
			}
		}

		break;
	}

	case axis_ancestor:
	case axis_ancestor_or_self:
	{
		// the -or-self variant offers n itself before its ancestors
		if (axis == axis_ancestor_or_self)
			if (step_push(ns, n, alloc) & once)
				return;

		xml_node_struct* cur = n->parent;

		// follow the parent chain until it ends
		while (cur)
		{
			if (step_push(ns, cur, alloc) & once)
				return;

			cur = cur->parent;
		}

		break;
	}

	case axis_self:
	{
		// single candidate, so once is irrelevant here
		step_push(ns, n, alloc);

		break;
	}

	case axis_parent:
	{
		// single candidate, offered only if n has a parent
		if (n->parent)
			step_push(ns, n->parent, alloc);

		break;
	}

	default:
		assert(false && "Unimplemented axis"); // unreachable
	}
}
10280 | |
10281 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) | |
10282 { | |
10283 const axis_t axis = T::axis; | |
10284 | |
10285 switch (axis) | |
10286 { | |
10287 case axis_ancestor: | |
10288 case axis_ancestor_or_self: | |
10289 { | |
10290 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test | |
10291 if (step_push(ns, a, p, alloc) & once) | |
10292 return; | |
10293 | |
10294 xml_node_struct* cur = p; | |
10295 | |
10296 while (cur) | |
10297 { | |
10298 if (step_push(ns, cur, alloc) & once) | |
10299 return; | |
10300 | |
10301 cur = cur->parent; | |
10302 } | |
10303 | |
10304 break; | |
10305 } | |
10306 | |
10307 case axis_descendant_or_self: | |
10308 case axis_self: | |
10309 { | |
10310 if (_test == nodetest_type_node) // reject attributes based on principal node type test | |
10311 step_push(ns, a, p, alloc); | |
10312 | |
10313 break; | |
10314 } | |
10315 | |
10316 case axis_following: | |
10317 { | |
10318 xml_node_struct* cur = p; | |
10319 | |
10320 while (cur) | |
10321 { | |
10322 if (cur->first_child) | |
10323 cur = cur->first_child; | |
10324 else | |
10325 { | |
10326 while (!cur->next_sibling) | |
10327 { | |
10328 cur = cur->parent; | |
10329 | |
10330 if (!cur) return; | |
10331 } | |
10332 | |
10333 cur = cur->next_sibling; | |
10334 } | |
10335 | |
10336 if (step_push(ns, cur, alloc) & once) | |
10337 return; | |
10338 } | |
10339 | |
10340 break; | |
10341 } | |
10342 | |
10343 case axis_parent: | |
10344 { | |
10345 step_push(ns, p, alloc); | |
10346 | |
10347 break; | |
10348 } | |
10349 | |
10350 case axis_preceding: | |
10351 { | |
10352 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding | |
10353 step_fill(ns, p, alloc, once, v); | |
10354 break; | |
10355 } | |
10356 | |
10357 default: | |
10358 assert(false && "Unimplemented axis"); // unreachable | |
10359 } | |
10360 } | |
10361 | |
10362 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) | |
10363 { | |
10364 const axis_t axis = T::axis; | |
10365 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); | |
10366 | |
10367 if (xn.node()) | |
10368 step_fill(ns, xn.node().internal_object(), alloc, once, v); | |
10369 else if (axis_has_attributes && xn.attribute() && xn.parent()) | |
10370 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v); | |
10371 } | |
10372 | |
10373 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) | |
10374 { | |
10375 const axis_t axis = T::axis; | |
10376 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); | |
10377 const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; | |
10378 | |
10379 bool once = | |
10380 (axis == axis_attribute && _test == nodetest_name) || | |
10381 (!_right && eval_once(axis_type, eval)) || | |
10382 // coverity[mixed_enums] | |
10383 (_right && !_right->_next && _right->_test == predicate_constant_one); | |
10384 | |
10385 xpath_node_set_raw ns; | |
10386 ns.set_type(axis_type); | |
10387 | |
10388 if (_left) | |
10389 { | |
10390 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); | |
10391 | |
10392 // self axis preserves the original order | |
10393 if (axis == axis_self) ns.set_type(s.type()); | |
10394 | |
10395 for (const xpath_node* it = s.begin(); it != s.end(); ++it) | |
10396 { | |
10397 size_t size = ns.size(); | |
10398 | |
10399 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes | |
10400 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); | |
10401 | |
10402 step_fill(ns, *it, stack.result, once, v); | |
10403 if (_right) apply_predicates(ns, size, stack, eval); | |
10404 } | |
10405 } | |
10406 else | |
10407 { | |
10408 step_fill(ns, c.n, stack.result, once, v); | |
10409 if (_right) apply_predicates(ns, 0, stack, eval); | |
10410 } | |
10411 | |
10412 // child, attribute and self axes always generate unique set of nodes | |
10413 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice | |
10414 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted) | |
10415 ns.remove_duplicates(stack.temp); | |
10416 | |
10417 return ns; | |
10418 } | |
10419 | |
10420 public: | |
10421 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): | |
10422 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) | |
10423 { | |
10424 assert(type == ast_string_constant); | |
10425 _data.string = value; | |
10426 } | |
10427 | |
10428 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value): | |
10429 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) | |
10430 { | |
10431 assert(type == ast_number_constant); | |
10432 _data.number = value; | |
10433 } | |
10434 | |
10435 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value): | |
10436 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) | |
10437 { | |
10438 assert(type == ast_variable); | |
10439 _data.variable = value; | |
10440 } | |
10441 | |
10442 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0): | |
10443 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) | |
10444 { | |
10445 } | |
10446 | |
10447 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): | |
10448 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0) | |
10449 { | |
10450 assert(type == ast_step); | |
10451 _data.nodetest = contents; | |
10452 } | |
10453 | |
10454 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test): | |
10455 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0) | |
10456 { | |
10457 assert(type == ast_filter || type == ast_predicate); | |
10458 } | |
10459 | |
10460 void set_next(xpath_ast_node* value) | |
10461 { | |
10462 _next = value; | |
10463 } | |
10464 | |
10465 void set_right(xpath_ast_node* value) | |
10466 { | |
10467 _right = value; | |
10468 } | |
10469 | |
10470 bool eval_boolean(const xpath_context& c, const xpath_stack& stack) | |
10471 { | |
10472 switch (_type) | |
10473 { | |
10474 case ast_op_or: | |
10475 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack); | |
10476 | |
10477 case ast_op_and: | |
10478 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); | |
10479 | |
10480 case ast_op_equal: | |
10481 return compare_eq(_left, _right, c, stack, equal_to()); | |
10482 | |
10483 case ast_op_not_equal: | |
10484 return compare_eq(_left, _right, c, stack, not_equal_to()); | |
10485 | |
10486 case ast_op_less: | |
10487 return compare_rel(_left, _right, c, stack, less()); | |
10488 | |
10489 case ast_op_greater: | |
10490 return compare_rel(_right, _left, c, stack, less()); | |
10491 | |
10492 case ast_op_less_or_equal: | |
10493 return compare_rel(_left, _right, c, stack, less_equal()); | |
10494 | |
10495 case ast_op_greater_or_equal: | |
10496 return compare_rel(_right, _left, c, stack, less_equal()); | |
10497 | |
10498 case ast_func_starts_with: | |
10499 { | |
10500 xpath_allocator_capture cr(stack.result); | |
10501 | |
10502 xpath_string lr = _left->eval_string(c, stack); | |
10503 xpath_string rr = _right->eval_string(c, stack); | |
10504 | |
10505 return starts_with(lr.c_str(), rr.c_str()); | |
10506 } | |
10507 | |
10508 case ast_func_contains: | |
10509 { | |
10510 xpath_allocator_capture cr(stack.result); | |
10511 | |
10512 xpath_string lr = _left->eval_string(c, stack); | |
10513 xpath_string rr = _right->eval_string(c, stack); | |
10514 | |
10515 return find_substring(lr.c_str(), rr.c_str()) != 0; | |
10516 } | |
10517 | |
10518 case ast_func_boolean: | |
10519 return _left->eval_boolean(c, stack); | |
10520 | |
10521 case ast_func_not: | |
10522 return !_left->eval_boolean(c, stack); | |
10523 | |
10524 case ast_func_true: | |
10525 return true; | |
10526 | |
10527 case ast_func_false: | |
10528 return false; | |
10529 | |
10530 case ast_func_lang: | |
10531 { | |
10532 if (c.n.attribute()) return false; | |
10533 | |
10534 xpath_allocator_capture cr(stack.result); | |
10535 | |
10536 xpath_string lang = _left->eval_string(c, stack); | |
10537 | |
10538 for (xml_node n = c.n.node(); n; n = n.parent()) | |
10539 { | |
10540 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); | |
10541 | |
10542 if (a) | |
10543 { | |
10544 const char_t* value = a.value(); | |
10545 | |
10546 // strnicmp / strncasecmp is not portable | |
10547 for (const char_t* lit = lang.c_str(); *lit; ++lit) | |
10548 { | |
10549 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false; | |
10550 ++value; | |
10551 } | |
10552 | |
10553 return *value == 0 || *value == '-'; | |
10554 } | |
10555 } | |
10556 | |
10557 return false; | |
10558 } | |
10559 | |
10560 case ast_opt_compare_attribute: | |
10561 { | |
10562 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string(); | |
10563 | |
10564 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); | |
10565 | |
10566 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); | |
10567 } | |
10568 | |
10569 case ast_variable: | |
10570 { | |
10571 assert(_rettype == _data.variable->type()); | |
10572 | |
10573 if (_rettype == xpath_type_boolean) | |
10574 return _data.variable->get_boolean(); | |
10575 | |
10576 // variable needs to be converted to the correct type, this is handled by the fallthrough block below | |
10577 break; | |
10578 } | |
10579 | |
10580 default: | |
10581 ; | |
10582 } | |
10583 | |
10584 // none of the ast types that return the value directly matched, we need to perform type conversion | |
10585 switch (_rettype) | |
10586 { | |
10587 case xpath_type_number: | |
10588 return convert_number_to_boolean(eval_number(c, stack)); | |
10589 | |
10590 case xpath_type_string: | |
10591 { | |
10592 xpath_allocator_capture cr(stack.result); | |
10593 | |
10594 return !eval_string(c, stack).empty(); | |
10595 } | |
10596 | |
10597 case xpath_type_node_set: | |
10598 { | |
10599 xpath_allocator_capture cr(stack.result); | |
10600 | |
10601 return !eval_node_set(c, stack, nodeset_eval_any).empty(); | |
10602 } | |
10603 | |
10604 default: | |
10605 assert(false && "Wrong expression for return type boolean"); // unreachable | |
10606 return false; | |
10607 } | |
10608 } | |
10609 | |
10610 double eval_number(const xpath_context& c, const xpath_stack& stack) | |
10611 { | |
10612 switch (_type) | |
10613 { | |
10614 case ast_op_add: | |
10615 return _left->eval_number(c, stack) + _right->eval_number(c, stack); | |
10616 | |
10617 case ast_op_subtract: | |
10618 return _left->eval_number(c, stack) - _right->eval_number(c, stack); | |
10619 | |
10620 case ast_op_multiply: | |
10621 return _left->eval_number(c, stack) * _right->eval_number(c, stack); | |
10622 | |
10623 case ast_op_divide: | |
10624 return _left->eval_number(c, stack) / _right->eval_number(c, stack); | |
10625 | |
10626 case ast_op_mod: | |
10627 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack)); | |
10628 | |
10629 case ast_op_negate: | |
10630 return -_left->eval_number(c, stack); | |
10631 | |
10632 case ast_number_constant: | |
10633 return _data.number; | |
10634 | |
10635 case ast_func_last: | |
10636 return static_cast<double>(c.size); | |
10637 | |
10638 case ast_func_position: | |
10639 return static_cast<double>(c.position); | |
10640 | |
10641 case ast_func_count: | |
10642 { | |
10643 xpath_allocator_capture cr(stack.result); | |
10644 | |
10645 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size()); | |
10646 } | |
10647 | |
10648 case ast_func_string_length_0: | |
10649 { | |
10650 xpath_allocator_capture cr(stack.result); | |
10651 | |
10652 return static_cast<double>(string_value(c.n, stack.result).length()); | |
10653 } | |
10654 | |
10655 case ast_func_string_length_1: | |
10656 { | |
10657 xpath_allocator_capture cr(stack.result); | |
10658 | |
10659 return static_cast<double>(_left->eval_string(c, stack).length()); | |
10660 } | |
10661 | |
10662 case ast_func_number_0: | |
10663 { | |
10664 xpath_allocator_capture cr(stack.result); | |
10665 | |
10666 return convert_string_to_number(string_value(c.n, stack.result).c_str()); | |
10667 } | |
10668 | |
10669 case ast_func_number_1: | |
10670 return _left->eval_number(c, stack); | |
10671 | |
10672 case ast_func_sum: | |
10673 { | |
10674 xpath_allocator_capture cr(stack.result); | |
10675 | |
10676 double r = 0; | |
10677 | |
10678 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); | |
10679 | |
10680 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) | |
10681 { | |
10682 xpath_allocator_capture cri(stack.result); | |
10683 | |
10684 r += convert_string_to_number(string_value(*it, stack.result).c_str()); | |
10685 } | |
10686 | |
10687 return r; | |
10688 } | |
10689 | |
10690 case ast_func_floor: | |
10691 { | |
10692 double r = _left->eval_number(c, stack); | |
10693 | |
10694 return r == r ? floor(r) : r; | |
10695 } | |
10696 | |
10697 case ast_func_ceiling: | |
10698 { | |
10699 double r = _left->eval_number(c, stack); | |
10700 | |
10701 return r == r ? ceil(r) : r; | |
10702 } | |
10703 | |
10704 case ast_func_round: | |
10705 return round_nearest_nzero(_left->eval_number(c, stack)); | |
10706 | |
10707 case ast_variable: | |
10708 { | |
10709 assert(_rettype == _data.variable->type()); | |
10710 | |
10711 if (_rettype == xpath_type_number) | |
10712 return _data.variable->get_number(); | |
10713 | |
10714 // variable needs to be converted to the correct type, this is handled by the fallthrough block below | |
10715 break; | |
10716 } | |
10717 | |
10718 default: | |
10719 ; | |
10720 } | |
10721 | |
10722 // none of the ast types that return the value directly matched, we need to perform type conversion | |
10723 switch (_rettype) | |
10724 { | |
10725 case xpath_type_boolean: | |
10726 return eval_boolean(c, stack) ? 1 : 0; | |
10727 | |
10728 case xpath_type_string: | |
10729 { | |
10730 xpath_allocator_capture cr(stack.result); | |
10731 | |
10732 return convert_string_to_number(eval_string(c, stack).c_str()); | |
10733 } | |
10734 | |
10735 case xpath_type_node_set: | |
10736 { | |
10737 xpath_allocator_capture cr(stack.result); | |
10738 | |
10739 return convert_string_to_number(eval_string(c, stack).c_str()); | |
10740 } | |
10741 | |
10742 default: | |
10743 assert(false && "Wrong expression for return type number"); // unreachable | |
10744 return 0; | |
10745 } | |
10746 } | |
10747 | |
10748 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) | |
10749 { | |
10750 assert(_type == ast_func_concat); | |
10751 | |
10752 xpath_allocator_capture ct(stack.temp); | |
10753 | |
10754 // count the string number | |
10755 size_t count = 1; | |
10756 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++; | |
10757 | |
10758 // allocate a buffer for temporary string objects | |
10759 xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string))); | |
10760 if (!buffer) return xpath_string(); | |
10761 | |
10762 // evaluate all strings to temporary stack | |
10763 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
10764 | |
10765 buffer[0] = _left->eval_string(c, swapped_stack); | |
10766 | |
10767 size_t pos = 1; | |
10768 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack); | |
10769 assert(pos == count); | |
10770 | |
10771 // get total length | |
10772 size_t length = 0; | |
10773 for (size_t i = 0; i < count; ++i) length += buffer[i].length(); | |
10774 | |
10775 // create final string | |
10776 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t))); | |
10777 if (!result) return xpath_string(); | |
10778 | |
10779 char_t* ri = result; | |
10780 | |
10781 for (size_t j = 0; j < count; ++j) | |
10782 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi) | |
10783 *ri++ = *bi; | |
10784 | |
10785 *ri = 0; | |
10786 | |
10787 return xpath_string::from_heap_preallocated(result, ri); | |
10788 } | |
10789 | |
10790 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) | |
10791 { | |
10792 switch (_type) | |
10793 { | |
10794 case ast_string_constant: | |
10795 return xpath_string::from_const(_data.string); | |
10796 | |
10797 case ast_func_local_name_0: | |
10798 { | |
10799 xpath_node na = c.n; | |
10800 | |
10801 return xpath_string::from_const(local_name(na)); | |
10802 } | |
10803 | |
10804 case ast_func_local_name_1: | |
10805 { | |
10806 xpath_allocator_capture cr(stack.result); | |
10807 | |
10808 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); | |
10809 xpath_node na = ns.first(); | |
10810 | |
10811 return xpath_string::from_const(local_name(na)); | |
10812 } | |
10813 | |
10814 case ast_func_name_0: | |
10815 { | |
10816 xpath_node na = c.n; | |
10817 | |
10818 return xpath_string::from_const(qualified_name(na)); | |
10819 } | |
10820 | |
10821 case ast_func_name_1: | |
10822 { | |
10823 xpath_allocator_capture cr(stack.result); | |
10824 | |
10825 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); | |
10826 xpath_node na = ns.first(); | |
10827 | |
10828 return xpath_string::from_const(qualified_name(na)); | |
10829 } | |
10830 | |
10831 case ast_func_namespace_uri_0: | |
10832 { | |
10833 xpath_node na = c.n; | |
10834 | |
10835 return xpath_string::from_const(namespace_uri(na)); | |
10836 } | |
10837 | |
10838 case ast_func_namespace_uri_1: | |
10839 { | |
10840 xpath_allocator_capture cr(stack.result); | |
10841 | |
10842 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); | |
10843 xpath_node na = ns.first(); | |
10844 | |
10845 return xpath_string::from_const(namespace_uri(na)); | |
10846 } | |
10847 | |
10848 case ast_func_string_0: | |
10849 return string_value(c.n, stack.result); | |
10850 | |
10851 case ast_func_string_1: | |
10852 return _left->eval_string(c, stack); | |
10853 | |
10854 case ast_func_concat: | |
10855 return eval_string_concat(c, stack); | |
10856 | |
10857 case ast_func_substring_before: | |
10858 { | |
10859 xpath_allocator_capture cr(stack.temp); | |
10860 | |
10861 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
10862 | |
10863 xpath_string s = _left->eval_string(c, swapped_stack); | |
10864 xpath_string p = _right->eval_string(c, swapped_stack); | |
10865 | |
10866 const char_t* pos = find_substring(s.c_str(), p.c_str()); | |
10867 | |
10868 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); | |
10869 } | |
10870 | |
10871 case ast_func_substring_after: | |
10872 { | |
10873 xpath_allocator_capture cr(stack.temp); | |
10874 | |
10875 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
10876 | |
10877 xpath_string s = _left->eval_string(c, swapped_stack); | |
10878 xpath_string p = _right->eval_string(c, swapped_stack); | |
10879 | |
10880 const char_t* pos = find_substring(s.c_str(), p.c_str()); | |
10881 if (!pos) return xpath_string(); | |
10882 | |
10883 const char_t* rbegin = pos + p.length(); | |
10884 const char_t* rend = s.c_str() + s.length(); | |
10885 | |
10886 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); | |
10887 } | |
10888 | |
10889 case ast_func_substring_2: | |
10890 { | |
10891 xpath_allocator_capture cr(stack.temp); | |
10892 | |
10893 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
10894 | |
10895 xpath_string s = _left->eval_string(c, swapped_stack); | |
10896 size_t s_length = s.length(); | |
10897 | |
10898 double first = round_nearest(_right->eval_number(c, stack)); | |
10899 | |
10900 if (is_nan(first)) return xpath_string(); // NaN | |
10901 else if (first >= static_cast<double>(s_length + 1)) return xpath_string(); | |
10902 | |
10903 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); | |
10904 assert(1 <= pos && pos <= s_length + 1); | |
10905 | |
10906 const char_t* rbegin = s.c_str() + (pos - 1); | |
10907 const char_t* rend = s.c_str() + s.length(); | |
10908 | |
10909 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); | |
10910 } | |
10911 | |
10912 case ast_func_substring_3: | |
10913 { | |
10914 xpath_allocator_capture cr(stack.temp); | |
10915 | |
10916 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
10917 | |
10918 xpath_string s = _left->eval_string(c, swapped_stack); | |
10919 size_t s_length = s.length(); | |
10920 | |
10921 double first = round_nearest(_right->eval_number(c, stack)); | |
10922 double last = first + round_nearest(_right->_next->eval_number(c, stack)); | |
10923 | |
10924 if (is_nan(first) || is_nan(last)) return xpath_string(); | |
10925 else if (first >= static_cast<double>(s_length + 1)) return xpath_string(); | |
10926 else if (first >= last) return xpath_string(); | |
10927 else if (last < 1) return xpath_string(); | |
10928 | |
10929 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); | |
10930 size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last); | |
10931 | |
10932 assert(1 <= pos && pos <= end && end <= s_length + 1); | |
10933 const char_t* rbegin = s.c_str() + (pos - 1); | |
10934 const char_t* rend = s.c_str() + (end - 1); | |
10935 | |
10936 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result); | |
10937 } | |
10938 | |
10939 case ast_func_normalize_space_0: | |
10940 { | |
10941 xpath_string s = string_value(c.n, stack.result); | |
10942 | |
10943 char_t* begin = s.data(stack.result); | |
10944 if (!begin) return xpath_string(); | |
10945 | |
10946 char_t* end = normalize_space(begin); | |
10947 | |
10948 return xpath_string::from_heap_preallocated(begin, end); | |
10949 } | |
10950 | |
10951 case ast_func_normalize_space_1: | |
10952 { | |
10953 xpath_string s = _left->eval_string(c, stack); | |
10954 | |
10955 char_t* begin = s.data(stack.result); | |
10956 if (!begin) return xpath_string(); | |
10957 | |
10958 char_t* end = normalize_space(begin); | |
10959 | |
10960 return xpath_string::from_heap_preallocated(begin, end); | |
10961 } | |
10962 | |
10963 case ast_func_translate: | |
10964 { | |
10965 xpath_allocator_capture cr(stack.temp); | |
10966 | |
10967 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
10968 | |
10969 xpath_string s = _left->eval_string(c, stack); | |
10970 xpath_string from = _right->eval_string(c, swapped_stack); | |
10971 xpath_string to = _right->_next->eval_string(c, swapped_stack); | |
10972 | |
10973 char_t* begin = s.data(stack.result); | |
10974 if (!begin) return xpath_string(); | |
10975 | |
10976 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); | |
10977 | |
10978 return xpath_string::from_heap_preallocated(begin, end); | |
10979 } | |
10980 | |
10981 case ast_opt_translate_table: | |
10982 { | |
10983 xpath_string s = _left->eval_string(c, stack); | |
10984 | |
10985 char_t* begin = s.data(stack.result); | |
10986 if (!begin) return xpath_string(); | |
10987 | |
10988 char_t* end = translate_table(begin, _data.table); | |
10989 | |
10990 return xpath_string::from_heap_preallocated(begin, end); | |
10991 } | |
10992 | |
10993 case ast_variable: | |
10994 { | |
10995 assert(_rettype == _data.variable->type()); | |
10996 | |
10997 if (_rettype == xpath_type_string) | |
10998 return xpath_string::from_const(_data.variable->get_string()); | |
10999 | |
11000 // variable needs to be converted to the correct type, this is handled by the fallthrough block below | |
11001 break; | |
11002 } | |
11003 | |
11004 default: | |
11005 ; | |
11006 } | |
11007 | |
11008 // none of the ast types that return the value directly matched, we need to perform type conversion | |
11009 switch (_rettype) | |
11010 { | |
11011 case xpath_type_boolean: | |
11012 return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); | |
11013 | |
11014 case xpath_type_number: | |
11015 return convert_number_to_string(eval_number(c, stack), stack.result); | |
11016 | |
11017 case xpath_type_node_set: | |
11018 { | |
11019 xpath_allocator_capture cr(stack.temp); | |
11020 | |
11021 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
11022 | |
11023 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); | |
11024 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); | |
11025 } | |
11026 | |
11027 default: | |
11028 assert(false && "Wrong expression for return type string"); // unreachable | |
11029 return xpath_string(); | |
11030 } | |
11031 } | |
11032 | |
11033 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) | |
11034 { | |
11035 switch (_type) | |
11036 { | |
11037 case ast_op_union: | |
11038 { | |
11039 xpath_allocator_capture cr(stack.temp); | |
11040 | |
11041 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
11042 | |
11043 xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval); | |
11044 xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval); | |
11045 | |
11046 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother | |
11047 ls.set_type(xpath_node_set::type_unsorted); | |
11048 | |
11049 ls.append(rs.begin(), rs.end(), stack.result); | |
11050 ls.remove_duplicates(stack.temp); | |
11051 | |
11052 return ls; | |
11053 } | |
11054 | |
11055 case ast_filter: | |
11056 { | |
11057 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all); | |
11058 | |
11059 // either expression is a number or it contains position() call; sort by document order | |
11060 if (_test != predicate_posinv) set.sort_do(); | |
11061 | |
11062 bool once = eval_once(set.type(), eval); | |
11063 | |
11064 apply_predicate(set, 0, stack, once); | |
11065 | |
11066 return set; | |
11067 } | |
11068 | |
11069 case ast_func_id: | |
11070 return xpath_node_set_raw(); | |
11071 | |
11072 case ast_step: | |
11073 { | |
11074 switch (_axis) | |
11075 { | |
11076 case axis_ancestor: | |
11077 return step_do(c, stack, eval, axis_to_type<axis_ancestor>()); | |
11078 | |
11079 case axis_ancestor_or_self: | |
11080 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>()); | |
11081 | |
11082 case axis_attribute: | |
11083 return step_do(c, stack, eval, axis_to_type<axis_attribute>()); | |
11084 | |
11085 case axis_child: | |
11086 return step_do(c, stack, eval, axis_to_type<axis_child>()); | |
11087 | |
11088 case axis_descendant: | |
11089 return step_do(c, stack, eval, axis_to_type<axis_descendant>()); | |
11090 | |
11091 case axis_descendant_or_self: | |
11092 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>()); | |
11093 | |
11094 case axis_following: | |
11095 return step_do(c, stack, eval, axis_to_type<axis_following>()); | |
11096 | |
11097 case axis_following_sibling: | |
11098 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>()); | |
11099 | |
11100 case axis_namespace: | |
11101 // namespaced axis is not supported | |
11102 return xpath_node_set_raw(); | |
11103 | |
11104 case axis_parent: | |
11105 return step_do(c, stack, eval, axis_to_type<axis_parent>()); | |
11106 | |
11107 case axis_preceding: | |
11108 return step_do(c, stack, eval, axis_to_type<axis_preceding>()); | |
11109 | |
11110 case axis_preceding_sibling: | |
11111 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>()); | |
11112 | |
11113 case axis_self: | |
11114 return step_do(c, stack, eval, axis_to_type<axis_self>()); | |
11115 | |
11116 default: | |
11117 assert(false && "Unknown axis"); // unreachable | |
11118 return xpath_node_set_raw(); | |
11119 } | |
11120 } | |
11121 | |
11122 case ast_step_root: | |
11123 { | |
11124 assert(!_right); // root step can't have any predicates | |
11125 | |
11126 xpath_node_set_raw ns; | |
11127 | |
11128 ns.set_type(xpath_node_set::type_sorted); | |
11129 | |
11130 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result); | |
11131 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); | |
11132 | |
11133 return ns; | |
11134 } | |
11135 | |
11136 case ast_variable: | |
11137 { | |
11138 assert(_rettype == _data.variable->type()); | |
11139 | |
11140 if (_rettype == xpath_type_node_set) | |
11141 { | |
11142 const xpath_node_set& s = _data.variable->get_node_set(); | |
11143 | |
11144 xpath_node_set_raw ns; | |
11145 | |
11146 ns.set_type(s.type()); | |
11147 ns.append(s.begin(), s.end(), stack.result); | |
11148 | |
11149 return ns; | |
11150 } | |
11151 | |
11152 // variable needs to be converted to the correct type, this is handled by the fallthrough block below | |
11153 break; | |
11154 } | |
11155 | |
11156 default: | |
11157 ; | |
11158 } | |
11159 | |
11160 // none of the ast types that return the value directly matched, but conversions to node set are invalid | |
11161 assert(false && "Wrong expression for return type node set"); // unreachable | |
11162 return xpath_node_set_raw(); | |
11163 } | |
11164 | |
11165 void optimize(xpath_allocator* alloc) | |
11166 { | |
11167 if (_left) | |
11168 _left->optimize(alloc); | |
11169 | |
11170 if (_right) | |
11171 _right->optimize(alloc); | |
11172 | |
11173 if (_next) | |
11174 _next->optimize(alloc); | |
11175 | |
11176 // coverity[var_deref_model] | |
11177 optimize_self(alloc); | |
11178 } | |
11179 | |
11180 void optimize_self(xpath_allocator* alloc) | |
11181 { | |
11182 // Rewrite [position()=expr] with [expr] | |
11183 // Note that this step has to go before classification to recognize [position()=1] | |
11184 if ((_type == ast_filter || _type == ast_predicate) && | |
11185 _right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate) | |
11186 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) | |
11187 { | |
11188 _right = _right->_right; | |
11189 } | |
11190 | |
11191 // Classify filter/predicate ops to perform various optimizations during evaluation | |
11192 if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate) | |
11193 { | |
11194 assert(_test == predicate_default); | |
11195 | |
11196 if (_right->_type == ast_number_constant && _right->_data.number == 1.0) | |
11197 _test = predicate_constant_one; | |
11198 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last)) | |
11199 _test = predicate_constant; | |
11200 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) | |
11201 _test = predicate_posinv; | |
11202 } | |
11203 | |
11204 // Rewrite descendant-or-self::node()/child::foo with descendant::foo | |
11205 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately | |
11206 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes | |
11207 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1]) | |
11208 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && | |
11209 _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right && | |
11210 is_posinv_step()) | |
11211 { | |
11212 if (_axis == axis_child || _axis == axis_descendant) | |
11213 _axis = axis_descendant; | |
11214 else | |
11215 _axis = axis_descendant_or_self; | |
11216 | |
11217 _left = _left->_left; | |
11218 } | |
11219 | |
11220 // Use optimized lookup table implementation for translate() with constant arguments | |
11221 if (_type == ast_func_translate && | |
11222 _right && // workaround for clang static analyzer (_right is never null for ast_func_translate) | |
11223 _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) | |
11224 { | |
11225 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); | |
11226 | |
11227 if (table) | |
11228 { | |
11229 _type = ast_opt_translate_table; | |
11230 _data.table = table; | |
11231 } | |
11232 } | |
11233 | |
11234 // Use optimized path for @attr = 'value' or @attr = $value | |
11235 if (_type == ast_op_equal && | |
11236 _left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal) | |
11237 // coverity[mixed_enums] | |
11238 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && | |
11239 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string))) | |
11240 { | |
11241 _type = ast_opt_compare_attribute; | |
11242 } | |
11243 } | |
11244 | |
11245 bool is_posinv_expr() const | |
11246 { | |
11247 switch (_type) | |
11248 { | |
11249 case ast_func_position: | |
11250 case ast_func_last: | |
11251 return false; | |
11252 | |
11253 case ast_string_constant: | |
11254 case ast_number_constant: | |
11255 case ast_variable: | |
11256 return true; | |
11257 | |
11258 case ast_step: | |
11259 case ast_step_root: | |
11260 return true; | |
11261 | |
11262 case ast_predicate: | |
11263 case ast_filter: | |
11264 return true; | |
11265 | |
11266 default: | |
11267 if (_left && !_left->is_posinv_expr()) return false; | |
11268 | |
11269 for (xpath_ast_node* n = _right; n; n = n->_next) | |
11270 if (!n->is_posinv_expr()) return false; | |
11271 | |
11272 return true; | |
11273 } | |
11274 } | |
11275 | |
11276 bool is_posinv_step() const | |
11277 { | |
11278 assert(_type == ast_step); | |
11279 | |
11280 for (xpath_ast_node* n = _right; n; n = n->_next) | |
11281 { | |
11282 assert(n->_type == ast_predicate); | |
11283 | |
11284 if (n->_test != predicate_posinv) | |
11285 return false; | |
11286 } | |
11287 | |
11288 return true; | |
11289 } | |
11290 | |
11291 xpath_value_type rettype() const | |
11292 { | |
11293 return static_cast<xpath_value_type>(_rettype); | |
11294 } | |
11295 }; | |
11296 | |
// Maximum nesting depth accepted by the XPath parser.
// Defaults to 1024 and can be overridden at build time by defining PUGIXML_XPATH_DEPTH_LIMIT.
#ifdef PUGIXML_XPATH_DEPTH_LIMIT
static const size_t xpath_ast_depth_limit = PUGIXML_XPATH_DEPTH_LIMIT;
#else
static const size_t xpath_ast_depth_limit = 1024;
#endif
11304 | |
11305 struct xpath_parser | |
11306 { | |
// Allocator that backs AST nodes (alloc_node) and copied string literals (alloc_string)
11307 xpath_allocator* _alloc; | |
// Lexer that supplies the current token and its contents to the parse_* methods
11308 xpath_lexer _lexer; | |
11309 | |
// Start of the query text; error() subtracts it from the lexer position to report an offset
11310 const char_t* _query; | |
// Variable set used to resolve variable references; a null set is reported as a parse error
11311 xpath_variable_set* _variables; | |
11312 | |
// Receives the error message and offset written by error()
11313 xpath_parse_result* _result; | |
11314 | |
// Scratch buffer handed to get_variable_scratch and convert_string_to_number_scratch
11315 char_t _scratch[32]; | |
11316 | |
// Current nesting depth, checked against xpath_ast_depth_limit while parsing
11317 size_t _depth; | |
11318 | |
11319 xpath_ast_node* error(const char* message) | |
11320 { | |
11321 _result->error = message; | |
11322 _result->offset = _lexer.current_pos() - _query; | |
11323 | |
11324 return 0; | |
11325 } | |
11326 | |
11327 xpath_ast_node* error_oom() | |
11328 { | |
11329 assert(_alloc->_error); | |
11330 *_alloc->_error = true; | |
11331 | |
11332 return 0; | |
11333 } | |
11334 | |
11335 xpath_ast_node* error_rec() | |
11336 { | |
11337 return error("Exceeded maximum allowed query depth"); | |
11338 } | |
11339 | |
11340 void* alloc_node() | |
11341 { | |
11342 return _alloc->allocate(sizeof(xpath_ast_node)); | |
11343 } | |
11344 | |
11345 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value) | |
11346 { | |
11347 void* memory = alloc_node(); | |
11348 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; | |
11349 } | |
11350 | |
11351 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value) | |
11352 { | |
11353 void* memory = alloc_node(); | |
11354 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; | |
11355 } | |
11356 | |
11357 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value) | |
11358 { | |
11359 void* memory = alloc_node(); | |
11360 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; | |
11361 } | |
11362 | |
11363 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0) | |
11364 { | |
11365 void* memory = alloc_node(); | |
11366 return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0; | |
11367 } | |
11368 | |
11369 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents) | |
11370 { | |
11371 void* memory = alloc_node(); | |
11372 return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0; | |
11373 } | |
11374 | |
11375 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test) | |
11376 { | |
11377 void* memory = alloc_node(); | |
11378 return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0; | |
11379 } | |
11380 | |
11381 const char_t* alloc_string(const xpath_lexer_string& value) | |
11382 { | |
11383 if (!value.begin) | |
11384 return PUGIXML_TEXT(""); | |
11385 | |
11386 size_t length = static_cast<size_t>(value.end - value.begin); | |
11387 | |
11388 char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t))); | |
11389 if (!c) return 0; | |
11390 | |
11391 memcpy(c, value.begin, length * sizeof(char_t)); | |
11392 c[length] = 0; | |
11393 | |
11394 return c; | |
11395 } | |
11396 | |
11397 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) | |
11398 { | |
11399 switch (name.begin[0]) | |
11400 { | |
11401 case 'b': | |
11402 if (name == PUGIXML_TEXT("boolean") && argc == 1) | |
11403 return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]); | |
11404 | |
11405 break; | |
11406 | |
11407 case 'c': | |
11408 if (name == PUGIXML_TEXT("count") && argc == 1) | |
11409 { | |
11410 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
11411 return alloc_node(ast_func_count, xpath_type_number, args[0]); | |
11412 } | |
11413 else if (name == PUGIXML_TEXT("contains") && argc == 2) | |
11414 return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); | |
11415 else if (name == PUGIXML_TEXT("concat") && argc >= 2) | |
11416 return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]); | |
11417 else if (name == PUGIXML_TEXT("ceiling") && argc == 1) | |
11418 return alloc_node(ast_func_ceiling, xpath_type_number, args[0]); | |
11419 | |
11420 break; | |
11421 | |
11422 case 'f': | |
11423 if (name == PUGIXML_TEXT("false") && argc == 0) | |
11424 return alloc_node(ast_func_false, xpath_type_boolean); | |
11425 else if (name == PUGIXML_TEXT("floor") && argc == 1) | |
11426 return alloc_node(ast_func_floor, xpath_type_number, args[0]); | |
11427 | |
11428 break; | |
11429 | |
11430 case 'i': | |
11431 if (name == PUGIXML_TEXT("id") && argc == 1) | |
11432 return alloc_node(ast_func_id, xpath_type_node_set, args[0]); | |
11433 | |
11434 break; | |
11435 | |
11436 case 'l': | |
11437 if (name == PUGIXML_TEXT("last") && argc == 0) | |
11438 return alloc_node(ast_func_last, xpath_type_number); | |
11439 else if (name == PUGIXML_TEXT("lang") && argc == 1) | |
11440 return alloc_node(ast_func_lang, xpath_type_boolean, args[0]); | |
11441 else if (name == PUGIXML_TEXT("local-name") && argc <= 1) | |
11442 { | |
11443 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
11444 return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]); | |
11445 } | |
11446 | |
11447 break; | |
11448 | |
11449 case 'n': | |
11450 if (name == PUGIXML_TEXT("name") && argc <= 1) | |
11451 { | |
11452 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
11453 return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]); | |
11454 } | |
11455 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) | |
11456 { | |
11457 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
11458 return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]); | |
11459 } | |
11460 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) | |
11461 return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); | |
11462 else if (name == PUGIXML_TEXT("not") && argc == 1) | |
11463 return alloc_node(ast_func_not, xpath_type_boolean, args[0]); | |
11464 else if (name == PUGIXML_TEXT("number") && argc <= 1) | |
11465 return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); | |
11466 | |
11467 break; | |
11468 | |
11469 case 'p': | |
11470 if (name == PUGIXML_TEXT("position") && argc == 0) | |
11471 return alloc_node(ast_func_position, xpath_type_number); | |
11472 | |
11473 break; | |
11474 | |
11475 case 'r': | |
11476 if (name == PUGIXML_TEXT("round") && argc == 1) | |
11477 return alloc_node(ast_func_round, xpath_type_number, args[0]); | |
11478 | |
11479 break; | |
11480 | |
11481 case 's': | |
11482 if (name == PUGIXML_TEXT("string") && argc <= 1) | |
11483 return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); | |
11484 else if (name == PUGIXML_TEXT("string-length") && argc <= 1) | |
11485 return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); | |
11486 else if (name == PUGIXML_TEXT("starts-with") && argc == 2) | |
11487 return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); | |
11488 else if (name == PUGIXML_TEXT("substring-before") && argc == 2) | |
11489 return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); | |
11490 else if (name == PUGIXML_TEXT("substring-after") && argc == 2) | |
11491 return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); | |
11492 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) | |
11493 return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); | |
11494 else if (name == PUGIXML_TEXT("sum") && argc == 1) | |
11495 { | |
11496 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
11497 return alloc_node(ast_func_sum, xpath_type_number, args[0]); | |
11498 } | |
11499 | |
11500 break; | |
11501 | |
11502 case 't': | |
11503 if (name == PUGIXML_TEXT("translate") && argc == 3) | |
11504 return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]); | |
11505 else if (name == PUGIXML_TEXT("true") && argc == 0) | |
11506 return alloc_node(ast_func_true, xpath_type_boolean); | |
11507 | |
11508 break; | |
11509 | |
11510 default: | |
11511 break; | |
11512 } | |
11513 | |
11514 return error("Unrecognized function or wrong parameter count"); | |
11515 } | |
11516 | |
11517 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) | |
11518 { | |
11519 specified = true; | |
11520 | |
11521 switch (name.begin[0]) | |
11522 { | |
11523 case 'a': | |
11524 if (name == PUGIXML_TEXT("ancestor")) | |
11525 return axis_ancestor; | |
11526 else if (name == PUGIXML_TEXT("ancestor-or-self")) | |
11527 return axis_ancestor_or_self; | |
11528 else if (name == PUGIXML_TEXT("attribute")) | |
11529 return axis_attribute; | |
11530 | |
11531 break; | |
11532 | |
11533 case 'c': | |
11534 if (name == PUGIXML_TEXT("child")) | |
11535 return axis_child; | |
11536 | |
11537 break; | |
11538 | |
11539 case 'd': | |
11540 if (name == PUGIXML_TEXT("descendant")) | |
11541 return axis_descendant; | |
11542 else if (name == PUGIXML_TEXT("descendant-or-self")) | |
11543 return axis_descendant_or_self; | |
11544 | |
11545 break; | |
11546 | |
11547 case 'f': | |
11548 if (name == PUGIXML_TEXT("following")) | |
11549 return axis_following; | |
11550 else if (name == PUGIXML_TEXT("following-sibling")) | |
11551 return axis_following_sibling; | |
11552 | |
11553 break; | |
11554 | |
11555 case 'n': | |
11556 if (name == PUGIXML_TEXT("namespace")) | |
11557 return axis_namespace; | |
11558 | |
11559 break; | |
11560 | |
11561 case 'p': | |
11562 if (name == PUGIXML_TEXT("parent")) | |
11563 return axis_parent; | |
11564 else if (name == PUGIXML_TEXT("preceding")) | |
11565 return axis_preceding; | |
11566 else if (name == PUGIXML_TEXT("preceding-sibling")) | |
11567 return axis_preceding_sibling; | |
11568 | |
11569 break; | |
11570 | |
11571 case 's': | |
11572 if (name == PUGIXML_TEXT("self")) | |
11573 return axis_self; | |
11574 | |
11575 break; | |
11576 | |
11577 default: | |
11578 break; | |
11579 } | |
11580 | |
11581 specified = false; | |
11582 return axis_child; | |
11583 } | |
11584 | |
11585 nodetest_t parse_node_test_type(const xpath_lexer_string& name) | |
11586 { | |
11587 switch (name.begin[0]) | |
11588 { | |
11589 case 'c': | |
11590 if (name == PUGIXML_TEXT("comment")) | |
11591 return nodetest_type_comment; | |
11592 | |
11593 break; | |
11594 | |
11595 case 'n': | |
11596 if (name == PUGIXML_TEXT("node")) | |
11597 return nodetest_type_node; | |
11598 | |
11599 break; | |
11600 | |
11601 case 'p': | |
11602 if (name == PUGIXML_TEXT("processing-instruction")) | |
11603 return nodetest_type_pi; | |
11604 | |
11605 break; | |
11606 | |
11607 case 't': | |
11608 if (name == PUGIXML_TEXT("text")) | |
11609 return nodetest_type_text; | |
11610 | |
11611 break; | |
11612 | |
11613 default: | |
11614 break; | |
11615 } | |
11616 | |
11617 return nodetest_none; | |
11618 } | |
11619 | |
11620 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall | |
11621 xpath_ast_node* parse_primary_expression() | |
11622 { | |
11623 switch (_lexer.current()) | |
11624 { | |
11625 case lex_var_ref: | |
11626 { | |
11627 xpath_lexer_string name = _lexer.contents(); | |
11628 | |
11629 if (!_variables) | |
11630 return error("Unknown variable: variable set is not provided"); | |
11631 | |
11632 xpath_variable* var = 0; | |
11633 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) | |
11634 return error_oom(); | |
11635 | |
11636 if (!var) | |
11637 return error("Unknown variable: variable set does not contain the given name"); | |
11638 | |
11639 _lexer.next(); | |
11640 | |
11641 return alloc_node(ast_variable, var->type(), var); | |
11642 } | |
11643 | |
11644 case lex_open_brace: | |
11645 { | |
11646 _lexer.next(); | |
11647 | |
11648 xpath_ast_node* n = parse_expression(); | |
11649 if (!n) return 0; | |
11650 | |
11651 if (_lexer.current() != lex_close_brace) | |
11652 return error("Expected ')' to match an opening '('"); | |
11653 | |
11654 _lexer.next(); | |
11655 | |
11656 return n; | |
11657 } | |
11658 | |
11659 case lex_quoted_string: | |
11660 { | |
11661 const char_t* value = alloc_string(_lexer.contents()); | |
11662 if (!value) return 0; | |
11663 | |
11664 _lexer.next(); | |
11665 | |
11666 return alloc_node(ast_string_constant, xpath_type_string, value); | |
11667 } | |
11668 | |
11669 case lex_number: | |
11670 { | |
11671 double value = 0; | |
11672 | |
11673 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) | |
11674 return error_oom(); | |
11675 | |
11676 _lexer.next(); | |
11677 | |
11678 return alloc_node(ast_number_constant, xpath_type_number, value); | |
11679 } | |
11680 | |
11681 case lex_string: | |
11682 { | |
11683 xpath_ast_node* args[2] = {0}; | |
11684 size_t argc = 0; | |
11685 | |
11686 xpath_lexer_string function = _lexer.contents(); | |
11687 _lexer.next(); | |
11688 | |
11689 xpath_ast_node* last_arg = 0; | |
11690 | |
11691 if (_lexer.current() != lex_open_brace) | |
11692 return error("Unrecognized function call"); | |
11693 _lexer.next(); | |
11694 | |
11695 size_t old_depth = _depth; | |
11696 | |
11697 while (_lexer.current() != lex_close_brace) | |
11698 { | |
11699 if (argc > 0) | |
11700 { | |
11701 if (_lexer.current() != lex_comma) | |
11702 return error("No comma between function arguments"); | |
11703 _lexer.next(); | |
11704 } | |
11705 | |
11706 if (++_depth > xpath_ast_depth_limit) | |
11707 return error_rec(); | |
11708 | |
11709 xpath_ast_node* n = parse_expression(); | |
11710 if (!n) return 0; | |
11711 | |
11712 if (argc < 2) args[argc] = n; | |
11713 else last_arg->set_next(n); | |
11714 | |
11715 argc++; | |
11716 last_arg = n; | |
11717 } | |
11718 | |
11719 _lexer.next(); | |
11720 | |
11721 _depth = old_depth; | |
11722 | |
11723 return parse_function(function, argc, args); | |
11724 } | |
11725 | |
11726 default: | |
11727 return error("Unrecognizable primary expression"); | |
11728 } | |
11729 } | |
11730 | |
11731 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate | |
11732 // Predicate ::= '[' PredicateExpr ']' | |
11733 // PredicateExpr ::= Expr | |
11734 xpath_ast_node* parse_filter_expression() | |
11735 { | |
11736 xpath_ast_node* n = parse_primary_expression(); | |
11737 if (!n) return 0; | |
11738 | |
11739 size_t old_depth = _depth; | |
11740 | |
11741 while (_lexer.current() == lex_open_square_brace) | |
11742 { | |
11743 _lexer.next(); | |
11744 | |
11745 if (++_depth > xpath_ast_depth_limit) | |
11746 return error_rec(); | |
11747 | |
11748 if (n->rettype() != xpath_type_node_set) | |
11749 return error("Predicate has to be applied to node set"); | |
11750 | |
11751 xpath_ast_node* expr = parse_expression(); | |
11752 if (!expr) return 0; | |
11753 | |
11754 n = alloc_node(ast_filter, n, expr, predicate_default); | |
11755 if (!n) return 0; | |
11756 | |
11757 if (_lexer.current() != lex_close_square_brace) | |
11758 return error("Expected ']' to match an opening '['"); | |
11759 | |
11760 _lexer.next(); | |
11761 } | |
11762 | |
11763 _depth = old_depth; | |
11764 | |
11765 return n; | |
11766 } | |
11767 | |
11768 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep | |
11769 // AxisSpecifier ::= AxisName '::' | '@'? | |
11770 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' | |
11771 // NameTest ::= '*' | NCName ':' '*' | QName | |
11772 // AbbreviatedStep ::= '.' | '..' | |
11773 xpath_ast_node* parse_step(xpath_ast_node* set) | |
11774 { | |
11775 if (set && set->rettype() != xpath_type_node_set) | |
11776 return error("Step has to be applied to node set"); | |
11777 | |
11778 bool axis_specified = false; | |
11779 axis_t axis = axis_child; // implied child axis | |
11780 | |
11781 if (_lexer.current() == lex_axis_attribute) | |
11782 { | |
11783 axis = axis_attribute; | |
11784 axis_specified = true; | |
11785 | |
11786 _lexer.next(); | |
11787 } | |
11788 else if (_lexer.current() == lex_dot) | |
11789 { | |
11790 _lexer.next(); | |
11791 | |
11792 if (_lexer.current() == lex_open_square_brace) | |
11793 return error("Predicates are not allowed after an abbreviated step"); | |
11794 | |
11795 return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0); | |
11796 } | |
11797 else if (_lexer.current() == lex_double_dot) | |
11798 { | |
11799 _lexer.next(); | |
11800 | |
11801 if (_lexer.current() == lex_open_square_brace) | |
11802 return error("Predicates are not allowed after an abbreviated step"); | |
11803 | |
11804 return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0); | |
11805 } | |
11806 | |
11807 nodetest_t nt_type = nodetest_none; | |
11808 xpath_lexer_string nt_name; | |
11809 | |
11810 if (_lexer.current() == lex_string) | |
11811 { | |
11812 // node name test | |
11813 nt_name = _lexer.contents(); | |
11814 _lexer.next(); | |
11815 | |
11816 // was it an axis name? | |
11817 if (_lexer.current() == lex_double_colon) | |
11818 { | |
11819 // parse axis name | |
11820 if (axis_specified) | |
11821 return error("Two axis specifiers in one step"); | |
11822 | |
11823 axis = parse_axis_name(nt_name, axis_specified); | |
11824 | |
11825 if (!axis_specified) | |
11826 return error("Unknown axis"); | |
11827 | |
11828 // read actual node test | |
11829 _lexer.next(); | |
11830 | |
11831 if (_lexer.current() == lex_multiply) | |
11832 { | |
11833 nt_type = nodetest_all; | |
11834 nt_name = xpath_lexer_string(); | |
11835 _lexer.next(); | |
11836 } | |
11837 else if (_lexer.current() == lex_string) | |
11838 { | |
11839 nt_name = _lexer.contents(); | |
11840 _lexer.next(); | |
11841 } | |
11842 else | |
11843 { | |
11844 return error("Unrecognized node test"); | |
11845 } | |
11846 } | |
11847 | |
11848 if (nt_type == nodetest_none) | |
11849 { | |
11850 // node type test or processing-instruction | |
11851 if (_lexer.current() == lex_open_brace) | |
11852 { | |
11853 _lexer.next(); | |
11854 | |
11855 if (_lexer.current() == lex_close_brace) | |
11856 { | |
11857 _lexer.next(); | |
11858 | |
11859 nt_type = parse_node_test_type(nt_name); | |
11860 | |
11861 if (nt_type == nodetest_none) | |
11862 return error("Unrecognized node type"); | |
11863 | |
11864 nt_name = xpath_lexer_string(); | |
11865 } | |
11866 else if (nt_name == PUGIXML_TEXT("processing-instruction")) | |
11867 { | |
11868 if (_lexer.current() != lex_quoted_string) | |
11869 return error("Only literals are allowed as arguments to processing-instruction()"); | |
11870 | |
11871 nt_type = nodetest_pi; | |
11872 nt_name = _lexer.contents(); | |
11873 _lexer.next(); | |
11874 | |
11875 if (_lexer.current() != lex_close_brace) | |
11876 return error("Unmatched brace near processing-instruction()"); | |
11877 _lexer.next(); | |
11878 } | |
11879 else | |
11880 { | |
11881 return error("Unmatched brace near node type test"); | |
11882 } | |
11883 } | |
11884 // QName or NCName:* | |
11885 else | |
11886 { | |
11887 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:* | |
11888 { | |
11889 nt_name.end--; // erase * | |
11890 | |
11891 nt_type = nodetest_all_in_namespace; | |
11892 } | |
11893 else | |
11894 { | |
11895 nt_type = nodetest_name; | |
11896 } | |
11897 } | |
11898 } | |
11899 } | |
11900 else if (_lexer.current() == lex_multiply) | |
11901 { | |
11902 nt_type = nodetest_all; | |
11903 _lexer.next(); | |
11904 } | |
11905 else | |
11906 { | |
11907 return error("Unrecognized node test"); | |
11908 } | |
11909 | |
11910 const char_t* nt_name_copy = alloc_string(nt_name); | |
11911 if (!nt_name_copy) return 0; | |
11912 | |
11913 xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy); | |
11914 if (!n) return 0; | |
11915 | |
11916 size_t old_depth = _depth; | |
11917 | |
11918 xpath_ast_node* last = 0; | |
11919 | |
11920 while (_lexer.current() == lex_open_square_brace) | |
11921 { | |
11922 _lexer.next(); | |
11923 | |
11924 if (++_depth > xpath_ast_depth_limit) | |
11925 return error_rec(); | |
11926 | |
11927 xpath_ast_node* expr = parse_expression(); | |
11928 if (!expr) return 0; | |
11929 | |
11930 xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default); | |
11931 if (!pred) return 0; | |
11932 | |
11933 if (_lexer.current() != lex_close_square_brace) | |
11934 return error("Expected ']' to match an opening '['"); | |
11935 _lexer.next(); | |
11936 | |
11937 if (last) last->set_next(pred); | |
11938 else n->set_right(pred); | |
11939 | |
11940 last = pred; | |
11941 } | |
11942 | |
11943 _depth = old_depth; | |
11944 | |
11945 return n; | |
11946 } | |
11947 | |
11948 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step | |
11949 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) | |
11950 { | |
11951 xpath_ast_node* n = parse_step(set); | |
11952 if (!n) return 0; | |
11953 | |
11954 size_t old_depth = _depth; | |
11955 | |
11956 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) | |
11957 { | |
11958 lexeme_t l = _lexer.current(); | |
11959 _lexer.next(); | |
11960 | |
11961 if (l == lex_double_slash) | |
11962 { | |
11963 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); | |
11964 if (!n) return 0; | |
11965 | |
11966 ++_depth; | |
11967 } | |
11968 | |
11969 if (++_depth > xpath_ast_depth_limit) | |
11970 return error_rec(); | |
11971 | |
11972 n = parse_step(n); | |
11973 if (!n) return 0; | |
11974 } | |
11975 | |
11976 _depth = old_depth; | |
11977 | |
11978 return n; | |
11979 } | |
11980 | |
11981 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath | |
11982 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath | |
11983 xpath_ast_node* parse_location_path() | |
11984 { | |
11985 if (_lexer.current() == lex_slash) | |
11986 { | |
11987 _lexer.next(); | |
11988 | |
11989 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); | |
11990 if (!n) return 0; | |
11991 | |
11992 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path | |
11993 lexeme_t l = _lexer.current(); | |
11994 | |
11995 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) | |
11996 return parse_relative_location_path(n); | |
11997 else | |
11998 return n; | |
11999 } | |
12000 else if (_lexer.current() == lex_double_slash) | |
12001 { | |
12002 _lexer.next(); | |
12003 | |
12004 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); | |
12005 if (!n) return 0; | |
12006 | |
12007 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); | |
12008 if (!n) return 0; | |
12009 | |
12010 return parse_relative_location_path(n); | |
12011 } | |
12012 | |
12013 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1 | |
12014 return parse_relative_location_path(0); | |
12015 } | |
12016 | |
12017 // PathExpr ::= LocationPath | |
12018 // | FilterExpr | |
12019 // | FilterExpr '/' RelativeLocationPath | |
12020 // | FilterExpr '//' RelativeLocationPath | |
12021 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr | |
12022 // UnaryExpr ::= UnionExpr | '-' UnaryExpr | |
12023 xpath_ast_node* parse_path_or_unary_expression() | |
12024 { | |
12025 // Clarification. | |
12026 // PathExpr begins with either LocationPath or FilterExpr. | |
12027 // FilterExpr begins with PrimaryExpr | |
12028 // PrimaryExpr begins with '$' in case of it being a variable reference, | |
12029 // '(' in case of it being an expression, string literal, number constant or | |
12030 // function call. | |
12031 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || | |
12032 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || | |
12033 _lexer.current() == lex_string) | |
12034 { | |
12035 if (_lexer.current() == lex_string) | |
12036 { | |
12037 // This is either a function call, or not - if not, we shall proceed with location path | |
12038 const char_t* state = _lexer.state(); | |
12039 | |
12040 while (PUGI__IS_CHARTYPE(*state, ct_space)) ++state; | |
12041 | |
12042 if (*state != '(') | |
12043 return parse_location_path(); | |
12044 | |
12045 // This looks like a function call; however this still can be a node-test. Check it. | |
12046 if (parse_node_test_type(_lexer.contents()) != nodetest_none) | |
12047 return parse_location_path(); | |
12048 } | |
12049 | |
12050 xpath_ast_node* n = parse_filter_expression(); | |
12051 if (!n) return 0; | |
12052 | |
12053 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) | |
12054 { | |
12055 lexeme_t l = _lexer.current(); | |
12056 _lexer.next(); | |
12057 | |
12058 if (l == lex_double_slash) | |
12059 { | |
12060 if (n->rettype() != xpath_type_node_set) | |
12061 return error("Step has to be applied to node set"); | |
12062 | |
12063 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); | |
12064 if (!n) return 0; | |
12065 } | |
12066 | |
12067 // select from location path | |
12068 return parse_relative_location_path(n); | |
12069 } | |
12070 | |
12071 return n; | |
12072 } | |
12073 else if (_lexer.current() == lex_minus) | |
12074 { | |
12075 _lexer.next(); | |
12076 | |
12077 // precedence 7+ - only parses union expressions | |
12078 xpath_ast_node* n = parse_expression(7); | |
12079 if (!n) return 0; | |
12080 | |
12081 return alloc_node(ast_op_negate, xpath_type_number, n); | |
12082 } | |
12083 else | |
12084 { | |
12085 return parse_location_path(); | |
12086 } | |
12087 } | |
12088 | |
12089 struct binary_op_t | |
12090 { | |
12091 ast_type_t asttype; | |
12092 xpath_value_type rettype; | |
12093 int precedence; | |
12094 | |
12095 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) | |
12096 { | |
12097 } | |
12098 | |
12099 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) | |
12100 { | |
12101 } | |
12102 | |
12103 static binary_op_t parse(xpath_lexer& lexer) | |
12104 { | |
12105 switch (lexer.current()) | |
12106 { | |
12107 case lex_string: | |
12108 if (lexer.contents() == PUGIXML_TEXT("or")) | |
12109 return binary_op_t(ast_op_or, xpath_type_boolean, 1); | |
12110 else if (lexer.contents() == PUGIXML_TEXT("and")) | |
12111 return binary_op_t(ast_op_and, xpath_type_boolean, 2); | |
12112 else if (lexer.contents() == PUGIXML_TEXT("div")) | |
12113 return binary_op_t(ast_op_divide, xpath_type_number, 6); | |
12114 else if (lexer.contents() == PUGIXML_TEXT("mod")) | |
12115 return binary_op_t(ast_op_mod, xpath_type_number, 6); | |
12116 else | |
12117 return binary_op_t(); | |
12118 | |
12119 case lex_equal: | |
12120 return binary_op_t(ast_op_equal, xpath_type_boolean, 3); | |
12121 | |
12122 case lex_not_equal: | |
12123 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3); | |
12124 | |
12125 case lex_less: | |
12126 return binary_op_t(ast_op_less, xpath_type_boolean, 4); | |
12127 | |
12128 case lex_greater: | |
12129 return binary_op_t(ast_op_greater, xpath_type_boolean, 4); | |
12130 | |
12131 case lex_less_or_equal: | |
12132 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4); | |
12133 | |
12134 case lex_greater_or_equal: | |
12135 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4); | |
12136 | |
12137 case lex_plus: | |
12138 return binary_op_t(ast_op_add, xpath_type_number, 5); | |
12139 | |
12140 case lex_minus: | |
12141 return binary_op_t(ast_op_subtract, xpath_type_number, 5); | |
12142 | |
12143 case lex_multiply: | |
12144 return binary_op_t(ast_op_multiply, xpath_type_number, 6); | |
12145 | |
12146 case lex_union: | |
12147 return binary_op_t(ast_op_union, xpath_type_node_set, 7); | |
12148 | |
12149 default: | |
12150 return binary_op_t(); | |
12151 } | |
12152 } | |
12153 }; | |
12154 | |
12155 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) | |
12156 { | |
12157 binary_op_t op = binary_op_t::parse(_lexer); | |
12158 | |
12159 while (op.asttype != ast_unknown && op.precedence >= limit) | |
12160 { | |
12161 _lexer.next(); | |
12162 | |
12163 if (++_depth > xpath_ast_depth_limit) | |
12164 return error_rec(); | |
12165 | |
12166 xpath_ast_node* rhs = parse_path_or_unary_expression(); | |
12167 if (!rhs) return 0; | |
12168 | |
12169 binary_op_t nextop = binary_op_t::parse(_lexer); | |
12170 | |
12171 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) | |
12172 { | |
12173 rhs = parse_expression_rec(rhs, nextop.precedence); | |
12174 if (!rhs) return 0; | |
12175 | |
12176 nextop = binary_op_t::parse(_lexer); | |
12177 } | |
12178 | |
12179 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) | |
12180 return error("Union operator has to be applied to node sets"); | |
12181 | |
12182 lhs = alloc_node(op.asttype, op.rettype, lhs, rhs); | |
12183 if (!lhs) return 0; | |
12184 | |
12185 op = binary_op_t::parse(_lexer); | |
12186 } | |
12187 | |
12188 return lhs; | |
12189 } | |
12190 | |
12191 // Expr ::= OrExpr | |
12192 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr | |
12193 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr | |
12194 // EqualityExpr ::= RelationalExpr | |
12195 // | EqualityExpr '=' RelationalExpr | |
12196 // | EqualityExpr '!=' RelationalExpr | |
12197 // RelationalExpr ::= AdditiveExpr | |
12198 // | RelationalExpr '<' AdditiveExpr | |
12199 // | RelationalExpr '>' AdditiveExpr | |
12200 // | RelationalExpr '<=' AdditiveExpr | |
12201 // | RelationalExpr '>=' AdditiveExpr | |
12202 // AdditiveExpr ::= MultiplicativeExpr | |
12203 // | AdditiveExpr '+' MultiplicativeExpr | |
12204 // | AdditiveExpr '-' MultiplicativeExpr | |
12205 // MultiplicativeExpr ::= UnaryExpr | |
12206 // | MultiplicativeExpr '*' UnaryExpr | |
12207 // | MultiplicativeExpr 'div' UnaryExpr | |
12208 // | MultiplicativeExpr 'mod' UnaryExpr | |
12209 xpath_ast_node* parse_expression(int limit = 0) | |
12210 { | |
12211 size_t old_depth = _depth; | |
12212 | |
12213 if (++_depth > xpath_ast_depth_limit) | |
12214 return error_rec(); | |
12215 | |
12216 xpath_ast_node* n = parse_path_or_unary_expression(); | |
12217 if (!n) return 0; | |
12218 | |
12219 n = parse_expression_rec(n, limit); | |
12220 | |
12221 _depth = old_depth; | |
12222 | |
12223 return n; | |
12224 } | |
12225 | |
12226 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0) | |
12227 { | |
12228 } | |
12229 | |
12230 xpath_ast_node* parse() | |
12231 { | |
12232 xpath_ast_node* n = parse_expression(); | |
12233 if (!n) return 0; | |
12234 | |
12235 assert(_depth == 0); | |
12236 | |
12237 // check if there are unparsed tokens left | |
12238 if (_lexer.current() != lex_eof) | |
12239 return error("Incorrect query"); | |
12240 | |
12241 return n; | |
12242 } | |
12243 | |
12244 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) | |
12245 { | |
12246 xpath_parser parser(query, variables, alloc, result); | |
12247 | |
12248 return parser.parse(); | |
12249 } | |
12250 }; | |
12251 | |
12252 struct xpath_query_impl | |
12253 { | |
12254 static xpath_query_impl* create() | |
12255 { | |
12256 void* memory = xml_memory::allocate(sizeof(xpath_query_impl)); | |
12257 if (!memory) return 0; | |
12258 | |
12259 return new (memory) xpath_query_impl(); | |
12260 } | |
12261 | |
12262 static void destroy(xpath_query_impl* impl) | |
12263 { | |
12264 // free all allocated pages | |
12265 impl->alloc.release(); | |
12266 | |
12267 // free allocator memory (with the first page) | |
12268 xml_memory::deallocate(impl); | |
12269 } | |
12270 | |
12271 xpath_query_impl(): root(0), alloc(&block, &oom), oom(false) | |
12272 { | |
12273 block.next = 0; | |
12274 block.capacity = sizeof(block.data); | |
12275 } | |
12276 | |
12277 xpath_ast_node* root; | |
12278 xpath_allocator alloc; | |
12279 xpath_memory_block block; | |
12280 bool oom; | |
12281 }; | |
12282 | |
12283 PUGI__FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) | |
12284 { | |
12285 if (!impl) return 0; | |
12286 | |
12287 if (impl->root->rettype() != xpath_type_node_set) | |
12288 { | |
12289 #ifdef PUGIXML_NO_EXCEPTIONS | |
12290 return 0; | |
12291 #else | |
12292 xpath_parse_result res; | |
12293 res.error = "Expression does not evaluate to node set"; | |
12294 | |
12295 throw xpath_exception(res); | |
12296 #endif | |
12297 } | |
12298 | |
12299 return impl->root; | |
12300 } | |
12301 PUGI__NS_END | |
12302 | |
12303 namespace pugi | |
12304 { | |
12305 #ifndef PUGIXML_NO_EXCEPTIONS | |
12306 PUGI__FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) | |
12307 { | |
12308 assert(_result.error); | |
12309 } | |
12310 | |
12311 PUGI__FN const char* xpath_exception::what() const throw() | |
12312 { | |
12313 return _result.error; | |
12314 } | |
12315 | |
12316 PUGI__FN const xpath_parse_result& xpath_exception::result() const | |
12317 { | |
12318 return _result; | |
12319 } | |
12320 #endif | |
12321 | |
12322 PUGI__FN xpath_node::xpath_node() | |
12323 { | |
12324 } | |
12325 | |
12326 PUGI__FN xpath_node::xpath_node(const xml_node& node_): _node(node_) | |
12327 { | |
12328 } | |
12329 | |
12330 PUGI__FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) | |
12331 { | |
12332 } | |
12333 | |
12334 PUGI__FN xml_node xpath_node::node() const | |
12335 { | |
12336 return _attribute ? xml_node() : _node; | |
12337 } | |
12338 | |
12339 PUGI__FN xml_attribute xpath_node::attribute() const | |
12340 { | |
12341 return _attribute; | |
12342 } | |
12343 | |
12344 PUGI__FN xml_node xpath_node::parent() const | |
12345 { | |
12346 return _attribute ? _node : _node.parent(); | |
12347 } | |
12348 | |
12349 PUGI__FN static void unspecified_bool_xpath_node(xpath_node***) | |
12350 { | |
12351 } | |
12352 | |
12353 PUGI__FN xpath_node::operator xpath_node::unspecified_bool_type() const | |
12354 { | |
12355 return (_node || _attribute) ? unspecified_bool_xpath_node : 0; | |
12356 } | |
12357 | |
12358 PUGI__FN bool xpath_node::operator!() const | |
12359 { | |
12360 return !(_node || _attribute); | |
12361 } | |
12362 | |
12363 PUGI__FN bool xpath_node::operator==(const xpath_node& n) const | |
12364 { | |
12365 return _node == n._node && _attribute == n._attribute; | |
12366 } | |
12367 | |
12368 PUGI__FN bool xpath_node::operator!=(const xpath_node& n) const | |
12369 { | |
12370 return _node != n._node || _attribute != n._attribute; | |
12371 } | |
12372 | |
12373 #ifdef __BORLANDC__ | |
12374 PUGI__FN bool operator&&(const xpath_node& lhs, bool rhs) | |
12375 { | |
12376 return (bool)lhs && rhs; | |
12377 } | |
12378 | |
12379 PUGI__FN bool operator||(const xpath_node& lhs, bool rhs) | |
12380 { | |
12381 return (bool)lhs || rhs; | |
12382 } | |
12383 #endif | |
12384 | |
12385 PUGI__FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) | |
12386 { | |
12387 assert(begin_ <= end_); | |
12388 | |
12389 size_t size_ = static_cast<size_t>(end_ - begin_); | |
12390 | |
12391 // use internal buffer for 0 or 1 elements, heap buffer otherwise | |
12392 xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node))); | |
12393 | |
12394 if (!storage) | |
12395 { | |
12396 #ifdef PUGIXML_NO_EXCEPTIONS | |
12397 return; | |
12398 #else | |
12399 throw std::bad_alloc(); | |
12400 #endif | |
12401 } | |
12402 | |
12403 // deallocate old buffer | |
12404 if (_begin != _storage) | |
12405 impl::xml_memory::deallocate(_begin); | |
12406 | |
12407 // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB | |
12408 if (size_) | |
12409 memcpy(storage, begin_, size_ * sizeof(xpath_node)); | |
12410 | |
12411 _begin = storage; | |
12412 _end = storage + size_; | |
12413 _type = type_; | |
12414 } | |
12415 | |
12416 #ifdef PUGIXML_HAS_MOVE | |
12417 PUGI__FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT | |
12418 { | |
12419 _type = rhs._type; | |
12420 _storage[0] = rhs._storage[0]; | |
12421 _begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin; | |
12422 _end = _begin + (rhs._end - rhs._begin); | |
12423 | |
12424 rhs._type = type_unsorted; | |
12425 rhs._begin = rhs._storage; | |
12426 rhs._end = rhs._storage; | |
12427 } | |
12428 #endif | |
12429 | |
12430 PUGI__FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage) | |
12431 { | |
12432 } | |
12433 | |
12434 PUGI__FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage) | |
12435 { | |
12436 _assign(begin_, end_, type_); | |
12437 } | |
12438 | |
12439 PUGI__FN xpath_node_set::~xpath_node_set() | |
12440 { | |
12441 if (_begin != _storage) | |
12442 impl::xml_memory::deallocate(_begin); | |
12443 } | |
12444 | |
12445 PUGI__FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage) | |
12446 { | |
12447 _assign(ns._begin, ns._end, ns._type); | |
12448 } | |
12449 | |
12450 PUGI__FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) | |
12451 { | |
12452 if (this == &ns) return *this; | |
12453 | |
12454 _assign(ns._begin, ns._end, ns._type); | |
12455 | |
12456 return *this; | |
12457 } | |
12458 | |
12459 #ifdef PUGIXML_HAS_MOVE | |
12460 PUGI__FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage) | |
12461 { | |
12462 _move(rhs); | |
12463 } | |
12464 | |
12465 PUGI__FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT | |
12466 { | |
12467 if (this == &rhs) return *this; | |
12468 | |
12469 if (_begin != _storage) | |
12470 impl::xml_memory::deallocate(_begin); | |
12471 | |
12472 _move(rhs); | |
12473 | |
12474 return *this; | |
12475 } | |
12476 #endif | |
12477 | |
12478 PUGI__FN xpath_node_set::type_t xpath_node_set::type() const | |
12479 { | |
12480 return _type; | |
12481 } | |
12482 | |
12483 PUGI__FN size_t xpath_node_set::size() const | |
12484 { | |
12485 return _end - _begin; | |
12486 } | |
12487 | |
12488 PUGI__FN bool xpath_node_set::empty() const | |
12489 { | |
12490 return _begin == _end; | |
12491 } | |
12492 | |
12493 PUGI__FN const xpath_node& xpath_node_set::operator[](size_t index) const | |
12494 { | |
12495 assert(index < size()); | |
12496 return _begin[index]; | |
12497 } | |
12498 | |
12499 PUGI__FN xpath_node_set::const_iterator xpath_node_set::begin() const | |
12500 { | |
12501 return _begin; | |
12502 } | |
12503 | |
12504 PUGI__FN xpath_node_set::const_iterator xpath_node_set::end() const | |
12505 { | |
12506 return _end; | |
12507 } | |
12508 | |
12509 PUGI__FN void xpath_node_set::sort(bool reverse) | |
12510 { | |
12511 _type = impl::xpath_sort(_begin, _end, _type, reverse); | |
12512 } | |
12513 | |
12514 PUGI__FN xpath_node xpath_node_set::first() const | |
12515 { | |
12516 return impl::xpath_first(_begin, _end, _type); | |
12517 } | |
12518 | |
12519 PUGI__FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) | |
12520 { | |
12521 } | |
12522 | |
12523 PUGI__FN xpath_parse_result::operator bool() const | |
12524 { | |
12525 return error == 0; | |
12526 } | |
12527 | |
12528 PUGI__FN const char* xpath_parse_result::description() const | |
12529 { | |
12530 return error ? error : "No error"; | |
12531 } | |
12532 | |
12533 PUGI__FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0) | |
12534 { | |
12535 } | |
12536 | |
12537 PUGI__FN const char_t* xpath_variable::name() const | |
12538 { | |
12539 switch (_type) | |
12540 { | |
12541 case xpath_type_node_set: | |
12542 return static_cast<const impl::xpath_variable_node_set*>(this)->name; | |
12543 | |
12544 case xpath_type_number: | |
12545 return static_cast<const impl::xpath_variable_number*>(this)->name; | |
12546 | |
12547 case xpath_type_string: | |
12548 return static_cast<const impl::xpath_variable_string*>(this)->name; | |
12549 | |
12550 case xpath_type_boolean: | |
12551 return static_cast<const impl::xpath_variable_boolean*>(this)->name; | |
12552 | |
12553 default: | |
12554 assert(false && "Invalid variable type"); // unreachable | |
12555 return 0; | |
12556 } | |
12557 } | |
12558 | |
12559 PUGI__FN xpath_value_type xpath_variable::type() const | |
12560 { | |
12561 return _type; | |
12562 } | |
12563 | |
12564 PUGI__FN bool xpath_variable::get_boolean() const | |
12565 { | |
12566 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false; | |
12567 } | |
12568 | |
12569 PUGI__FN double xpath_variable::get_number() const | |
12570 { | |
12571 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan(); | |
12572 } | |
12573 | |
12574 PUGI__FN const char_t* xpath_variable::get_string() const | |
12575 { | |
12576 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0; | |
12577 return value ? value : PUGIXML_TEXT(""); | |
12578 } | |
12579 | |
12580 PUGI__FN const xpath_node_set& xpath_variable::get_node_set() const | |
12581 { | |
12582 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set; | |
12583 } | |
12584 | |
12585 PUGI__FN bool xpath_variable::set(bool value) | |
12586 { | |
12587 if (_type != xpath_type_boolean) return false; | |
12588 | |
12589 static_cast<impl::xpath_variable_boolean*>(this)->value = value; | |
12590 return true; | |
12591 } | |
12592 | |
12593 PUGI__FN bool xpath_variable::set(double value) | |
12594 { | |
12595 if (_type != xpath_type_number) return false; | |
12596 | |
12597 static_cast<impl::xpath_variable_number*>(this)->value = value; | |
12598 return true; | |
12599 } | |
12600 | |
12601 PUGI__FN bool xpath_variable::set(const char_t* value) | |
12602 { | |
12603 if (_type != xpath_type_string) return false; | |
12604 | |
12605 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this); | |
12606 | |
12607 // duplicate string | |
12608 size_t size = (impl::strlength(value) + 1) * sizeof(char_t); | |
12609 | |
12610 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size)); | |
12611 if (!copy) return false; | |
12612 | |
12613 memcpy(copy, value, size); | |
12614 | |
12615 // replace old string | |
12616 if (var->value) impl::xml_memory::deallocate(var->value); | |
12617 var->value = copy; | |
12618 | |
12619 return true; | |
12620 } | |
12621 | |
12622 PUGI__FN bool xpath_variable::set(const xpath_node_set& value) | |
12623 { | |
12624 if (_type != xpath_type_node_set) return false; | |
12625 | |
12626 static_cast<impl::xpath_variable_node_set*>(this)->value = value; | |
12627 return true; | |
12628 } | |
12629 | |
12630 PUGI__FN xpath_variable_set::xpath_variable_set() | |
12631 { | |
12632 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12633 _data[i] = 0; | |
12634 } | |
12635 | |
12636 PUGI__FN xpath_variable_set::~xpath_variable_set() | |
12637 { | |
12638 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12639 _destroy(_data[i]); | |
12640 } | |
12641 | |
12642 PUGI__FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs) | |
12643 { | |
12644 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12645 _data[i] = 0; | |
12646 | |
12647 _assign(rhs); | |
12648 } | |
12649 | |
12650 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs) | |
12651 { | |
12652 if (this == &rhs) return *this; | |
12653 | |
12654 _assign(rhs); | |
12655 | |
12656 return *this; | |
12657 } | |
12658 | |
12659 #ifdef PUGIXML_HAS_MOVE | |
12660 PUGI__FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT | |
12661 { | |
12662 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12663 { | |
12664 _data[i] = rhs._data[i]; | |
12665 rhs._data[i] = 0; | |
12666 } | |
12667 } | |
12668 | |
12669 PUGI__FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT | |
12670 { | |
12671 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12672 { | |
12673 _destroy(_data[i]); | |
12674 | |
12675 _data[i] = rhs._data[i]; | |
12676 rhs._data[i] = 0; | |
12677 } | |
12678 | |
12679 return *this; | |
12680 } | |
12681 #endif | |
12682 | |
12683 PUGI__FN void xpath_variable_set::_assign(const xpath_variable_set& rhs) | |
12684 { | |
12685 xpath_variable_set temp; | |
12686 | |
12687 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12688 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i])) | |
12689 return; | |
12690 | |
12691 _swap(temp); | |
12692 } | |
12693 | |
12694 PUGI__FN void xpath_variable_set::_swap(xpath_variable_set& rhs) | |
12695 { | |
12696 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12697 { | |
12698 xpath_variable* chain = _data[i]; | |
12699 | |
12700 _data[i] = rhs._data[i]; | |
12701 rhs._data[i] = chain; | |
12702 } | |
12703 } | |
12704 | |
12705 PUGI__FN xpath_variable* xpath_variable_set::_find(const char_t* name) const | |
12706 { | |
12707 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); | |
12708 size_t hash = impl::hash_string(name) % hash_size; | |
12709 | |
12710 // look for existing variable | |
12711 for (xpath_variable* var = _data[hash]; var; var = var->_next) | |
12712 if (impl::strequal(var->name(), name)) | |
12713 return var; | |
12714 | |
12715 return 0; | |
12716 } | |
12717 | |
12718 PUGI__FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result) | |
12719 { | |
12720 xpath_variable* last = 0; | |
12721 | |
12722 while (var) | |
12723 { | |
12724 // allocate storage for new variable | |
12725 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name()); | |
12726 if (!nvar) return false; | |
12727 | |
12728 // link the variable to the result immediately to handle failures gracefully | |
12729 if (last) | |
12730 last->_next = nvar; | |
12731 else | |
12732 *out_result = nvar; | |
12733 | |
12734 last = nvar; | |
12735 | |
12736 // copy the value; this can fail due to out-of-memory conditions | |
12737 if (!impl::copy_xpath_variable(nvar, var)) return false; | |
12738 | |
12739 var = var->_next; | |
12740 } | |
12741 | |
12742 return true; | |
12743 } | |
12744 | |
12745 PUGI__FN void xpath_variable_set::_destroy(xpath_variable* var) | |
12746 { | |
12747 while (var) | |
12748 { | |
12749 xpath_variable* next = var->_next; | |
12750 | |
12751 impl::delete_xpath_variable(var->_type, var); | |
12752 | |
12753 var = next; | |
12754 } | |
12755 } | |
12756 | |
12757 PUGI__FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) | |
12758 { | |
12759 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); | |
12760 size_t hash = impl::hash_string(name) % hash_size; | |
12761 | |
12762 // look for existing variable | |
12763 for (xpath_variable* var = _data[hash]; var; var = var->_next) | |
12764 if (impl::strequal(var->name(), name)) | |
12765 return var->type() == type ? var : 0; | |
12766 | |
12767 // add new variable | |
12768 xpath_variable* result = impl::new_xpath_variable(type, name); | |
12769 | |
12770 if (result) | |
12771 { | |
12772 result->_next = _data[hash]; | |
12773 | |
12774 _data[hash] = result; | |
12775 } | |
12776 | |
12777 return result; | |
12778 } | |
12779 | |
12780 PUGI__FN bool xpath_variable_set::set(const char_t* name, bool value) | |
12781 { | |
12782 xpath_variable* var = add(name, xpath_type_boolean); | |
12783 return var ? var->set(value) : false; | |
12784 } | |
12785 | |
12786 PUGI__FN bool xpath_variable_set::set(const char_t* name, double value) | |
12787 { | |
12788 xpath_variable* var = add(name, xpath_type_number); | |
12789 return var ? var->set(value) : false; | |
12790 } | |
12791 | |
12792 PUGI__FN bool xpath_variable_set::set(const char_t* name, const char_t* value) | |
12793 { | |
12794 xpath_variable* var = add(name, xpath_type_string); | |
12795 return var ? var->set(value) : false; | |
12796 } | |
12797 | |
12798 PUGI__FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) | |
12799 { | |
12800 xpath_variable* var = add(name, xpath_type_node_set); | |
12801 return var ? var->set(value) : false; | |
12802 } | |
12803 | |
12804 PUGI__FN xpath_variable* xpath_variable_set::get(const char_t* name) | |
12805 { | |
12806 return _find(name); | |
12807 } | |
12808 | |
12809 PUGI__FN const xpath_variable* xpath_variable_set::get(const char_t* name) const | |
12810 { | |
12811 return _find(name); | |
12812 } | |
12813 | |
12814 PUGI__FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0) | |
12815 { | |
12816 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create(); | |
12817 | |
12818 if (!qimpl) | |
12819 { | |
12820 #ifdef PUGIXML_NO_EXCEPTIONS | |
12821 _result.error = "Out of memory"; | |
12822 #else | |
12823 throw std::bad_alloc(); | |
12824 #endif | |
12825 } | |
12826 else | |
12827 { | |
12828 using impl::auto_deleter; // MSVC7 workaround | |
12829 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy); | |
12830 | |
12831 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result); | |
12832 | |
12833 if (qimpl->root) | |
12834 { | |
12835 qimpl->root->optimize(&qimpl->alloc); | |
12836 | |
12837 _impl = impl.release(); | |
12838 _result.error = 0; | |
12839 } | |
12840 else | |
12841 { | |
12842 #ifdef PUGIXML_NO_EXCEPTIONS | |
12843 if (qimpl->oom) _result.error = "Out of memory"; | |
12844 #else | |
12845 if (qimpl->oom) throw std::bad_alloc(); | |
12846 throw xpath_exception(_result); | |
12847 #endif | |
12848 } | |
12849 } | |
12850 } | |
12851 | |
12852 PUGI__FN xpath_query::xpath_query(): _impl(0) | |
12853 { | |
12854 } | |
12855 | |
12856 PUGI__FN xpath_query::~xpath_query() | |
12857 { | |
12858 if (_impl) | |
12859 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); | |
12860 } | |
12861 | |
12862 #ifdef PUGIXML_HAS_MOVE | |
12863 PUGI__FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT | |
12864 { | |
12865 _impl = rhs._impl; | |
12866 _result = rhs._result; | |
12867 rhs._impl = 0; | |
12868 rhs._result = xpath_parse_result(); | |
12869 } | |
12870 | |
12871 PUGI__FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT | |
12872 { | |
12873 if (this == &rhs) return *this; | |
12874 | |
12875 if (_impl) | |
12876 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); | |
12877 | |
12878 _impl = rhs._impl; | |
12879 _result = rhs._result; | |
12880 rhs._impl = 0; | |
12881 rhs._result = xpath_parse_result(); | |
12882 | |
12883 return *this; | |
12884 } | |
12885 #endif | |
12886 | |
12887 PUGI__FN xpath_value_type xpath_query::return_type() const | |
12888 { | |
12889 if (!_impl) return xpath_type_none; | |
12890 | |
12891 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype(); | |
12892 } | |
12893 | |
12894 PUGI__FN bool xpath_query::evaluate_boolean(const xpath_node& n) const | |
12895 { | |
12896 if (!_impl) return false; | |
12897 | |
12898 impl::xpath_context c(n, 1, 1); | |
12899 impl::xpath_stack_data sd; | |
12900 | |
12901 bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack); | |
12902 | |
12903 if (sd.oom) | |
12904 { | |
12905 #ifdef PUGIXML_NO_EXCEPTIONS | |
12906 return false; | |
12907 #else | |
12908 throw std::bad_alloc(); | |
12909 #endif | |
12910 } | |
12911 | |
12912 return r; | |
12913 } | |
12914 | |
12915 PUGI__FN double xpath_query::evaluate_number(const xpath_node& n) const | |
12916 { | |
12917 if (!_impl) return impl::gen_nan(); | |
12918 | |
12919 impl::xpath_context c(n, 1, 1); | |
12920 impl::xpath_stack_data sd; | |
12921 | |
12922 double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack); | |
12923 | |
12924 if (sd.oom) | |
12925 { | |
12926 #ifdef PUGIXML_NO_EXCEPTIONS | |
12927 return impl::gen_nan(); | |
12928 #else | |
12929 throw std::bad_alloc(); | |
12930 #endif | |
12931 } | |
12932 | |
12933 return r; | |
12934 } | |
12935 | |
12936 #ifndef PUGIXML_NO_STL | |
12937 PUGI__FN string_t xpath_query::evaluate_string(const xpath_node& n) const | |
12938 { | |
12939 if (!_impl) return string_t(); | |
12940 | |
12941 impl::xpath_context c(n, 1, 1); | |
12942 impl::xpath_stack_data sd; | |
12943 | |
12944 impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack); | |
12945 | |
12946 if (sd.oom) | |
12947 { | |
12948 #ifdef PUGIXML_NO_EXCEPTIONS | |
12949 return string_t(); | |
12950 #else | |
12951 throw std::bad_alloc(); | |
12952 #endif | |
12953 } | |
12954 | |
12955 return string_t(r.c_str(), r.length()); | |
12956 } | |
12957 #endif | |
12958 | |
12959 PUGI__FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const | |
12960 { | |
12961 impl::xpath_context c(n, 1, 1); | |
12962 impl::xpath_stack_data sd; | |
12963 | |
12964 impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string(); | |
12965 | |
12966 if (sd.oom) | |
12967 { | |
12968 #ifdef PUGIXML_NO_EXCEPTIONS | |
12969 r = impl::xpath_string(); | |
12970 #else | |
12971 throw std::bad_alloc(); | |
12972 #endif | |
12973 } | |
12974 | |
12975 size_t full_size = r.length() + 1; | |
12976 | |
12977 if (capacity > 0) | |
12978 { | |
12979 size_t size = (full_size < capacity) ? full_size : capacity; | |
12980 assert(size > 0); | |
12981 | |
12982 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); | |
12983 buffer[size - 1] = 0; | |
12984 } | |
12985 | |
12986 return full_size; | |
12987 } | |
12988 | |
12989 PUGI__FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const | |
12990 { | |
12991 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); | |
12992 if (!root) return xpath_node_set(); | |
12993 | |
12994 impl::xpath_context c(n, 1, 1); | |
12995 impl::xpath_stack_data sd; | |
12996 | |
12997 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); | |
12998 | |
12999 if (sd.oom) | |
13000 { | |
13001 #ifdef PUGIXML_NO_EXCEPTIONS | |
13002 return xpath_node_set(); | |
13003 #else | |
13004 throw std::bad_alloc(); | |
13005 #endif | |
13006 } | |
13007 | |
13008 return xpath_node_set(r.begin(), r.end(), r.type()); | |
13009 } | |
13010 | |
13011 PUGI__FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const | |
13012 { | |
13013 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); | |
13014 if (!root) return xpath_node(); | |
13015 | |
13016 impl::xpath_context c(n, 1, 1); | |
13017 impl::xpath_stack_data sd; | |
13018 | |
13019 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); | |
13020 | |
13021 if (sd.oom) | |
13022 { | |
13023 #ifdef PUGIXML_NO_EXCEPTIONS | |
13024 return xpath_node(); | |
13025 #else | |
13026 throw std::bad_alloc(); | |
13027 #endif | |
13028 } | |
13029 | |
13030 return r.first(); | |
13031 } | |
13032 | |
13033 PUGI__FN const xpath_parse_result& xpath_query::result() const | |
13034 { | |
13035 return _result; | |
13036 } | |
13037 | |
13038 PUGI__FN static void unspecified_bool_xpath_query(xpath_query***) | |
13039 { | |
13040 } | |
13041 | |
13042 PUGI__FN xpath_query::operator xpath_query::unspecified_bool_type() const | |
13043 { | |
13044 return _impl ? unspecified_bool_xpath_query : 0; | |
13045 } | |
13046 | |
13047 PUGI__FN bool xpath_query::operator!() const | |
13048 { | |
13049 return !_impl; | |
13050 } | |
13051 | |
13052 PUGI__FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const | |
13053 { | |
13054 xpath_query q(query, variables); | |
13055 return q.evaluate_node(*this); | |
13056 } | |
13057 | |
13058 PUGI__FN xpath_node xml_node::select_node(const xpath_query& query) const | |
13059 { | |
13060 return query.evaluate_node(*this); | |
13061 } | |
13062 | |
13063 PUGI__FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const | |
13064 { | |
13065 xpath_query q(query, variables); | |
13066 return q.evaluate_node_set(*this); | |
13067 } | |
13068 | |
13069 PUGI__FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const | |
13070 { | |
13071 return query.evaluate_node_set(*this); | |
13072 } | |
13073 | |
13074 PUGI__FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const | |
13075 { | |
13076 xpath_query q(query, variables); | |
13077 return q.evaluate_node(*this); | |
13078 } | |
13079 | |
13080 PUGI__FN xpath_node xml_node::select_single_node(const xpath_query& query) const | |
13081 { | |
13082 return query.evaluate_node(*this); | |
13083 } | |
13084 } | |
13085 | |
13086 #endif | |
13087 | |
13088 #ifdef __BORLANDC__ | |
13089 # pragma option pop | |
13090 #endif | |
13091 | |
13092 // Intel C++ does not properly keep warning state for function templates, | |
13093 // so popping warning state at the end of translation unit leads to warnings in the middle. | |
13094 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | |
13095 # pragma warning(pop) | |
13096 #endif | |
13097 | |
13098 #if defined(_MSC_VER) && defined(__c2__) | |
13099 # pragma clang diagnostic pop | |
13100 #endif | |
13101 | |
13102 // Undefine all local macros (makes sure we're not leaking macros in header-only mode) | |
13103 #undef PUGI__NO_INLINE | |
13104 #undef PUGI__UNLIKELY | |
13105 #undef PUGI__STATIC_ASSERT | |
13106 #undef PUGI__DMC_VOLATILE | |
13107 #undef PUGI__UNSIGNED_OVERFLOW | |
13108 #undef PUGI__MSVC_CRT_VERSION | |
13109 #undef PUGI__SNPRINTF | |
13110 #undef PUGI__NS_BEGIN | |
13111 #undef PUGI__NS_END | |
13112 #undef PUGI__FN | |
13113 #undef PUGI__FN_NO_INLINE | |
13114 #undef PUGI__GETHEADER_IMPL | |
13115 #undef PUGI__GETPAGE_IMPL | |
13116 #undef PUGI__GETPAGE | |
13117 #undef PUGI__NODETYPE | |
13118 #undef PUGI__IS_CHARTYPE_IMPL | |
13119 #undef PUGI__IS_CHARTYPE | |
13120 #undef PUGI__IS_CHARTYPEX | |
13121 #undef PUGI__ENDSWITH | |
13122 #undef PUGI__SKIPWS | |
13123 #undef PUGI__OPTSET | |
13124 #undef PUGI__PUSHNODE | |
13125 #undef PUGI__POPNODE | |
13126 #undef PUGI__SCANFOR | |
13127 #undef PUGI__SCANWHILE | |
13128 #undef PUGI__SCANWHILE_UNROLL | |
13129 #undef PUGI__ENDSEG | |
13130 #undef PUGI__THROW_ERROR | |
13131 #undef PUGI__CHECK_ERROR | |
13132 | |
13133 #endif | |
13134 | |
13135 /** | |
13136 * Copyright (c) 2006-2022 Arseny Kapoulkine | |
13137 * | |
13138 * Permission is hereby granted, free of charge, to any person | |
13139 * obtaining a copy of this software and associated documentation | |
13140 * files (the "Software"), to deal in the Software without | |
13141 * restriction, including without limitation the rights to use, | |
13142 * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
13143 * copies of the Software, and to permit persons to whom the | |
13144 * Software is furnished to do so, subject to the following | |
13145 * conditions: | |
13146 * | |
13147 * The above copyright notice and this permission notice shall be | |
13148 * included in all copies or substantial portions of the Software. | |
13149 * | |
13150 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
13151 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
13152 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
13153 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
13154 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
13155 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
13156 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
13157 * OTHER DEALINGS IN THE SOFTWARE. | |
13158 */ |