Mercurial > minori
comparison dep/pugixml/src/pugixml.cpp @ 367:8d45d892be88 default tip
*: instead of pugixml, use Qt XML features
this means we have one extra Qt dependency though...
author | Paper <paper@tflc.us> |
---|---|
date | Sun, 17 Nov 2024 22:55:47 -0500 |
parents | 886f66775f31 |
children |
comparison
equal
deleted
inserted
replaced
366:886f66775f31 | 367:8d45d892be88 |
---|---|
1 /** | |
2 * pugixml parser - version 1.14 | |
3 * -------------------------------------------------------- | |
4 * Copyright (C) 2006-2023, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) | |
5 * Report bugs and download new versions at https://pugixml.org/ | |
6 * | |
7 * This library is distributed under the MIT License. See notice at the end | |
8 * of this file. | |
9 * | |
10 * This work is based on the pugxml parser, which is: | |
11 * Copyright (C) 2003, by Kristen Wegner (kristen@tima.net) | |
12 */ | |
13 | |
14 #ifndef SOURCE_PUGIXML_CPP | |
15 #define SOURCE_PUGIXML_CPP | |
16 | |
17 #include "pugixml.hpp" | |
18 | |
19 #include <stdlib.h> | |
20 #include <stdio.h> | |
21 #include <string.h> | |
22 #include <assert.h> | |
23 #include <limits.h> | |
24 | |
25 #ifdef PUGIXML_WCHAR_MODE | |
26 # include <wchar.h> | |
27 #endif | |
28 | |
29 #ifndef PUGIXML_NO_XPATH | |
30 # include <math.h> | |
31 # include <float.h> | |
32 #endif | |
33 | |
34 #ifndef PUGIXML_NO_STL | |
35 # include <istream> | |
36 # include <ostream> | |
37 # include <string> | |
38 #endif | |
39 | |
40 // For placement new | |
41 #include <new> | |
42 | |
43 // For load_file | |
44 #if defined(__linux__) || defined(__APPLE__) | |
45 #include <sys/stat.h> | |
46 #endif | |
47 | |
48 #ifdef _MSC_VER | |
49 # pragma warning(push) | |
50 # pragma warning(disable: 4127) // conditional expression is constant | |
51 # pragma warning(disable: 4324) // structure was padded due to __declspec(align()) | |
52 # pragma warning(disable: 4702) // unreachable code | |
53 # pragma warning(disable: 4996) // this function or variable may be unsafe | |
54 #endif | |
55 | |
56 #if defined(_MSC_VER) && defined(__c2__) | |
57 # pragma clang diagnostic push | |
58 # pragma clang diagnostic ignored "-Wdeprecated" // this function or variable may be unsafe | |
59 #endif | |
60 | |
61 #ifdef __INTEL_COMPILER | |
62 # pragma warning(disable: 177) // function was declared but never referenced | |
63 # pragma warning(disable: 279) // controlling expression is constant | |
64 # pragma warning(disable: 1478 1786) // function was declared "deprecated" | |
65 # pragma warning(disable: 1684) // conversion from pointer to same-sized integral type | |
66 #endif | |
67 | |
68 #if defined(__BORLANDC__) && defined(PUGIXML_HEADER_ONLY) | |
69 # pragma warn -8080 // symbol is declared but never used; disabling this inside push/pop bracket does not make the warning go away | |
70 #endif | |
71 | |
72 #ifdef __BORLANDC__ | |
73 # pragma option push | |
74 # pragma warn -8008 // condition is always false | |
75 # pragma warn -8066 // unreachable code | |
76 #endif | |
77 | |
78 #ifdef __SNC__ | |
79 // Using diag_push/diag_pop does not disable the warnings inside templates due to a compiler bug | |
80 # pragma diag_suppress=178 // function was declared but never referenced | |
81 # pragma diag_suppress=237 // controlling expression is constant | |
82 #endif | |
83 | |
84 #ifdef __TI_COMPILER_VERSION__ | |
85 # pragma diag_suppress 179 // function was declared but never referenced | |
86 #endif | |
87 | |
88 // Inlining controls | |
89 #if defined(_MSC_VER) && _MSC_VER >= 1300 | |
90 # define PUGI_IMPL_NO_INLINE __declspec(noinline) | |
91 #elif defined(__GNUC__) | |
92 # define PUGI_IMPL_NO_INLINE __attribute__((noinline)) | |
93 #else | |
94 # define PUGI_IMPL_NO_INLINE | |
95 #endif | |
96 | |
97 // Branch weight controls | |
98 #if defined(__GNUC__) && !defined(__c2__) | |
99 # define PUGI_IMPL_UNLIKELY(cond) __builtin_expect(cond, 0) | |
100 #else | |
101 # define PUGI_IMPL_UNLIKELY(cond) (cond) | |
102 #endif | |
103 | |
104 // Simple static assertion | |
105 #define PUGI_IMPL_STATIC_ASSERT(cond) { static const char condition_failed[(cond) ? 1 : -1] = {0}; (void)condition_failed[0]; } | |
106 | |
107 // Digital Mars C++ bug workaround for passing char loaded from memory via stack | |
108 #ifdef __DMC__ | |
109 # define PUGI_IMPL_DMC_VOLATILE volatile | |
110 #else | |
111 # define PUGI_IMPL_DMC_VOLATILE | |
112 #endif | |
113 | |
114 // Integer sanitizer workaround; we only apply this for clang since gcc8 has no_sanitize but not unsigned-integer-overflow and produces "attribute directive ignored" warnings | |
115 #if defined(__clang__) && defined(__has_attribute) | |
116 # if __has_attribute(no_sanitize) | |
117 # define PUGI_IMPL_UNSIGNED_OVERFLOW __attribute__((no_sanitize("unsigned-integer-overflow"))) | |
118 # else | |
119 # define PUGI_IMPL_UNSIGNED_OVERFLOW | |
120 # endif | |
121 #else | |
122 # define PUGI_IMPL_UNSIGNED_OVERFLOW | |
123 #endif | |
124 | |
125 // Borland C++ bug workaround for not defining ::memcpy depending on header include order (can't always use std::memcpy because some compilers don't have it at all) | |
126 #if defined(__BORLANDC__) && !defined(__MEM_H_USING_LIST) | |
127 using std::memcpy; | |
128 using std::memmove; | |
129 using std::memset; | |
130 #endif | |
131 | |
132 // Old versions of GCC do not define ::malloc and ::free depending on header include order | |
133 #if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4)) | |
134 using std::malloc; | |
135 using std::free; | |
136 #endif | |
137 | |
138 // Some MinGW/GCC versions have headers that erroneously omit LLONG_MIN/LLONG_MAX/ULLONG_MAX definitions from limits.h in some configurations | |
139 #if defined(PUGIXML_HAS_LONG_LONG) && defined(__GNUC__) && !defined(LLONG_MAX) && !defined(LLONG_MIN) && !defined(ULLONG_MAX) | |
140 # define LLONG_MIN (-LLONG_MAX - 1LL) | |
141 # define LLONG_MAX __LONG_LONG_MAX__ | |
142 # define ULLONG_MAX (LLONG_MAX * 2ULL + 1ULL) | |
143 #endif | |
144 | |
145 // In some environments MSVC is a compiler but the CRT lacks certain MSVC-specific features | |
146 #if defined(_MSC_VER) && !defined(__S3E__) && !defined(_WIN32_WCE) | |
147 # define PUGI_IMPL_MSVC_CRT_VERSION _MSC_VER | |
148 #elif defined(_WIN32_WCE) | |
149 # define PUGI_IMPL_MSVC_CRT_VERSION 1310 // MSVC7.1 | |
150 #endif | |
151 | |
152 // Not all platforms have snprintf; we define a wrapper that uses snprintf if possible. This only works with buffers with a known size. | |
153 #if __cplusplus >= 201103 | |
154 # define PUGI_IMPL_SNPRINTF(buf, ...) snprintf(buf, sizeof(buf), __VA_ARGS__) | |
155 #elif defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400 | |
156 # define PUGI_IMPL_SNPRINTF(buf, ...) _snprintf_s(buf, _countof(buf), _TRUNCATE, __VA_ARGS__) | |
157 #elif defined(__APPLE__) && __clang_major__ >= 14 // Xcode 14 marks sprintf as deprecated while still using C++98 by default | |
158 # define PUGI_IMPL_SNPRINTF(buf, fmt, arg1, arg2) snprintf(buf, sizeof(buf), fmt, arg1, arg2) | |
159 #else | |
160 # define PUGI_IMPL_SNPRINTF sprintf | |
161 #endif | |
162 | |
163 // We put implementation details into an anonymous namespace in source mode, but have to keep it in non-anonymous namespace in header-only mode to prevent binary bloat. | |
164 #ifdef PUGIXML_HEADER_ONLY | |
165 # define PUGI_IMPL_NS_BEGIN namespace pugi { namespace impl { | |
166 # define PUGI_IMPL_NS_END } } | |
167 # define PUGI_IMPL_FN inline | |
168 # define PUGI_IMPL_FN_NO_INLINE inline | |
169 #else | |
170 # if defined(_MSC_VER) && _MSC_VER < 1300 // MSVC6 seems to have an amusing bug with anonymous namespaces inside namespaces | |
171 # define PUGI_IMPL_NS_BEGIN namespace pugi { namespace impl { | |
172 # define PUGI_IMPL_NS_END } } | |
173 # else | |
174 # define PUGI_IMPL_NS_BEGIN namespace pugi { namespace impl { namespace { | |
175 # define PUGI_IMPL_NS_END } } } | |
176 # endif | |
177 # define PUGI_IMPL_FN | |
178 # define PUGI_IMPL_FN_NO_INLINE PUGI_IMPL_NO_INLINE | |
179 #endif | |
180 | |
181 // uintptr_t | |
182 #if (defined(_MSC_VER) && _MSC_VER < 1600) || (defined(__BORLANDC__) && __BORLANDC__ < 0x561) | |
183 namespace pugi | |
184 { | |
185 # ifndef _UINTPTR_T_DEFINED | |
186 typedef size_t uintptr_t; | |
187 # endif | |
188 | |
189 typedef unsigned __int8 uint8_t; | |
190 typedef unsigned __int16 uint16_t; | |
191 typedef unsigned __int32 uint32_t; | |
192 } | |
193 #else | |
194 # include <stdint.h> | |
195 #endif | |
196 | |
197 // Memory allocation | |
198 PUGI_IMPL_NS_BEGIN | |
199 PUGI_IMPL_FN void* default_allocate(size_t size) | |
200 { | |
201 return malloc(size); | |
202 } | |
203 | |
204 PUGI_IMPL_FN void default_deallocate(void* ptr) | |
205 { | |
206 free(ptr); | |
207 } | |
208 | |
209 template <typename T> | |
210 struct xml_memory_management_function_storage | |
211 { | |
212 static allocation_function allocate; | |
213 static deallocation_function deallocate; | |
214 }; | |
215 | |
216 // Global allocation functions are stored in class statics so that in header mode linker deduplicates them | |
217 // Without a template<> we'll get multiple definitions of the same static | |
218 template <typename T> allocation_function xml_memory_management_function_storage<T>::allocate = default_allocate; | |
219 template <typename T> deallocation_function xml_memory_management_function_storage<T>::deallocate = default_deallocate; | |
220 | |
221 typedef xml_memory_management_function_storage<int> xml_memory; | |
222 PUGI_IMPL_NS_END | |
223 | |
224 // String utilities | |
225 PUGI_IMPL_NS_BEGIN | |
226 // Get string length | |
227 PUGI_IMPL_FN size_t strlength(const char_t* s) | |
228 { | |
229 assert(s); | |
230 | |
231 #ifdef PUGIXML_WCHAR_MODE | |
232 return wcslen(s); | |
233 #else | |
234 return strlen(s); | |
235 #endif | |
236 } | |
237 | |
238 // Compare two strings | |
239 PUGI_IMPL_FN bool strequal(const char_t* src, const char_t* dst) | |
240 { | |
241 assert(src && dst); | |
242 | |
243 #ifdef PUGIXML_WCHAR_MODE | |
244 return wcscmp(src, dst) == 0; | |
245 #else | |
246 return strcmp(src, dst) == 0; | |
247 #endif | |
248 } | |
249 | |
250 // Compare lhs with [rhs_begin, rhs_end) | |
251 PUGI_IMPL_FN bool strequalrange(const char_t* lhs, const char_t* rhs, size_t count) | |
252 { | |
253 for (size_t i = 0; i < count; ++i) | |
254 if (lhs[i] != rhs[i]) | |
255 return false; | |
256 | |
257 return lhs[count] == 0; | |
258 } | |
259 | |
260 // Get length of wide string, even if CRT lacks wide character support | |
261 PUGI_IMPL_FN size_t strlength_wide(const wchar_t* s) | |
262 { | |
263 assert(s); | |
264 | |
265 #ifdef PUGIXML_WCHAR_MODE | |
266 return wcslen(s); | |
267 #else | |
268 const wchar_t* end = s; | |
269 while (*end) end++; | |
270 return static_cast<size_t>(end - s); | |
271 #endif | |
272 } | |
273 PUGI_IMPL_NS_END | |
274 | |
275 // auto_ptr-like object for exception recovery | |
276 PUGI_IMPL_NS_BEGIN | |
// Scope guard that owns a raw pointer together with the function used to dispose of it.
// On destruction the deleter is invoked on the pointer unless ownership was given up via release().
// The guard is deliberately non-copyable: a copy would invoke the deleter on the same pointer twice.
template <typename T> struct auto_deleter
{
	typedef void (*D)(T*);

	T* data;
	D deleter;

	// Takes ownership of data_; deleter_ is called on it when the guard is destroyed
	auto_deleter(T* data_, D deleter_): data(data_), deleter(deleter_)
	{
	}

	~auto_deleter()
	{
		// a null pointer means there is nothing to dispose of (never owned, or already released)
		if (data) deleter(data);
	}

	// Gives up ownership: returns the held pointer and leaves the guard empty
	T* release()
	{
		T* result = data;
		data = 0;
		return result;
	}

private:
	// Declared but never defined (C++98-compatible way of disabling copies)
	auto_deleter(const auto_deleter&);
	auto_deleter& operator=(const auto_deleter&);
};
300 PUGI_IMPL_NS_END | |
301 | |
302 #ifdef PUGIXML_COMPACT | |
303 PUGI_IMPL_NS_BEGIN | |
304 class compact_hash_table | |
305 { | |
306 public: | |
307 compact_hash_table(): _items(0), _capacity(0), _count(0) | |
308 { | |
309 } | |
310 | |
311 void clear() | |
312 { | |
313 if (_items) | |
314 { | |
315 xml_memory::deallocate(_items); | |
316 _items = 0; | |
317 _capacity = 0; | |
318 _count = 0; | |
319 } | |
320 } | |
321 | |
322 void* find(const void* key) | |
323 { | |
324 if (_capacity == 0) return 0; | |
325 | |
326 item_t* item = get_item(key); | |
327 assert(item); | |
328 assert(item->key == key || (item->key == 0 && item->value == 0)); | |
329 | |
330 return item->value; | |
331 } | |
332 | |
333 void insert(const void* key, void* value) | |
334 { | |
335 assert(_capacity != 0 && _count < _capacity - _capacity / 4); | |
336 | |
337 item_t* item = get_item(key); | |
338 assert(item); | |
339 | |
340 if (item->key == 0) | |
341 { | |
342 _count++; | |
343 item->key = key; | |
344 } | |
345 | |
346 item->value = value; | |
347 } | |
348 | |
349 bool reserve(size_t extra = 16) | |
350 { | |
351 if (_count + extra >= _capacity - _capacity / 4) | |
352 return rehash(_count + extra); | |
353 | |
354 return true; | |
355 } | |
356 | |
357 private: | |
358 struct item_t | |
359 { | |
360 const void* key; | |
361 void* value; | |
362 }; | |
363 | |
364 item_t* _items; | |
365 size_t _capacity; | |
366 | |
367 size_t _count; | |
368 | |
369 bool rehash(size_t count); | |
370 | |
371 item_t* get_item(const void* key) | |
372 { | |
373 assert(key); | |
374 assert(_capacity > 0); | |
375 | |
376 size_t hashmod = _capacity - 1; | |
377 size_t bucket = hash(key) & hashmod; | |
378 | |
379 for (size_t probe = 0; probe <= hashmod; ++probe) | |
380 { | |
381 item_t& probe_item = _items[bucket]; | |
382 | |
383 if (probe_item.key == key || probe_item.key == 0) | |
384 return &probe_item; | |
385 | |
386 // hash collision, quadratic probing | |
387 bucket = (bucket + probe + 1) & hashmod; | |
388 } | |
389 | |
390 assert(false && "Hash table is full"); // unreachable | |
391 return 0; | |
392 } | |
393 | |
394 static PUGI_IMPL_UNSIGNED_OVERFLOW unsigned int hash(const void* key) | |
395 { | |
396 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key) & 0xffffffff); | |
397 | |
398 // MurmurHash3 32-bit finalizer | |
399 h ^= h >> 16; | |
400 h *= 0x85ebca6bu; | |
401 h ^= h >> 13; | |
402 h *= 0xc2b2ae35u; | |
403 h ^= h >> 16; | |
404 | |
405 return h; | |
406 } | |
407 }; | |
408 | |
409 PUGI_IMPL_FN_NO_INLINE bool compact_hash_table::rehash(size_t count) | |
410 { | |
411 size_t capacity = 32; | |
412 while (count >= capacity - capacity / 4) | |
413 capacity *= 2; | |
414 | |
415 compact_hash_table rt; | |
416 rt._capacity = capacity; | |
417 rt._items = static_cast<item_t*>(xml_memory::allocate(sizeof(item_t) * capacity)); | |
418 | |
419 if (!rt._items) | |
420 return false; | |
421 | |
422 memset(rt._items, 0, sizeof(item_t) * capacity); | |
423 | |
424 for (size_t i = 0; i < _capacity; ++i) | |
425 if (_items[i].key) | |
426 rt.insert(_items[i].key, _items[i].value); | |
427 | |
428 if (_items) | |
429 xml_memory::deallocate(_items); | |
430 | |
431 _capacity = capacity; | |
432 _items = rt._items; | |
433 | |
434 assert(_count == rt._count); | |
435 | |
436 return true; | |
437 } | |
438 | |
439 PUGI_IMPL_NS_END | |
440 #endif | |
441 | |
442 PUGI_IMPL_NS_BEGIN | |
443 #ifdef PUGIXML_COMPACT | |
444 static const uintptr_t xml_memory_block_alignment = 4; | |
445 #else | |
446 static const uintptr_t xml_memory_block_alignment = sizeof(void*); | |
447 #endif | |
448 | |
449 // extra metadata bits | |
450 static const uintptr_t xml_memory_page_contents_shared_mask = 64; | |
451 static const uintptr_t xml_memory_page_name_allocated_mask = 32; | |
452 static const uintptr_t xml_memory_page_value_allocated_mask = 16; | |
453 static const uintptr_t xml_memory_page_type_mask = 15; | |
454 | |
455 // combined masks for string uniqueness | |
456 static const uintptr_t xml_memory_page_name_allocated_or_shared_mask = xml_memory_page_name_allocated_mask | xml_memory_page_contents_shared_mask; | |
457 static const uintptr_t xml_memory_page_value_allocated_or_shared_mask = xml_memory_page_value_allocated_mask | xml_memory_page_contents_shared_mask; | |
458 | |
459 #ifdef PUGIXML_COMPACT | |
460 #define PUGI_IMPL_GETHEADER_IMPL(object, page, flags) // unused | |
461 #define PUGI_IMPL_GETPAGE_IMPL(header) (header).get_page() | |
462 #else | |
463 #define PUGI_IMPL_GETHEADER_IMPL(object, page, flags) (((reinterpret_cast<char*>(object) - reinterpret_cast<char*>(page)) << 8) | (flags)) | |
464 // this macro casts pointers through void* to avoid 'cast increases required alignment of target type' warnings | |
465 #define PUGI_IMPL_GETPAGE_IMPL(header) static_cast<impl::xml_memory_page*>(const_cast<void*>(static_cast<const void*>(reinterpret_cast<const char*>(&header) - (header >> 8)))) | |
466 #endif | |
467 | |
468 #define PUGI_IMPL_GETPAGE(n) PUGI_IMPL_GETPAGE_IMPL((n)->header) | |
469 #define PUGI_IMPL_NODETYPE(n) static_cast<xml_node_type>((n)->header & impl::xml_memory_page_type_mask) | |
470 | |
471 struct xml_allocator; | |
472 | |
473 struct xml_memory_page | |
474 { | |
475 static xml_memory_page* construct(void* memory) | |
476 { | |
477 xml_memory_page* result = static_cast<xml_memory_page*>(memory); | |
478 | |
479 result->allocator = 0; | |
480 result->prev = 0; | |
481 result->next = 0; | |
482 result->busy_size = 0; | |
483 result->freed_size = 0; | |
484 | |
485 #ifdef PUGIXML_COMPACT | |
486 result->compact_string_base = 0; | |
487 result->compact_shared_parent = 0; | |
488 result->compact_page_marker = 0; | |
489 #endif | |
490 | |
491 return result; | |
492 } | |
493 | |
494 xml_allocator* allocator; | |
495 | |
496 xml_memory_page* prev; | |
497 xml_memory_page* next; | |
498 | |
499 size_t busy_size; | |
500 size_t freed_size; | |
501 | |
502 #ifdef PUGIXML_COMPACT | |
503 char_t* compact_string_base; | |
504 void* compact_shared_parent; | |
505 uint32_t* compact_page_marker; | |
506 #endif | |
507 }; | |
508 | |
509 static const size_t xml_memory_page_size = | |
510 #ifdef PUGIXML_MEMORY_PAGE_SIZE | |
511 (PUGIXML_MEMORY_PAGE_SIZE) | |
512 #else | |
513 32768 | |
514 #endif | |
515 - sizeof(xml_memory_page); | |
516 | |
// Bookkeeping header stored immediately in front of every string allocated from a page.
// Both fields are expressed in units of xml_memory_block_alignment rather than in bytes.
struct xml_memory_string_header
{
	// distance from the end of the page header (i.e. the start of the page payload) to this header
	uint16_t page_offset;

	// total allocation size including this header; 0 if the string occupies the whole page
	uint16_t full_size;
};
522 | |
523 struct xml_allocator | |
524 { | |
525 xml_allocator(xml_memory_page* root): _root(root), _busy_size(root->busy_size) | |
526 { | |
527 #ifdef PUGIXML_COMPACT | |
528 _hash = 0; | |
529 #endif | |
530 } | |
531 | |
532 xml_memory_page* allocate_page(size_t data_size) | |
533 { | |
534 size_t size = sizeof(xml_memory_page) + data_size; | |
535 | |
536 // allocate block with some alignment, leaving memory for worst-case padding | |
537 void* memory = xml_memory::allocate(size); | |
538 if (!memory) return 0; | |
539 | |
540 // prepare page structure | |
541 xml_memory_page* page = xml_memory_page::construct(memory); | |
542 assert(page); | |
543 | |
544 assert(this == _root->allocator); | |
545 page->allocator = this; | |
546 | |
547 return page; | |
548 } | |
549 | |
550 static void deallocate_page(xml_memory_page* page) | |
551 { | |
552 xml_memory::deallocate(page); | |
553 } | |
554 | |
555 void* allocate_memory_oob(size_t size, xml_memory_page*& out_page); | |
556 | |
557 void* allocate_memory(size_t size, xml_memory_page*& out_page) | |
558 { | |
559 if (PUGI_IMPL_UNLIKELY(_busy_size + size > xml_memory_page_size)) | |
560 return allocate_memory_oob(size, out_page); | |
561 | |
562 void* buf = reinterpret_cast<char*>(_root) + sizeof(xml_memory_page) + _busy_size; | |
563 | |
564 _busy_size += size; | |
565 | |
566 out_page = _root; | |
567 | |
568 return buf; | |
569 } | |
570 | |
571 #ifdef PUGIXML_COMPACT | |
572 void* allocate_object(size_t size, xml_memory_page*& out_page) | |
573 { | |
574 void* result = allocate_memory(size + sizeof(uint32_t), out_page); | |
575 if (!result) return 0; | |
576 | |
577 // adjust for marker | |
578 ptrdiff_t offset = static_cast<char*>(result) - reinterpret_cast<char*>(out_page->compact_page_marker); | |
579 | |
580 if (PUGI_IMPL_UNLIKELY(static_cast<uintptr_t>(offset) >= 256 * xml_memory_block_alignment)) | |
581 { | |
582 // insert new marker | |
583 uint32_t* marker = static_cast<uint32_t*>(result); | |
584 | |
585 *marker = static_cast<uint32_t>(reinterpret_cast<char*>(marker) - reinterpret_cast<char*>(out_page)); | |
586 out_page->compact_page_marker = marker; | |
587 | |
588 // since we don't reuse the page space until we reallocate it, we can just pretend that we freed the marker block | |
589 // this will make sure deallocate_memory correctly tracks the size | |
590 out_page->freed_size += sizeof(uint32_t); | |
591 | |
592 return marker + 1; | |
593 } | |
594 else | |
595 { | |
596 // roll back uint32_t part | |
597 _busy_size -= sizeof(uint32_t); | |
598 | |
599 return result; | |
600 } | |
601 } | |
602 #else | |
603 void* allocate_object(size_t size, xml_memory_page*& out_page) | |
604 { | |
605 return allocate_memory(size, out_page); | |
606 } | |
607 #endif | |
608 | |
609 void deallocate_memory(void* ptr, size_t size, xml_memory_page* page) | |
610 { | |
611 if (page == _root) page->busy_size = _busy_size; | |
612 | |
613 assert(ptr >= reinterpret_cast<char*>(page) + sizeof(xml_memory_page) && ptr < reinterpret_cast<char*>(page) + sizeof(xml_memory_page) + page->busy_size); | |
614 (void)!ptr; | |
615 | |
616 page->freed_size += size; | |
617 assert(page->freed_size <= page->busy_size); | |
618 | |
619 if (page->freed_size == page->busy_size) | |
620 { | |
621 if (page->next == 0) | |
622 { | |
623 assert(_root == page); | |
624 | |
625 // top page freed, just reset sizes | |
626 page->busy_size = 0; | |
627 page->freed_size = 0; | |
628 | |
629 #ifdef PUGIXML_COMPACT | |
630 // reset compact state to maximize efficiency | |
631 page->compact_string_base = 0; | |
632 page->compact_shared_parent = 0; | |
633 page->compact_page_marker = 0; | |
634 #endif | |
635 | |
636 _busy_size = 0; | |
637 } | |
638 else | |
639 { | |
640 assert(_root != page); | |
641 assert(page->prev); | |
642 | |
643 // remove from the list | |
644 page->prev->next = page->next; | |
645 page->next->prev = page->prev; | |
646 | |
647 // deallocate | |
648 deallocate_page(page); | |
649 } | |
650 } | |
651 } | |
652 | |
653 char_t* allocate_string(size_t length) | |
654 { | |
655 static const size_t max_encoded_offset = (1 << 16) * xml_memory_block_alignment; | |
656 | |
657 PUGI_IMPL_STATIC_ASSERT(xml_memory_page_size <= max_encoded_offset); | |
658 | |
659 // allocate memory for string and header block | |
660 size_t size = sizeof(xml_memory_string_header) + length * sizeof(char_t); | |
661 | |
662 // round size up to block alignment boundary | |
663 size_t full_size = (size + (xml_memory_block_alignment - 1)) & ~(xml_memory_block_alignment - 1); | |
664 | |
665 xml_memory_page* page; | |
666 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(allocate_memory(full_size, page)); | |
667 | |
668 if (!header) return 0; | |
669 | |
670 // setup header | |
671 ptrdiff_t page_offset = reinterpret_cast<char*>(header) - reinterpret_cast<char*>(page) - sizeof(xml_memory_page); | |
672 | |
673 assert(page_offset % xml_memory_block_alignment == 0); | |
674 assert(page_offset >= 0 && static_cast<size_t>(page_offset) < max_encoded_offset); | |
675 header->page_offset = static_cast<uint16_t>(static_cast<size_t>(page_offset) / xml_memory_block_alignment); | |
676 | |
677 // full_size == 0 for large strings that occupy the whole page | |
678 assert(full_size % xml_memory_block_alignment == 0); | |
679 assert(full_size < max_encoded_offset || (page->busy_size == full_size && page_offset == 0)); | |
680 header->full_size = static_cast<uint16_t>(full_size < max_encoded_offset ? full_size / xml_memory_block_alignment : 0); | |
681 | |
682 // round-trip through void* to avoid 'cast increases required alignment of target type' warning | |
683 // header is guaranteed a pointer-sized alignment, which should be enough for char_t | |
684 return static_cast<char_t*>(static_cast<void*>(header + 1)); | |
685 } | |
686 | |
687 void deallocate_string(char_t* string) | |
688 { | |
689 // this function casts pointers through void* to avoid 'cast increases required alignment of target type' warnings | |
690 // we're guaranteed the proper (pointer-sized) alignment on the input string if it was allocated via allocate_string | |
691 | |
692 // get header | |
693 xml_memory_string_header* header = static_cast<xml_memory_string_header*>(static_cast<void*>(string)) - 1; | |
694 assert(header); | |
695 | |
696 // deallocate | |
697 size_t page_offset = sizeof(xml_memory_page) + header->page_offset * xml_memory_block_alignment; | |
698 xml_memory_page* page = reinterpret_cast<xml_memory_page*>(static_cast<void*>(reinterpret_cast<char*>(header) - page_offset)); | |
699 | |
700 // if full_size == 0 then this string occupies the whole page | |
701 size_t full_size = header->full_size == 0 ? page->busy_size : header->full_size * xml_memory_block_alignment; | |
702 | |
703 deallocate_memory(header, full_size, page); | |
704 } | |
705 | |
706 bool reserve() | |
707 { | |
708 #ifdef PUGIXML_COMPACT | |
709 return _hash->reserve(); | |
710 #else | |
711 return true; | |
712 #endif | |
713 } | |
714 | |
715 xml_memory_page* _root; | |
716 size_t _busy_size; | |
717 | |
718 #ifdef PUGIXML_COMPACT | |
719 compact_hash_table* _hash; | |
720 #endif | |
721 }; | |
722 | |
723 PUGI_IMPL_FN_NO_INLINE void* xml_allocator::allocate_memory_oob(size_t size, xml_memory_page*& out_page) | |
724 { | |
725 const size_t large_allocation_threshold = xml_memory_page_size / 4; | |
726 | |
727 xml_memory_page* page = allocate_page(size <= large_allocation_threshold ? xml_memory_page_size : size); | |
728 out_page = page; | |
729 | |
730 if (!page) return 0; | |
731 | |
732 if (size <= large_allocation_threshold) | |
733 { | |
734 _root->busy_size = _busy_size; | |
735 | |
736 // insert page at the end of linked list | |
737 page->prev = _root; | |
738 _root->next = page; | |
739 _root = page; | |
740 | |
741 _busy_size = size; | |
742 } | |
743 else | |
744 { | |
745 // insert page before the end of linked list, so that it is deleted as soon as possible | |
746 // the last page is not deleted even if it's empty (see deallocate_memory) | |
747 assert(_root->prev); | |
748 | |
749 page->prev = _root->prev; | |
750 page->next = _root; | |
751 | |
752 _root->prev->next = page; | |
753 _root->prev = page; | |
754 | |
755 page->busy_size = size; | |
756 } | |
757 | |
758 return reinterpret_cast<char*>(page) + sizeof(xml_memory_page); | |
759 } | |
760 PUGI_IMPL_NS_END | |
761 | |
762 #ifdef PUGIXML_COMPACT | |
763 PUGI_IMPL_NS_BEGIN | |
764 static const uintptr_t compact_alignment_log2 = 2; | |
765 static const uintptr_t compact_alignment = 1 << compact_alignment_log2; | |
766 | |
767 class compact_header | |
768 { | |
769 public: | |
770 compact_header(xml_memory_page* page, unsigned int flags) | |
771 { | |
772 PUGI_IMPL_STATIC_ASSERT(xml_memory_block_alignment == compact_alignment); | |
773 | |
774 ptrdiff_t offset = (reinterpret_cast<char*>(this) - reinterpret_cast<char*>(page->compact_page_marker)); | |
775 assert(offset % compact_alignment == 0 && static_cast<uintptr_t>(offset) < 256 * compact_alignment); | |
776 | |
777 _page = static_cast<unsigned char>(offset >> compact_alignment_log2); | |
778 _flags = static_cast<unsigned char>(flags); | |
779 } | |
780 | |
781 void operator&=(uintptr_t mod) | |
782 { | |
783 _flags &= static_cast<unsigned char>(mod); | |
784 } | |
785 | |
786 void operator|=(uintptr_t mod) | |
787 { | |
788 _flags |= static_cast<unsigned char>(mod); | |
789 } | |
790 | |
791 uintptr_t operator&(uintptr_t mod) const | |
792 { | |
793 return _flags & mod; | |
794 } | |
795 | |
796 xml_memory_page* get_page() const | |
797 { | |
798 // round-trip through void* to silence 'cast increases required alignment of target type' warnings | |
799 const char* page_marker = reinterpret_cast<const char*>(this) - (_page << compact_alignment_log2); | |
800 const char* page = page_marker - *reinterpret_cast<const uint32_t*>(static_cast<const void*>(page_marker)); | |
801 | |
802 return const_cast<xml_memory_page*>(reinterpret_cast<const xml_memory_page*>(static_cast<const void*>(page))); | |
803 } | |
804 | |
805 private: | |
806 unsigned char _page; | |
807 unsigned char _flags; | |
808 }; | |
809 | |
810 PUGI_IMPL_FN xml_memory_page* compact_get_page(const void* object, int header_offset) | |
811 { | |
812 const compact_header* header = reinterpret_cast<const compact_header*>(static_cast<const char*>(object) - header_offset); | |
813 | |
814 return header->get_page(); | |
815 } | |
816 | |
817 template <int header_offset, typename T> PUGI_IMPL_FN_NO_INLINE T* compact_get_value(const void* object) | |
818 { | |
819 return static_cast<T*>(compact_get_page(object, header_offset)->allocator->_hash->find(object)); | |
820 } | |
821 | |
822 template <int header_offset, typename T> PUGI_IMPL_FN_NO_INLINE void compact_set_value(const void* object, T* value) | |
823 { | |
824 compact_get_page(object, header_offset)->allocator->_hash->insert(object, value); | |
825 } | |
826 | |
827 template <typename T, int header_offset, int start = -126> class compact_pointer | |
828 { | |
829 public: | |
830 compact_pointer(): _data(0) | |
831 { | |
832 } | |
833 | |
834 void operator=(const compact_pointer& rhs) | |
835 { | |
836 *this = rhs + 0; | |
837 } | |
838 | |
839 void operator=(T* value) | |
840 { | |
841 if (value) | |
842 { | |
843 // value is guaranteed to be compact-aligned; 'this' is not | |
844 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) | |
845 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to | |
846 // compensate for arithmetic shift rounding for negative values | |
847 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); | |
848 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) - start; | |
849 | |
850 if (static_cast<uintptr_t>(offset) <= 253) | |
851 _data = static_cast<unsigned char>(offset + 1); | |
852 else | |
853 { | |
854 compact_set_value<header_offset>(this, value); | |
855 | |
856 _data = 255; | |
857 } | |
858 } | |
859 else | |
860 _data = 0; | |
861 } | |
862 | |
863 operator T*() const | |
864 { | |
865 if (_data) | |
866 { | |
867 if (_data < 255) | |
868 { | |
869 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); | |
870 | |
871 return reinterpret_cast<T*>(base + (_data - 1 + start) * compact_alignment); | |
872 } | |
873 else | |
874 return compact_get_value<header_offset, T>(this); | |
875 } | |
876 else | |
877 return 0; | |
878 } | |
879 | |
880 T* operator->() const | |
881 { | |
882 return *this; | |
883 } | |
884 | |
885 private: | |
886 unsigned char _data; | |
887 }; | |
888 | |
889 template <typename T, int header_offset> class compact_pointer_parent | |
890 { | |
891 public: | |
892 compact_pointer_parent(): _data(0) | |
893 { | |
894 } | |
895 | |
896 void operator=(const compact_pointer_parent& rhs) | |
897 { | |
898 *this = rhs + 0; | |
899 } | |
900 | |
901 void operator=(T* value) | |
902 { | |
903 if (value) | |
904 { | |
905 // value is guaranteed to be compact-aligned; 'this' is not | |
906 // our decoding is based on 'this' aligned to compact alignment downwards (see operator T*) | |
907 // so for negative offsets (e.g. -3) we need to adjust the diff by compact_alignment - 1 to | |
908 // compensate for arithmetic shift behavior for negative values | |
909 ptrdiff_t diff = reinterpret_cast<char*>(value) - reinterpret_cast<char*>(this); | |
910 ptrdiff_t offset = ((diff + int(compact_alignment - 1)) >> compact_alignment_log2) + 65533; | |
911 | |
912 if (static_cast<uintptr_t>(offset) <= 65533) | |
913 { | |
914 _data = static_cast<unsigned short>(offset + 1); | |
915 } | |
916 else | |
917 { | |
918 xml_memory_page* page = compact_get_page(this, header_offset); | |
919 | |
920 if (PUGI_IMPL_UNLIKELY(page->compact_shared_parent == 0)) | |
921 page->compact_shared_parent = value; | |
922 | |
923 if (page->compact_shared_parent == value) | |
924 { | |
925 _data = 65534; | |
926 } | |
927 else | |
928 { | |
929 compact_set_value<header_offset>(this, value); | |
930 | |
931 _data = 65535; | |
932 } | |
933 } | |
934 } | |
935 else | |
936 { | |
937 _data = 0; | |
938 } | |
939 } | |
940 | |
941 operator T*() const | |
942 { | |
943 if (_data) | |
944 { | |
945 if (_data < 65534) | |
946 { | |
947 uintptr_t base = reinterpret_cast<uintptr_t>(this) & ~(compact_alignment - 1); | |
948 | |
949 return reinterpret_cast<T*>(base + (_data - 1 - 65533) * compact_alignment); | |
950 } | |
951 else if (_data == 65534) | |
952 return static_cast<T*>(compact_get_page(this, header_offset)->compact_shared_parent); | |
953 else | |
954 return compact_get_value<header_offset, T>(this); | |
955 } | |
956 else | |
957 return 0; | |
958 } | |
959 | |
960 T* operator->() const | |
961 { | |
962 return *this; | |
963 } | |
964 | |
965 private: | |
966 uint16_t _data; | |
967 }; | |
968 | |
969 template <int header_offset, int base_offset> class compact_string | |
970 { | |
971 public: | |
972 compact_string(): _data(0) | |
973 { | |
974 } | |
975 | |
976 void operator=(const compact_string& rhs) | |
977 { | |
978 *this = rhs + 0; | |
979 } | |
980 | |
981 void operator=(char_t* value) | |
982 { | |
983 if (value) | |
984 { | |
985 xml_memory_page* page = compact_get_page(this, header_offset); | |
986 | |
987 if (PUGI_IMPL_UNLIKELY(page->compact_string_base == 0)) | |
988 page->compact_string_base = value; | |
989 | |
990 ptrdiff_t offset = value - page->compact_string_base; | |
991 | |
992 if (static_cast<uintptr_t>(offset) < (65535 << 7)) | |
993 { | |
994 // round-trip through void* to silence 'cast increases required alignment of target type' warnings | |
995 uint16_t* base = reinterpret_cast<uint16_t*>(static_cast<void*>(reinterpret_cast<char*>(this) - base_offset)); | |
996 | |
997 if (*base == 0) | |
998 { | |
999 *base = static_cast<uint16_t>((offset >> 7) + 1); | |
1000 _data = static_cast<unsigned char>((offset & 127) + 1); | |
1001 } | |
1002 else | |
1003 { | |
1004 ptrdiff_t remainder = offset - ((*base - 1) << 7); | |
1005 | |
1006 if (static_cast<uintptr_t>(remainder) <= 253) | |
1007 { | |
1008 _data = static_cast<unsigned char>(remainder + 1); | |
1009 } | |
1010 else | |
1011 { | |
1012 compact_set_value<header_offset>(this, value); | |
1013 | |
1014 _data = 255; | |
1015 } | |
1016 } | |
1017 } | |
1018 else | |
1019 { | |
1020 compact_set_value<header_offset>(this, value); | |
1021 | |
1022 _data = 255; | |
1023 } | |
1024 } | |
1025 else | |
1026 { | |
1027 _data = 0; | |
1028 } | |
1029 } | |
1030 | |
1031 operator char_t*() const | |
1032 { | |
1033 if (_data) | |
1034 { | |
1035 if (_data < 255) | |
1036 { | |
1037 xml_memory_page* page = compact_get_page(this, header_offset); | |
1038 | |
1039 // round-trip through void* to silence 'cast increases required alignment of target type' warnings | |
1040 const uint16_t* base = reinterpret_cast<const uint16_t*>(static_cast<const void*>(reinterpret_cast<const char*>(this) - base_offset)); | |
1041 assert(*base); | |
1042 | |
1043 ptrdiff_t offset = ((*base - 1) << 7) + (_data - 1); | |
1044 | |
1045 return page->compact_string_base + offset; | |
1046 } | |
1047 else | |
1048 { | |
1049 return compact_get_value<header_offset, char_t>(this); | |
1050 } | |
1051 } | |
1052 else | |
1053 return 0; | |
1054 } | |
1055 | |
1056 private: | |
1057 unsigned char _data; | |
1058 }; | |
1059 PUGI_IMPL_NS_END | |
1060 #endif | |
1061 | |
1062 #ifdef PUGIXML_COMPACT | |
1063 namespace pugi | |
1064 { | |
1065 struct xml_attribute_struct | |
1066 { | |
1067 xml_attribute_struct(impl::xml_memory_page* page): header(page, 0), namevalue_base(0) | |
1068 { | |
1069 PUGI_IMPL_STATIC_ASSERT(sizeof(xml_attribute_struct) == 8); | |
1070 } | |
1071 | |
1072 impl::compact_header header; | |
1073 | |
1074 uint16_t namevalue_base; | |
1075 | |
1076 impl::compact_string<4, 2> name; | |
1077 impl::compact_string<5, 3> value; | |
1078 | |
1079 impl::compact_pointer<xml_attribute_struct, 6> prev_attribute_c; | |
1080 impl::compact_pointer<xml_attribute_struct, 7, 0> next_attribute; | |
1081 }; | |
1082 | |
1083 struct xml_node_struct | |
1084 { | |
1085 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): header(page, type), namevalue_base(0) | |
1086 { | |
1087 PUGI_IMPL_STATIC_ASSERT(sizeof(xml_node_struct) == 12); | |
1088 } | |
1089 | |
1090 impl::compact_header header; | |
1091 | |
1092 uint16_t namevalue_base; | |
1093 | |
1094 impl::compact_string<4, 2> name; | |
1095 impl::compact_string<5, 3> value; | |
1096 | |
1097 impl::compact_pointer_parent<xml_node_struct, 6> parent; | |
1098 | |
1099 impl::compact_pointer<xml_node_struct, 8, 0> first_child; | |
1100 | |
1101 impl::compact_pointer<xml_node_struct, 9> prev_sibling_c; | |
1102 impl::compact_pointer<xml_node_struct, 10, 0> next_sibling; | |
1103 | |
1104 impl::compact_pointer<xml_attribute_struct, 11, 0> first_attribute; | |
1105 }; | |
1106 } | |
1107 #else | |
1108 namespace pugi | |
1109 { | |
1110 struct xml_attribute_struct | |
1111 { | |
1112 xml_attribute_struct(impl::xml_memory_page* page): name(0), value(0), prev_attribute_c(0), next_attribute(0) | |
1113 { | |
1114 header = PUGI_IMPL_GETHEADER_IMPL(this, page, 0); | |
1115 } | |
1116 | |
1117 uintptr_t header; | |
1118 | |
1119 char_t* name; | |
1120 char_t* value; | |
1121 | |
1122 xml_attribute_struct* prev_attribute_c; | |
1123 xml_attribute_struct* next_attribute; | |
1124 }; | |
1125 | |
1126 struct xml_node_struct | |
1127 { | |
1128 xml_node_struct(impl::xml_memory_page* page, xml_node_type type): name(0), value(0), parent(0), first_child(0), prev_sibling_c(0), next_sibling(0), first_attribute(0) | |
1129 { | |
1130 header = PUGI_IMPL_GETHEADER_IMPL(this, page, type); | |
1131 } | |
1132 | |
1133 uintptr_t header; | |
1134 | |
1135 char_t* name; | |
1136 char_t* value; | |
1137 | |
1138 xml_node_struct* parent; | |
1139 | |
1140 xml_node_struct* first_child; | |
1141 | |
1142 xml_node_struct* prev_sibling_c; | |
1143 xml_node_struct* next_sibling; | |
1144 | |
1145 xml_attribute_struct* first_attribute; | |
1146 }; | |
1147 } | |
1148 #endif | |
1149 | |
1150 PUGI_IMPL_NS_BEGIN | |
1151 struct xml_extra_buffer | |
1152 { | |
1153 char_t* buffer; | |
1154 xml_extra_buffer* next; | |
1155 }; | |
1156 | |
1157 struct xml_document_struct: public xml_node_struct, public xml_allocator | |
1158 { | |
1159 xml_document_struct(xml_memory_page* page): xml_node_struct(page, node_document), xml_allocator(page), buffer(0), extra_buffers(0) | |
1160 { | |
1161 } | |
1162 | |
1163 const char_t* buffer; | |
1164 | |
1165 xml_extra_buffer* extra_buffers; | |
1166 | |
1167 #ifdef PUGIXML_COMPACT | |
1168 compact_hash_table hash; | |
1169 #endif | |
1170 }; | |
1171 | |
1172 template <typename Object> inline xml_allocator& get_allocator(const Object* object) | |
1173 { | |
1174 assert(object); | |
1175 | |
1176 return *PUGI_IMPL_GETPAGE(object)->allocator; | |
1177 } | |
1178 | |
1179 template <typename Object> inline xml_document_struct& get_document(const Object* object) | |
1180 { | |
1181 assert(object); | |
1182 | |
1183 return *static_cast<xml_document_struct*>(PUGI_IMPL_GETPAGE(object)->allocator); | |
1184 } | |
1185 PUGI_IMPL_NS_END | |
1186 | |
1187 // Low-level DOM operations | |
1188 PUGI_IMPL_NS_BEGIN | |
1189 inline xml_attribute_struct* allocate_attribute(xml_allocator& alloc) | |
1190 { | |
1191 xml_memory_page* page; | |
1192 void* memory = alloc.allocate_object(sizeof(xml_attribute_struct), page); | |
1193 if (!memory) return 0; | |
1194 | |
1195 return new (memory) xml_attribute_struct(page); | |
1196 } | |
1197 | |
1198 inline xml_node_struct* allocate_node(xml_allocator& alloc, xml_node_type type) | |
1199 { | |
1200 xml_memory_page* page; | |
1201 void* memory = alloc.allocate_object(sizeof(xml_node_struct), page); | |
1202 if (!memory) return 0; | |
1203 | |
1204 return new (memory) xml_node_struct(page, type); | |
1205 } | |
1206 | |
1207 inline void destroy_attribute(xml_attribute_struct* a, xml_allocator& alloc) | |
1208 { | |
1209 if (a->header & impl::xml_memory_page_name_allocated_mask) | |
1210 alloc.deallocate_string(a->name); | |
1211 | |
1212 if (a->header & impl::xml_memory_page_value_allocated_mask) | |
1213 alloc.deallocate_string(a->value); | |
1214 | |
1215 alloc.deallocate_memory(a, sizeof(xml_attribute_struct), PUGI_IMPL_GETPAGE(a)); | |
1216 } | |
1217 | |
1218 inline void destroy_node(xml_node_struct* n, xml_allocator& alloc) | |
1219 { | |
1220 if (n->header & impl::xml_memory_page_name_allocated_mask) | |
1221 alloc.deallocate_string(n->name); | |
1222 | |
1223 if (n->header & impl::xml_memory_page_value_allocated_mask) | |
1224 alloc.deallocate_string(n->value); | |
1225 | |
1226 for (xml_attribute_struct* attr = n->first_attribute; attr; ) | |
1227 { | |
1228 xml_attribute_struct* next = attr->next_attribute; | |
1229 | |
1230 destroy_attribute(attr, alloc); | |
1231 | |
1232 attr = next; | |
1233 } | |
1234 | |
1235 for (xml_node_struct* child = n->first_child; child; ) | |
1236 { | |
1237 xml_node_struct* next = child->next_sibling; | |
1238 | |
1239 destroy_node(child, alloc); | |
1240 | |
1241 child = next; | |
1242 } | |
1243 | |
1244 alloc.deallocate_memory(n, sizeof(xml_node_struct), PUGI_IMPL_GETPAGE(n)); | |
1245 } | |
1246 | |
1247 inline void append_node(xml_node_struct* child, xml_node_struct* node) | |
1248 { | |
1249 child->parent = node; | |
1250 | |
1251 xml_node_struct* head = node->first_child; | |
1252 | |
1253 if (head) | |
1254 { | |
1255 xml_node_struct* tail = head->prev_sibling_c; | |
1256 | |
1257 tail->next_sibling = child; | |
1258 child->prev_sibling_c = tail; | |
1259 head->prev_sibling_c = child; | |
1260 } | |
1261 else | |
1262 { | |
1263 node->first_child = child; | |
1264 child->prev_sibling_c = child; | |
1265 } | |
1266 } | |
1267 | |
1268 inline void prepend_node(xml_node_struct* child, xml_node_struct* node) | |
1269 { | |
1270 child->parent = node; | |
1271 | |
1272 xml_node_struct* head = node->first_child; | |
1273 | |
1274 if (head) | |
1275 { | |
1276 child->prev_sibling_c = head->prev_sibling_c; | |
1277 head->prev_sibling_c = child; | |
1278 } | |
1279 else | |
1280 child->prev_sibling_c = child; | |
1281 | |
1282 child->next_sibling = head; | |
1283 node->first_child = child; | |
1284 } | |
1285 | |
1286 inline void insert_node_after(xml_node_struct* child, xml_node_struct* node) | |
1287 { | |
1288 xml_node_struct* parent = node->parent; | |
1289 | |
1290 child->parent = parent; | |
1291 | |
1292 xml_node_struct* next = node->next_sibling; | |
1293 | |
1294 if (next) | |
1295 next->prev_sibling_c = child; | |
1296 else | |
1297 parent->first_child->prev_sibling_c = child; | |
1298 | |
1299 child->next_sibling = next; | |
1300 child->prev_sibling_c = node; | |
1301 | |
1302 node->next_sibling = child; | |
1303 } | |
1304 | |
1305 inline void insert_node_before(xml_node_struct* child, xml_node_struct* node) | |
1306 { | |
1307 xml_node_struct* parent = node->parent; | |
1308 | |
1309 child->parent = parent; | |
1310 | |
1311 xml_node_struct* prev = node->prev_sibling_c; | |
1312 | |
1313 if (prev->next_sibling) | |
1314 prev->next_sibling = child; | |
1315 else | |
1316 parent->first_child = child; | |
1317 | |
1318 child->prev_sibling_c = prev; | |
1319 child->next_sibling = node; | |
1320 | |
1321 node->prev_sibling_c = child; | |
1322 } | |
1323 | |
1324 inline void remove_node(xml_node_struct* node) | |
1325 { | |
1326 xml_node_struct* parent = node->parent; | |
1327 | |
1328 xml_node_struct* next = node->next_sibling; | |
1329 xml_node_struct* prev = node->prev_sibling_c; | |
1330 | |
1331 if (next) | |
1332 next->prev_sibling_c = prev; | |
1333 else | |
1334 parent->first_child->prev_sibling_c = prev; | |
1335 | |
1336 if (prev->next_sibling) | |
1337 prev->next_sibling = next; | |
1338 else | |
1339 parent->first_child = next; | |
1340 | |
1341 node->parent = 0; | |
1342 node->prev_sibling_c = 0; | |
1343 node->next_sibling = 0; | |
1344 } | |
1345 | |
1346 inline void append_attribute(xml_attribute_struct* attr, xml_node_struct* node) | |
1347 { | |
1348 xml_attribute_struct* head = node->first_attribute; | |
1349 | |
1350 if (head) | |
1351 { | |
1352 xml_attribute_struct* tail = head->prev_attribute_c; | |
1353 | |
1354 tail->next_attribute = attr; | |
1355 attr->prev_attribute_c = tail; | |
1356 head->prev_attribute_c = attr; | |
1357 } | |
1358 else | |
1359 { | |
1360 node->first_attribute = attr; | |
1361 attr->prev_attribute_c = attr; | |
1362 } | |
1363 } | |
1364 | |
1365 inline void prepend_attribute(xml_attribute_struct* attr, xml_node_struct* node) | |
1366 { | |
1367 xml_attribute_struct* head = node->first_attribute; | |
1368 | |
1369 if (head) | |
1370 { | |
1371 attr->prev_attribute_c = head->prev_attribute_c; | |
1372 head->prev_attribute_c = attr; | |
1373 } | |
1374 else | |
1375 attr->prev_attribute_c = attr; | |
1376 | |
1377 attr->next_attribute = head; | |
1378 node->first_attribute = attr; | |
1379 } | |
1380 | |
1381 inline void insert_attribute_after(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) | |
1382 { | |
1383 xml_attribute_struct* next = place->next_attribute; | |
1384 | |
1385 if (next) | |
1386 next->prev_attribute_c = attr; | |
1387 else | |
1388 node->first_attribute->prev_attribute_c = attr; | |
1389 | |
1390 attr->next_attribute = next; | |
1391 attr->prev_attribute_c = place; | |
1392 place->next_attribute = attr; | |
1393 } | |
1394 | |
1395 inline void insert_attribute_before(xml_attribute_struct* attr, xml_attribute_struct* place, xml_node_struct* node) | |
1396 { | |
1397 xml_attribute_struct* prev = place->prev_attribute_c; | |
1398 | |
1399 if (prev->next_attribute) | |
1400 prev->next_attribute = attr; | |
1401 else | |
1402 node->first_attribute = attr; | |
1403 | |
1404 attr->prev_attribute_c = prev; | |
1405 attr->next_attribute = place; | |
1406 place->prev_attribute_c = attr; | |
1407 } | |
1408 | |
1409 inline void remove_attribute(xml_attribute_struct* attr, xml_node_struct* node) | |
1410 { | |
1411 xml_attribute_struct* next = attr->next_attribute; | |
1412 xml_attribute_struct* prev = attr->prev_attribute_c; | |
1413 | |
1414 if (next) | |
1415 next->prev_attribute_c = prev; | |
1416 else | |
1417 node->first_attribute->prev_attribute_c = prev; | |
1418 | |
1419 if (prev->next_attribute) | |
1420 prev->next_attribute = next; | |
1421 else | |
1422 node->first_attribute = next; | |
1423 | |
1424 attr->prev_attribute_c = 0; | |
1425 attr->next_attribute = 0; | |
1426 } | |
1427 | |
1428 PUGI_IMPL_FN_NO_INLINE xml_node_struct* append_new_node(xml_node_struct* node, xml_allocator& alloc, xml_node_type type = node_element) | |
1429 { | |
1430 if (!alloc.reserve()) return 0; | |
1431 | |
1432 xml_node_struct* child = allocate_node(alloc, type); | |
1433 if (!child) return 0; | |
1434 | |
1435 append_node(child, node); | |
1436 | |
1437 return child; | |
1438 } | |
1439 | |
1440 PUGI_IMPL_FN_NO_INLINE xml_attribute_struct* append_new_attribute(xml_node_struct* node, xml_allocator& alloc) | |
1441 { | |
1442 if (!alloc.reserve()) return 0; | |
1443 | |
1444 xml_attribute_struct* attr = allocate_attribute(alloc); | |
1445 if (!attr) return 0; | |
1446 | |
1447 append_attribute(attr, node); | |
1448 | |
1449 return attr; | |
1450 } | |
1451 PUGI_IMPL_NS_END | |
1452 | |
1453 // Helper classes for code generation | |
1454 PUGI_IMPL_NS_BEGIN | |
// Compile-time "false" tag: code templated on an option type reads opt::value.
struct opt_false
{
    enum
    {
        value = 0
    };
};
1459 | |
// Compile-time "true" tag: code templated on an option type reads opt::value.
struct opt_true
{
    enum
    {
        value = 1
    };
};
1464 PUGI_IMPL_NS_END | |
1465 | |
1466 // Unicode utilities | |
1467 PUGI_IMPL_NS_BEGIN | |
// Exchanges the two bytes of a 16-bit value.
inline uint16_t endian_swap(uint16_t value)
{
    const unsigned int low_byte = value & 0xff;
    const unsigned int high_byte = value >> 8;

    return static_cast<uint16_t>((low_byte << 8) | high_byte);
}
1472 | |
// Reverses the byte order of a 32-bit value.
inline uint32_t endian_swap(uint32_t value)
{
    const uint32_t byte0 = value & 0xff;
    const uint32_t byte1 = value & 0xff00;
    const uint32_t byte2 = value & 0xff0000;
    const uint32_t byte3 = value >> 24;

    return (byte0 << 24) | (byte1 << 8) | (byte2 >> 8) | byte3;
}
1477 | |
// Output traits that only count how many UTF-8 bytes the code points would occupy.
struct utf8_counter
{
    typedef size_t value_type;

    // Code points below U+10000 take 1, 2 or 3 bytes.
    static value_type low(value_type result, uint32_t ch)
    {
        size_t length = 3;       // U+0800..U+FFFF

        if (ch < 0x80)
            length = 1;          // U+0000..U+007F
        else if (ch < 0x800)
            length = 2;          // U+0080..U+07FF

        return result + length;
    }

    // U+10000..U+10FFFF always take 4 bytes.
    static value_type high(value_type result, uint32_t)
    {
        return result + 4;
    }
};
1498 | |
// Output traits that encode code points as UTF-8 into a caller-provided buffer.
// Each function returns the position just past the bytes it wrote.
struct utf8_writer
{
    typedef uint8_t* value_type;

    // Encodes a code point below U+10000 using 1, 2 or 3 bytes.
    static value_type low(value_type result, uint32_t ch)
    {
        // U+0000..U+007F
        if (ch < 0x80)
        {
            *result++ = static_cast<uint8_t>(ch);
            return result;
        }

        // U+0080..U+07FF
        if (ch < 0x800)
        {
            *result++ = static_cast<uint8_t>(0xC0 | (ch >> 6));
            *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
            return result;
        }

        // U+0800..U+FFFF
        *result++ = static_cast<uint8_t>(0xE0 | (ch >> 12));
        *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
        return result;
    }

    // Encodes a code point in U+10000..U+10FFFF using 4 bytes.
    static value_type high(value_type result, uint32_t ch)
    {
        *result++ = static_cast<uint8_t>(0xF0 | (ch >> 18));
        *result++ = static_cast<uint8_t>(0x80 | ((ch >> 12) & 0x3F));
        *result++ = static_cast<uint8_t>(0x80 | ((ch >> 6) & 0x3F));
        *result++ = static_cast<uint8_t>(0x80 | (ch & 0x3F));
        return result;
    }

    // Dispatches to low() or high() depending on the code point.
    static value_type any(value_type result, uint32_t ch)
    {
        if (ch < 0x10000)
            return low(result, ch);

        return high(result, ch);
    }
};
1543 | |
// Output traits that only count UTF-16 code units.
struct utf16_counter
{
    typedef size_t value_type;

    // one code unit per code point passed to low()
    static value_type low(value_type result, uint32_t)
    {
        const size_t units = 1;

        return result + units;
    }

    // two code units (a surrogate pair) per code point passed to high()
    static value_type high(value_type result, uint32_t)
    {
        const size_t units = 2;

        return result + units;
    }
};
1558 | |
// Output traits that encode code points as UTF-16 code units into a caller-provided buffer.
// Each function returns the position just past the units it wrote.
struct utf16_writer
{
    typedef uint16_t* value_type;

    // Stores a code point below U+10000 as a single code unit.
    static value_type low(value_type result, uint32_t ch)
    {
        result[0] = static_cast<uint16_t>(ch);

        return result + 1;
    }

    // Stores a code point from U+10000 upwards as a surrogate pair.
    static value_type high(value_type result, uint32_t ch)
    {
        const uint32_t offset = static_cast<uint32_t>(ch - 0x10000);

        const uint32_t lead_bits = offset >> 10;
        const uint32_t trail_bits = offset & 0x3ff;

        result[0] = static_cast<uint16_t>(0xD800 + lead_bits);
        result[1] = static_cast<uint16_t>(0xDC00 + trail_bits);

        return result + 2;
    }

    // Dispatches to low() or high() depending on the code point.
    static value_type any(value_type result, uint32_t ch)
    {
        if (ch < 0x10000)
            return low(result, ch);

        return high(result, ch);
    }
};
1586 | |
// Output traits that only count UTF-32 code units: every code point is exactly one unit.
struct utf32_counter
{
    typedef size_t value_type;

    static value_type low(value_type result, uint32_t)
    {
        const size_t units = 1;

        return result + units;
    }

    static value_type high(value_type result, uint32_t)
    {
        const size_t units = 1;

        return result + units;
    }
};
1601 | |
// Output traits that store code points as UTF-32 units into a caller-provided buffer.
// All three entry points store the code point unchanged and advance by one unit.
struct utf32_writer
{
    typedef uint32_t* value_type;

    static value_type low(value_type result, uint32_t ch)
    {
        result[0] = ch;

        return result + 1;
    }

    static value_type high(value_type result, uint32_t ch)
    {
        result[0] = ch;

        return result + 1;
    }

    static value_type any(value_type result, uint32_t ch)
    {
        result[0] = ch;

        return result + 1;
    }
};
1627 | |
// Output traits that store code points as single bytes; anything above 255 becomes '?'.
struct latin1_writer
{
    typedef uint8_t* value_type;

    static value_type low(value_type result, uint32_t ch)
    {
        uint8_t encoded = '?';

        if (ch <= 255)
            encoded = static_cast<uint8_t>(ch);

        *result = encoded;

        return result + 1;
    }

    // code points passed to high() never fit into one byte
    static value_type high(value_type result, uint32_t ch)
    {
        (void)ch;

        result[0] = '?';

        return result + 1;
    }
};
1648 | |
// Decodes UTF-8 input and feeds every decoded code point to the Traits sink.
struct utf8_decoder
{
    typedef uint8_t type;

    // data/size: input bytes; result: initial sink state; the Traits argument is only used for
    // type deduction. Code points below U+10000 go to Traits::low, 4-byte sequences go to
    // Traits::high. A byte that does not start a well-formed sequence (bad lead byte, missing
    // or malformed continuation bytes, truncated input) is skipped on its own.
    // Returns the final sink state.
    template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
    {
        const uint8_t utf8_byte_mask = 0x3f;

        while (size)
        {
            uint8_t lead = *data;

            // 0xxxxxxx -> U+0000..U+007F
            if (lead < 0x80)
            {
                result = Traits::low(result, lead);
                data += 1;
                size -= 1;

                // process aligned single-byte (ascii) blocks
                if ((reinterpret_cast<uintptr_t>(data) & 3) == 0)
                {
                    while (size >= 4)
                    {
                        // load the 4 bytes with memcpy: reading the byte buffer through a
                        // uint32_t lvalue would violate strict aliasing rules
                        uint32_t block;
                        memcpy(&block, data, sizeof(block));

                        // any byte with the top bit set ends the pure-ascii run
                        if ((block & 0x80808080) != 0)
                            break;

                        result = Traits::low(result, data[0]);
                        result = Traits::low(result, data[1]);
                        result = Traits::low(result, data[2]);
                        result = Traits::low(result, data[3]);
                        data += 4;
                        size -= 4;
                    }
                }
            }
            // 110xxxxx -> U+0080..U+07FF
            else if (static_cast<unsigned int>(lead - 0xC0) < 0x20 && size >= 2 && (data[1] & 0xc0) == 0x80)
            {
                result = Traits::low(result, ((lead & ~0xC0) << 6) | (data[1] & utf8_byte_mask));
                data += 2;
                size -= 2;
            }
            // 1110xxxx -> U+0800-U+FFFF
            else if (static_cast<unsigned int>(lead - 0xE0) < 0x10 && size >= 3 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80)
            {
                result = Traits::low(result, ((lead & ~0xE0) << 12) | ((data[1] & utf8_byte_mask) << 6) | (data[2] & utf8_byte_mask));
                data += 3;
                size -= 3;
            }
            // 11110xxx -> U+10000..U+10FFFF
            else if (static_cast<unsigned int>(lead - 0xF0) < 0x08 && size >= 4 && (data[1] & 0xc0) == 0x80 && (data[2] & 0xc0) == 0x80 && (data[3] & 0xc0) == 0x80)
            {
                result = Traits::high(result, ((lead & ~0xF0) << 18) | ((data[1] & utf8_byte_mask) << 12) | ((data[2] & utf8_byte_mask) << 6) | (data[3] & utf8_byte_mask));
                data += 4;
                size -= 4;
            }
            // 10xxxxxx or 11111xxx -> invalid
            else
            {
                data += 1;
                size -= 1;
            }
        }

        return result;
    }
};
1715 | |
1716 template <typename opt_swap> struct utf16_decoder | |
1717 { | |
1718 typedef uint16_t type; | |
1719 | |
1720 template <typename Traits> static inline typename Traits::value_type process(const uint16_t* data, size_t size, typename Traits::value_type result, Traits) | |
1721 { | |
1722 while (size) | |
1723 { | |
1724 uint16_t lead = opt_swap::value ? endian_swap(*data) : *data; | |
1725 | |
1726 // U+0000..U+D7FF | |
1727 if (lead < 0xD800) | |
1728 { | |
1729 result = Traits::low(result, lead); | |
1730 data += 1; | |
1731 size -= 1; | |
1732 } | |
1733 // U+E000..U+FFFF | |
1734 else if (static_cast<unsigned int>(lead - 0xE000) < 0x2000) | |
1735 { | |
1736 result = Traits::low(result, lead); | |
1737 data += 1; | |
1738 size -= 1; | |
1739 } | |
1740 // surrogate pair lead | |
1741 else if (static_cast<unsigned int>(lead - 0xD800) < 0x400 && size >= 2) | |
1742 { | |
1743 uint16_t next = opt_swap::value ? endian_swap(data[1]) : data[1]; | |
1744 | |
1745 if (static_cast<unsigned int>(next - 0xDC00) < 0x400) | |
1746 { | |
1747 result = Traits::high(result, 0x10000 + ((lead & 0x3ff) << 10) + (next & 0x3ff)); | |
1748 data += 2; | |
1749 size -= 2; | |
1750 } | |
1751 else | |
1752 { | |
1753 data += 1; | |
1754 size -= 1; | |
1755 } | |
1756 } | |
1757 else | |
1758 { | |
1759 data += 1; | |
1760 size -= 1; | |
1761 } | |
1762 } | |
1763 | |
1764 return result; | |
1765 } | |
1766 }; | |
1767 | |
1768 template <typename opt_swap> struct utf32_decoder | |
1769 { | |
1770 typedef uint32_t type; | |
1771 | |
1772 template <typename Traits> static inline typename Traits::value_type process(const uint32_t* data, size_t size, typename Traits::value_type result, Traits) | |
1773 { | |
1774 while (size) | |
1775 { | |
1776 uint32_t lead = opt_swap::value ? endian_swap(*data) : *data; | |
1777 | |
1778 // U+0000..U+FFFF | |
1779 if (lead < 0x10000) | |
1780 { | |
1781 result = Traits::low(result, lead); | |
1782 data += 1; | |
1783 size -= 1; | |
1784 } | |
1785 // U+10000..U+10FFFF | |
1786 else | |
1787 { | |
1788 result = Traits::high(result, lead); | |
1789 data += 1; | |
1790 size -= 1; | |
1791 } | |
1792 } | |
1793 | |
1794 return result; | |
1795 } | |
1796 }; | |
1797 | |
// Feeds latin1 input to the Traits sink: every byte is passed to Traits::low as a code point.
struct latin1_decoder
{
    typedef uint8_t type;

    // Returns the final sink state after all 'size' bytes were processed.
    template <typename Traits> static inline typename Traits::value_type process(const uint8_t* data, size_t size, typename Traits::value_type result, Traits)
    {
        for (const uint8_t* end = data + size; data != end; ++data)
        {
            result = Traits::low(result, *data);
        }

        return result;
    }
};
1814 | |
1815 template <size_t size> struct wchar_selector; | |
1816 | |
1817 template <> struct wchar_selector<2> | |
1818 { | |
1819 typedef uint16_t type; | |
1820 typedef utf16_counter counter; | |
1821 typedef utf16_writer writer; | |
1822 typedef utf16_decoder<opt_false> decoder; | |
1823 }; | |
1824 | |
1825 template <> struct wchar_selector<4> | |
1826 { | |
1827 typedef uint32_t type; | |
1828 typedef utf32_counter counter; | |
1829 typedef utf32_writer writer; | |
1830 typedef utf32_decoder<opt_false> decoder; | |
1831 }; | |
1832 | |
1833 typedef wchar_selector<sizeof(wchar_t)>::counter wchar_counter; | |
1834 typedef wchar_selector<sizeof(wchar_t)>::writer wchar_writer; | |
1835 | |
1836 struct wchar_decoder | |
1837 { | |
1838 typedef wchar_t type; | |
1839 | |
1840 template <typename Traits> static inline typename Traits::value_type process(const wchar_t* data, size_t size, typename Traits::value_type result, Traits traits) | |
1841 { | |
1842 typedef wchar_selector<sizeof(wchar_t)>::decoder decoder; | |
1843 | |
1844 return decoder::process(reinterpret_cast<const typename decoder::type*>(data), size, result, traits); | |
1845 } | |
1846 }; | |
1847 | |
1848 #ifdef PUGIXML_WCHAR_MODE | |
1849 PUGI_IMPL_FN void convert_wchar_endian_swap(wchar_t* result, const wchar_t* data, size_t length) | |
1850 { | |
1851 for (size_t i = 0; i < length; ++i) | |
1852 result[i] = static_cast<wchar_t>(endian_swap(static_cast<wchar_selector<sizeof(wchar_t)>::type>(data[i]))); | |
1853 } | |
1854 #endif | |
1855 PUGI_IMPL_NS_END | |
1856 | |
1857 PUGI_IMPL_NS_BEGIN | |
// Bit flags stored per character in chartype_table.
enum chartype_t
{
    ct_parse_pcdata = 1 << 0,   // \0, &, \r, <
    ct_parse_attr = 1 << 1,     // \0, &, \r, ', "
    ct_parse_attr_ws = 1 << 2,  // \0, &, \r, ', ", \n, tab
    ct_space = 1 << 3,          // \r, \n, space, tab
    ct_parse_cdata = 1 << 4,    // \0, ], >, \r
    ct_parse_comment = 1 << 5,  // \0, -, >, \r
    ct_symbol = 1 << 6,         // Any symbol > 127, a-z, A-Z, 0-9, _, :, -, .
    ct_start_symbol = 1 << 7    // Any symbol > 127, a-z, A-Z, _, :
};
1869 | |
// Per-character combination of chartype_t flags, indexed by character code.
// Every entry from 128 upwards is 192 (ct_symbol | ct_start_symbol).
static const unsigned char chartype_table[256] =
{
    // 0-15
    55,  0,   0,   0,   0,   0,   0,   0,   0,   12,  12,  0,   0,   63,  0,   0,
    // 16-31
    0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
    // 32-47
    8,   0,   6,   0,   0,   0,   7,   6,   0,   0,   0,   0,   0,   96,  64,  0,
    // 48-63
    64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  192, 0,   1,   0,   48,  0,
    // 64-79
    0,   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    // 80-95
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0,   0,   16,  0,   192,
    // 96-111
    0,   192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    // 112-127
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 0,   0,   0,   0,   0,

    // 128-255
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192,
    192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192, 192
};
1890 | |
// Bit flags stored per character in chartypex_table.
enum chartypex_t
{
    ctx_special_pcdata = 1 << 0,  // Any symbol >= 0 and < 32 (except \t, \r, \n), &, <, >
    ctx_special_attr = 1 << 1,    // Any symbol >= 0 and < 32, &, <, ", '
    ctx_start_symbol = 1 << 2,    // Any symbol > 127, a-z, A-Z, _
    ctx_digit = 1 << 3,           // 0-9
    ctx_symbol = 1 << 4           // Any symbol > 127, a-z, A-Z, 0-9, _, -, .
};
1899 | |
// Per-character combination of chartypex_t flags, indexed by character code.
// Every entry from 128 upwards is 20 (ctx_start_symbol | ctx_symbol).
static const unsigned char chartypex_table[256] =
{
    // 0-15
    3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  3,  3,  2,  3,  3,
    // 16-31
    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,
    // 32-47
    0,  0,  2,  0,  0,  0,  3,  2,  0,  0,  0,  0,  0,  16, 16, 0,
    // 48-63
    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 0,  0,  3,  0,  1,  0,

    // 64-79
    0,  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    // 80-95
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0,  0,  0,  0,  20,
    // 96-111
    0,  20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    // 112-127
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 0,  0,  0,  0,  0,

    // 128-255
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
    20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20
};
1921 | |
// Character classification helpers: each macro yields the bits of 'ct' that are set for
// character 'c' in the given table (non-zero when the character belongs to the class).
#ifdef PUGIXML_WCHAR_MODE
    // wide characters: codes below 128 use their own entry, everything else uses entry 128
    #define PUGI_IMPL_IS_CHARTYPE_IMPL(c, ct, table) ((static_cast<unsigned int>(c) < 128 ? table[static_cast<unsigned int>(c)] : table[128]) & (ct))
#else
    // narrow characters: the character is converted to unsigned char and used as the index
    #define PUGI_IMPL_IS_CHARTYPE_IMPL(c, ct, table) (table[static_cast<unsigned char>(c)] & (ct))
#endif

// lookup in chartype_table (chartype_t flags)
#define PUGI_IMPL_IS_CHARTYPE(c, ct) PUGI_IMPL_IS_CHARTYPE_IMPL(c, ct, chartype_table)
// lookup in chartypex_table (chartypex_t flags)
#define PUGI_IMPL_IS_CHARTYPEX(c, ct) PUGI_IMPL_IS_CHARTYPE_IMPL(c, ct, chartypex_table)
1930 | |
1931 PUGI_IMPL_FN bool is_little_endian() | |
1932 { | |
1933 unsigned int ui = 1; | |
1934 | |
1935 return *reinterpret_cast<unsigned char*>(&ui) == 1; | |
1936 } | |
1937 | |
1938 PUGI_IMPL_FN xml_encoding get_wchar_encoding() | |
1939 { | |
1940 PUGI_IMPL_STATIC_ASSERT(sizeof(wchar_t) == 2 || sizeof(wchar_t) == 4); | |
1941 | |
1942 if (sizeof(wchar_t) == 2) | |
1943 return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
1944 else | |
1945 return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
1946 } | |
1947 | |
1948 PUGI_IMPL_FN bool parse_declaration_encoding(const uint8_t* data, size_t size, const uint8_t*& out_encoding, size_t& out_length) | |
1949 { | |
1950 #define PUGI_IMPL_SCANCHAR(ch) { if (offset >= size || data[offset] != ch) return false; offset++; } | |
1951 #define PUGI_IMPL_SCANCHARTYPE(ct) { while (offset < size && PUGI_IMPL_IS_CHARTYPE(data[offset], ct)) offset++; } | |
1952 | |
1953 // check if we have a non-empty XML declaration | |
1954 if (size < 6 || !((data[0] == '<') & (data[1] == '?') & (data[2] == 'x') & (data[3] == 'm') & (data[4] == 'l') && PUGI_IMPL_IS_CHARTYPE(data[5], ct_space))) | |
1955 return false; | |
1956 | |
1957 // scan XML declaration until the encoding field | |
1958 for (size_t i = 6; i + 1 < size; ++i) | |
1959 { | |
1960 // declaration can not contain ? in quoted values | |
1961 if (data[i] == '?') | |
1962 return false; | |
1963 | |
1964 if (data[i] == 'e' && data[i + 1] == 'n') | |
1965 { | |
1966 size_t offset = i; | |
1967 | |
1968 // encoding follows the version field which can't contain 'en' so this has to be the encoding if XML is well formed | |
1969 PUGI_IMPL_SCANCHAR('e'); PUGI_IMPL_SCANCHAR('n'); PUGI_IMPL_SCANCHAR('c'); PUGI_IMPL_SCANCHAR('o'); | |
1970 PUGI_IMPL_SCANCHAR('d'); PUGI_IMPL_SCANCHAR('i'); PUGI_IMPL_SCANCHAR('n'); PUGI_IMPL_SCANCHAR('g'); | |
1971 | |
1972 // S? = S? | |
1973 PUGI_IMPL_SCANCHARTYPE(ct_space); | |
1974 PUGI_IMPL_SCANCHAR('='); | |
1975 PUGI_IMPL_SCANCHARTYPE(ct_space); | |
1976 | |
1977 // the only two valid delimiters are ' and " | |
1978 uint8_t delimiter = (offset < size && data[offset] == '"') ? '"' : '\''; | |
1979 | |
1980 PUGI_IMPL_SCANCHAR(delimiter); | |
1981 | |
1982 size_t start = offset; | |
1983 | |
1984 out_encoding = data + offset; | |
1985 | |
1986 PUGI_IMPL_SCANCHARTYPE(ct_symbol); | |
1987 | |
1988 out_length = offset - start; | |
1989 | |
1990 PUGI_IMPL_SCANCHAR(delimiter); | |
1991 | |
1992 return true; | |
1993 } | |
1994 } | |
1995 | |
1996 return false; | |
1997 | |
1998 #undef PUGI_IMPL_SCANCHAR | |
1999 #undef PUGI_IMPL_SCANCHARTYPE | |
2000 } | |
2001 | |
2002 PUGI_IMPL_FN xml_encoding guess_buffer_encoding(const uint8_t* data, size_t size) | |
2003 { | |
2004 // skip encoding autodetection if input buffer is too small | |
2005 if (size < 4) return encoding_utf8; | |
2006 | |
2007 uint8_t d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; | |
2008 | |
2009 // look for BOM in first few bytes | |
2010 if (d0 == 0 && d1 == 0 && d2 == 0xfe && d3 == 0xff) return encoding_utf32_be; | |
2011 if (d0 == 0xff && d1 == 0xfe && d2 == 0 && d3 == 0) return encoding_utf32_le; | |
2012 if (d0 == 0xfe && d1 == 0xff) return encoding_utf16_be; | |
2013 if (d0 == 0xff && d1 == 0xfe) return encoding_utf16_le; | |
2014 if (d0 == 0xef && d1 == 0xbb && d2 == 0xbf) return encoding_utf8; | |
2015 | |
2016 // look for <, <? or <?xm in various encodings | |
2017 if (d0 == 0 && d1 == 0 && d2 == 0 && d3 == 0x3c) return encoding_utf32_be; | |
2018 if (d0 == 0x3c && d1 == 0 && d2 == 0 && d3 == 0) return encoding_utf32_le; | |
2019 if (d0 == 0 && d1 == 0x3c && d2 == 0 && d3 == 0x3f) return encoding_utf16_be; | |
2020 if (d0 == 0x3c && d1 == 0 && d2 == 0x3f && d3 == 0) return encoding_utf16_le; | |
2021 | |
2022 // look for utf16 < followed by node name (this may fail, but is better than utf8 since it's zero terminated so early) | |
2023 if (d0 == 0 && d1 == 0x3c) return encoding_utf16_be; | |
2024 if (d0 == 0x3c && d1 == 0) return encoding_utf16_le; | |
2025 | |
2026 // no known BOM detected; parse declaration | |
2027 const uint8_t* enc = 0; | |
2028 size_t enc_length = 0; | |
2029 | |
2030 if (d0 == 0x3c && d1 == 0x3f && d2 == 0x78 && d3 == 0x6d && parse_declaration_encoding(data, size, enc, enc_length)) | |
2031 { | |
2032 // iso-8859-1 (case-insensitive) | |
2033 if (enc_length == 10 | |
2034 && (enc[0] | ' ') == 'i' && (enc[1] | ' ') == 's' && (enc[2] | ' ') == 'o' | |
2035 && enc[3] == '-' && enc[4] == '8' && enc[5] == '8' && enc[6] == '5' && enc[7] == '9' | |
2036 && enc[8] == '-' && enc[9] == '1') | |
2037 return encoding_latin1; | |
2038 | |
2039 // latin1 (case-insensitive) | |
2040 if (enc_length == 6 | |
2041 && (enc[0] | ' ') == 'l' && (enc[1] | ' ') == 'a' && (enc[2] | ' ') == 't' | |
2042 && (enc[3] | ' ') == 'i' && (enc[4] | ' ') == 'n' | |
2043 && enc[5] == '1') | |
2044 return encoding_latin1; | |
2045 } | |
2046 | |
2047 return encoding_utf8; | |
2048 } | |
2049 | |
2050 PUGI_IMPL_FN xml_encoding get_buffer_encoding(xml_encoding encoding, const void* contents, size_t size) | |
2051 { | |
2052 // replace wchar encoding with utf implementation | |
2053 if (encoding == encoding_wchar) return get_wchar_encoding(); | |
2054 | |
2055 // replace utf16 encoding with utf16 with specific endianness | |
2056 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
2057 | |
2058 // replace utf32 encoding with utf32 with specific endianness | |
2059 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
2060 | |
2061 // only do autodetection if no explicit encoding is requested | |
2062 if (encoding != encoding_auto) return encoding; | |
2063 | |
2064 // try to guess encoding (based on XML specification, Appendix F.1) | |
2065 const uint8_t* data = static_cast<const uint8_t*>(contents); | |
2066 | |
2067 return guess_buffer_encoding(data, size); | |
2068 } | |
2069 | |
2070 PUGI_IMPL_FN bool get_mutable_buffer(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) | |
2071 { | |
2072 size_t length = size / sizeof(char_t); | |
2073 | |
2074 if (is_mutable) | |
2075 { | |
2076 out_buffer = static_cast<char_t*>(const_cast<void*>(contents)); | |
2077 out_length = length; | |
2078 } | |
2079 else | |
2080 { | |
2081 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
2082 if (!buffer) return false; | |
2083 | |
2084 if (contents) | |
2085 memcpy(buffer, contents, length * sizeof(char_t)); | |
2086 else | |
2087 assert(length == 0); | |
2088 | |
2089 buffer[length] = 0; | |
2090 | |
2091 out_buffer = buffer; | |
2092 out_length = length + 1; | |
2093 } | |
2094 | |
2095 return true; | |
2096 } | |
2097 | |
2098 #ifdef PUGIXML_WCHAR_MODE | |
2099 PUGI_IMPL_FN bool need_endian_swap_utf(xml_encoding le, xml_encoding re) | |
2100 { | |
2101 return (le == encoding_utf16_be && re == encoding_utf16_le) || (le == encoding_utf16_le && re == encoding_utf16_be) || | |
2102 (le == encoding_utf32_be && re == encoding_utf32_le) || (le == encoding_utf32_le && re == encoding_utf32_be); | |
2103 } | |
2104 | |
2105 PUGI_IMPL_FN bool convert_buffer_endian_swap(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) | |
2106 { | |
2107 const char_t* data = static_cast<const char_t*>(contents); | |
2108 size_t length = size / sizeof(char_t); | |
2109 | |
2110 if (is_mutable) | |
2111 { | |
2112 char_t* buffer = const_cast<char_t*>(data); | |
2113 | |
2114 convert_wchar_endian_swap(buffer, data, length); | |
2115 | |
2116 out_buffer = buffer; | |
2117 out_length = length; | |
2118 } | |
2119 else | |
2120 { | |
2121 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
2122 if (!buffer) return false; | |
2123 | |
2124 convert_wchar_endian_swap(buffer, data, length); | |
2125 buffer[length] = 0; | |
2126 | |
2127 out_buffer = buffer; | |
2128 out_length = length + 1; | |
2129 } | |
2130 | |
2131 return true; | |
2132 } | |
2133 | |
2134 template <typename D> PUGI_IMPL_FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) | |
2135 { | |
2136 const typename D::type* data = static_cast<const typename D::type*>(contents); | |
2137 size_t data_length = size / sizeof(typename D::type); | |
2138 | |
2139 // first pass: get length in wchar_t units | |
2140 size_t length = D::process(data, data_length, 0, wchar_counter()); | |
2141 | |
2142 // allocate buffer of suitable length | |
2143 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
2144 if (!buffer) return false; | |
2145 | |
2146 // second pass: convert utf16 input to wchar_t | |
2147 wchar_writer::value_type obegin = reinterpret_cast<wchar_writer::value_type>(buffer); | |
2148 wchar_writer::value_type oend = D::process(data, data_length, obegin, wchar_writer()); | |
2149 | |
2150 assert(oend == obegin + length); | |
2151 *oend = 0; | |
2152 | |
2153 out_buffer = buffer; | |
2154 out_length = length + 1; | |
2155 | |
2156 return true; | |
2157 } | |
2158 | |
2159 PUGI_IMPL_FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) | |
2160 { | |
2161 // get native encoding | |
2162 xml_encoding wchar_encoding = get_wchar_encoding(); | |
2163 | |
2164 // fast path: no conversion required | |
2165 if (encoding == wchar_encoding) | |
2166 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); | |
2167 | |
2168 // only endian-swapping is required | |
2169 if (need_endian_swap_utf(encoding, wchar_encoding)) | |
2170 return convert_buffer_endian_swap(out_buffer, out_length, contents, size, is_mutable); | |
2171 | |
2172 // source encoding is utf8 | |
2173 if (encoding == encoding_utf8) | |
2174 return convert_buffer_generic(out_buffer, out_length, contents, size, utf8_decoder()); | |
2175 | |
2176 // source encoding is utf16 | |
2177 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | |
2178 { | |
2179 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
2180 | |
2181 return (native_encoding == encoding) ? | |
2182 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : | |
2183 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); | |
2184 } | |
2185 | |
2186 // source encoding is utf32 | |
2187 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | |
2188 { | |
2189 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
2190 | |
2191 return (native_encoding == encoding) ? | |
2192 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : | |
2193 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); | |
2194 } | |
2195 | |
2196 // source encoding is latin1 | |
2197 if (encoding == encoding_latin1) | |
2198 return convert_buffer_generic(out_buffer, out_length, contents, size, latin1_decoder()); | |
2199 | |
2200 assert(false && "Invalid encoding"); // unreachable | |
2201 return false; | |
2202 } | |
2203 #else | |
2204 template <typename D> PUGI_IMPL_FN bool convert_buffer_generic(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, D) | |
2205 { | |
2206 const typename D::type* data = static_cast<const typename D::type*>(contents); | |
2207 size_t data_length = size / sizeof(typename D::type); | |
2208 | |
2209 // first pass: get length in utf8 units | |
2210 size_t length = D::process(data, data_length, 0, utf8_counter()); | |
2211 | |
2212 // allocate buffer of suitable length | |
2213 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
2214 if (!buffer) return false; | |
2215 | |
2216 // second pass: convert utf16 input to utf8 | |
2217 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); | |
2218 uint8_t* oend = D::process(data, data_length, obegin, utf8_writer()); | |
2219 | |
2220 assert(oend == obegin + length); | |
2221 *oend = 0; | |
2222 | |
2223 out_buffer = buffer; | |
2224 out_length = length + 1; | |
2225 | |
2226 return true; | |
2227 } | |
2228 | |
2229 PUGI_IMPL_FN size_t get_latin1_7bit_prefix_length(const uint8_t* data, size_t size) | |
2230 { | |
2231 for (size_t i = 0; i < size; ++i) | |
2232 if (data[i] > 127) | |
2233 return i; | |
2234 | |
2235 return size; | |
2236 } | |
2237 | |
2238 PUGI_IMPL_FN bool convert_buffer_latin1(char_t*& out_buffer, size_t& out_length, const void* contents, size_t size, bool is_mutable) | |
2239 { | |
2240 const uint8_t* data = static_cast<const uint8_t*>(contents); | |
2241 size_t data_length = size; | |
2242 | |
2243 // get size of prefix that does not need utf8 conversion | |
2244 size_t prefix_length = get_latin1_7bit_prefix_length(data, data_length); | |
2245 assert(prefix_length <= data_length); | |
2246 | |
2247 const uint8_t* postfix = data + prefix_length; | |
2248 size_t postfix_length = data_length - prefix_length; | |
2249 | |
2250 // if no conversion is needed, just return the original buffer | |
2251 if (postfix_length == 0) return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); | |
2252 | |
2253 // first pass: get length in utf8 units | |
2254 size_t length = prefix_length + latin1_decoder::process(postfix, postfix_length, 0, utf8_counter()); | |
2255 | |
2256 // allocate buffer of suitable length | |
2257 char_t* buffer = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
2258 if (!buffer) return false; | |
2259 | |
2260 // second pass: convert latin1 input to utf8 | |
2261 memcpy(buffer, data, prefix_length); | |
2262 | |
2263 uint8_t* obegin = reinterpret_cast<uint8_t*>(buffer); | |
2264 uint8_t* oend = latin1_decoder::process(postfix, postfix_length, obegin + prefix_length, utf8_writer()); | |
2265 | |
2266 assert(oend == obegin + length); | |
2267 *oend = 0; | |
2268 | |
2269 out_buffer = buffer; | |
2270 out_length = length + 1; | |
2271 | |
2272 return true; | |
2273 } | |
2274 | |
2275 PUGI_IMPL_FN bool convert_buffer(char_t*& out_buffer, size_t& out_length, xml_encoding encoding, const void* contents, size_t size, bool is_mutable) | |
2276 { | |
2277 // fast path: no conversion required | |
2278 if (encoding == encoding_utf8) | |
2279 return get_mutable_buffer(out_buffer, out_length, contents, size, is_mutable); | |
2280 | |
2281 // source encoding is utf16 | |
2282 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | |
2283 { | |
2284 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
2285 | |
2286 return (native_encoding == encoding) ? | |
2287 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_false>()) : | |
2288 convert_buffer_generic(out_buffer, out_length, contents, size, utf16_decoder<opt_true>()); | |
2289 } | |
2290 | |
2291 // source encoding is utf32 | |
2292 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | |
2293 { | |
2294 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
2295 | |
2296 return (native_encoding == encoding) ? | |
2297 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_false>()) : | |
2298 convert_buffer_generic(out_buffer, out_length, contents, size, utf32_decoder<opt_true>()); | |
2299 } | |
2300 | |
2301 // source encoding is latin1 | |
2302 if (encoding == encoding_latin1) | |
2303 return convert_buffer_latin1(out_buffer, out_length, contents, size, is_mutable); | |
2304 | |
2305 assert(false && "Invalid encoding"); // unreachable | |
2306 return false; | |
2307 } | |
2308 #endif | |
2309 | |
2310 PUGI_IMPL_FN size_t as_utf8_begin(const wchar_t* str, size_t length) | |
2311 { | |
2312 // get length in utf8 characters | |
2313 return wchar_decoder::process(str, length, 0, utf8_counter()); | |
2314 } | |
2315 | |
2316 PUGI_IMPL_FN void as_utf8_end(char* buffer, size_t size, const wchar_t* str, size_t length) | |
2317 { | |
2318 // convert to utf8 | |
2319 uint8_t* begin = reinterpret_cast<uint8_t*>(buffer); | |
2320 uint8_t* end = wchar_decoder::process(str, length, begin, utf8_writer()); | |
2321 | |
2322 assert(begin + size == end); | |
2323 (void)!end; | |
2324 (void)!size; | |
2325 } | |
2326 | |
2327 #ifndef PUGIXML_NO_STL | |
2328 PUGI_IMPL_FN std::string as_utf8_impl(const wchar_t* str, size_t length) | |
2329 { | |
2330 // first pass: get length in utf8 characters | |
2331 size_t size = as_utf8_begin(str, length); | |
2332 | |
2333 // allocate resulting string | |
2334 std::string result; | |
2335 result.resize(size); | |
2336 | |
2337 // second pass: convert to utf8 | |
2338 if (size > 0) as_utf8_end(&result[0], size, str, length); | |
2339 | |
2340 return result; | |
2341 } | |
2342 | |
2343 PUGI_IMPL_FN std::basic_string<wchar_t> as_wide_impl(const char* str, size_t size) | |
2344 { | |
2345 const uint8_t* data = reinterpret_cast<const uint8_t*>(str); | |
2346 | |
2347 // first pass: get length in wchar_t units | |
2348 size_t length = utf8_decoder::process(data, size, 0, wchar_counter()); | |
2349 | |
2350 // allocate resulting string | |
2351 std::basic_string<wchar_t> result; | |
2352 result.resize(length); | |
2353 | |
2354 // second pass: convert to wchar_t | |
2355 if (length > 0) | |
2356 { | |
2357 wchar_writer::value_type begin = reinterpret_cast<wchar_writer::value_type>(&result[0]); | |
2358 wchar_writer::value_type end = utf8_decoder::process(data, size, begin, wchar_writer()); | |
2359 | |
2360 assert(begin + length == end); | |
2361 (void)!end; | |
2362 } | |
2363 | |
2364 return result; | |
2365 } | |
2366 #endif | |
2367 | |
2368 template <typename Header> | |
2369 inline bool strcpy_insitu_allow(size_t length, const Header& header, uintptr_t header_mask, char_t* target) | |
2370 { | |
2371 // never reuse shared memory | |
2372 if (header & xml_memory_page_contents_shared_mask) return false; | |
2373 | |
2374 size_t target_length = strlength(target); | |
2375 | |
2376 // always reuse document buffer memory if possible | |
2377 if ((header & header_mask) == 0) return target_length >= length; | |
2378 | |
2379 // reuse heap memory if waste is not too great | |
2380 const size_t reuse_threshold = 32; | |
2381 | |
2382 return target_length >= length && (target_length < reuse_threshold || target_length - length < target_length / 2); | |
2383 } | |
2384 | |
2385 template <typename String, typename Header> | |
2386 PUGI_IMPL_FN bool strcpy_insitu(String& dest, Header& header, uintptr_t header_mask, const char_t* source, size_t source_length) | |
2387 { | |
2388 assert((header & header_mask) == 0 || dest); // header bit indicates whether dest was previously allocated | |
2389 | |
2390 if (source_length == 0) | |
2391 { | |
2392 // empty string and null pointer are equivalent, so just deallocate old memory | |
2393 xml_allocator* alloc = PUGI_IMPL_GETPAGE_IMPL(header)->allocator; | |
2394 | |
2395 if (header & header_mask) alloc->deallocate_string(dest); | |
2396 | |
2397 // mark the string as not allocated | |
2398 dest = 0; | |
2399 header &= ~header_mask; | |
2400 | |
2401 return true; | |
2402 } | |
2403 else if (dest && strcpy_insitu_allow(source_length, header, header_mask, dest)) | |
2404 { | |
2405 // we can reuse old buffer, so just copy the new data (including zero terminator) | |
2406 memcpy(dest, source, source_length * sizeof(char_t)); | |
2407 dest[source_length] = 0; | |
2408 | |
2409 return true; | |
2410 } | |
2411 else | |
2412 { | |
2413 xml_allocator* alloc = PUGI_IMPL_GETPAGE_IMPL(header)->allocator; | |
2414 | |
2415 if (!alloc->reserve()) return false; | |
2416 | |
2417 // allocate new buffer | |
2418 char_t* buf = alloc->allocate_string(source_length + 1); | |
2419 if (!buf) return false; | |
2420 | |
2421 // copy the string (including zero terminator) | |
2422 memcpy(buf, source, source_length * sizeof(char_t)); | |
2423 buf[source_length] = 0; | |
2424 | |
2425 // deallocate old buffer (*after* the above to protect against overlapping memory and/or allocation failures) | |
2426 if (header & header_mask) alloc->deallocate_string(dest); | |
2427 | |
2428 // the string is now allocated, so set the flag | |
2429 dest = buf; | |
2430 header |= header_mask; | |
2431 | |
2432 return true; | |
2433 } | |
2434 } | |
2435 | |
2436 struct gap | |
2437 { | |
2438 char_t* end; | |
2439 size_t size; | |
2440 | |
2441 gap(): end(0), size(0) | |
2442 { | |
2443 } | |
2444 | |
2445 // Push new gap, move s count bytes further (skipping the gap). | |
2446 // Collapse previous gap. | |
2447 void push(char_t*& s, size_t count) | |
2448 { | |
2449 if (end) // there was a gap already; collapse it | |
2450 { | |
2451 // Move [old_gap_end, new_gap_start) to [old_gap_start, ...) | |
2452 assert(s >= end); | |
2453 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); | |
2454 } | |
2455 | |
2456 s += count; // end of current gap | |
2457 | |
2458 // "merge" two gaps | |
2459 end = s; | |
2460 size += count; | |
2461 } | |
2462 | |
2463 // Collapse all gaps, return past-the-end pointer | |
2464 char_t* flush(char_t* s) | |
2465 { | |
2466 if (end) | |
2467 { | |
2468 // Move [old_gap_end, current_pos) to [old_gap_start, ...) | |
2469 assert(s >= end); | |
2470 memmove(end - size, end, reinterpret_cast<char*>(s) - reinterpret_cast<char*>(end)); | |
2471 | |
2472 return s - size; | |
2473 } | |
2474 else return s; | |
2475 } | |
2476 }; | |
2477 | |
2478 PUGI_IMPL_FN char_t* strconv_escape(char_t* s, gap& g) | |
2479 { | |
2480 char_t* stre = s + 1; | |
2481 | |
2482 switch (*stre) | |
2483 { | |
2484 case '#': // &#... | |
2485 { | |
2486 unsigned int ucsc = 0; | |
2487 | |
2488 if (stre[1] == 'x') // &#x... (hex code) | |
2489 { | |
2490 stre += 2; | |
2491 | |
2492 char_t ch = *stre; | |
2493 | |
2494 if (ch == ';') return stre; | |
2495 | |
2496 for (;;) | |
2497 { | |
2498 if (static_cast<unsigned int>(ch - '0') <= 9) | |
2499 ucsc = 16 * ucsc + (ch - '0'); | |
2500 else if (static_cast<unsigned int>((ch | ' ') - 'a') <= 5) | |
2501 ucsc = 16 * ucsc + ((ch | ' ') - 'a' + 10); | |
2502 else if (ch == ';') | |
2503 break; | |
2504 else // cancel | |
2505 return stre; | |
2506 | |
2507 ch = *++stre; | |
2508 } | |
2509 | |
2510 ++stre; | |
2511 } | |
2512 else // &#... (dec code) | |
2513 { | |
2514 char_t ch = *++stre; | |
2515 | |
2516 if (ch == ';') return stre; | |
2517 | |
2518 for (;;) | |
2519 { | |
2520 if (static_cast<unsigned int>(ch - '0') <= 9) | |
2521 ucsc = 10 * ucsc + (ch - '0'); | |
2522 else if (ch == ';') | |
2523 break; | |
2524 else // cancel | |
2525 return stre; | |
2526 | |
2527 ch = *++stre; | |
2528 } | |
2529 | |
2530 ++stre; | |
2531 } | |
2532 | |
2533 #ifdef PUGIXML_WCHAR_MODE | |
2534 s = reinterpret_cast<char_t*>(wchar_writer::any(reinterpret_cast<wchar_writer::value_type>(s), ucsc)); | |
2535 #else | |
2536 s = reinterpret_cast<char_t*>(utf8_writer::any(reinterpret_cast<uint8_t*>(s), ucsc)); | |
2537 #endif | |
2538 | |
2539 g.push(s, stre - s); | |
2540 return stre; | |
2541 } | |
2542 | |
2543 case 'a': // &a | |
2544 { | |
2545 ++stre; | |
2546 | |
2547 if (*stre == 'm') // &am | |
2548 { | |
2549 if (*++stre == 'p' && *++stre == ';') // & | |
2550 { | |
2551 *s++ = '&'; | |
2552 ++stre; | |
2553 | |
2554 g.push(s, stre - s); | |
2555 return stre; | |
2556 } | |
2557 } | |
2558 else if (*stre == 'p') // &ap | |
2559 { | |
2560 if (*++stre == 'o' && *++stre == 's' && *++stre == ';') // ' | |
2561 { | |
2562 *s++ = '\''; | |
2563 ++stre; | |
2564 | |
2565 g.push(s, stre - s); | |
2566 return stre; | |
2567 } | |
2568 } | |
2569 break; | |
2570 } | |
2571 | |
2572 case 'g': // &g | |
2573 { | |
2574 if (*++stre == 't' && *++stre == ';') // > | |
2575 { | |
2576 *s++ = '>'; | |
2577 ++stre; | |
2578 | |
2579 g.push(s, stre - s); | |
2580 return stre; | |
2581 } | |
2582 break; | |
2583 } | |
2584 | |
2585 case 'l': // &l | |
2586 { | |
2587 if (*++stre == 't' && *++stre == ';') // < | |
2588 { | |
2589 *s++ = '<'; | |
2590 ++stre; | |
2591 | |
2592 g.push(s, stre - s); | |
2593 return stre; | |
2594 } | |
2595 break; | |
2596 } | |
2597 | |
2598 case 'q': // &q | |
2599 { | |
2600 if (*++stre == 'u' && *++stre == 'o' && *++stre == 't' && *++stre == ';') // " | |
2601 { | |
2602 *s++ = '"'; | |
2603 ++stre; | |
2604 | |
2605 g.push(s, stre - s); | |
2606 return stre; | |
2607 } | |
2608 break; | |
2609 } | |
2610 | |
2611 default: | |
2612 break; | |
2613 } | |
2614 | |
2615 return stre; | |
2616 } | |
2617 | |
// Parser utilities
// All of these expect names from the expansion site to be in scope
// (s, endch, optmsk, cursor, alloc, ch, error_offset, error_status as applicable).

// true if c equals e, or if c is zero and endch equals e
#define PUGI_IMPL_ENDSWITH(c, e) ((c) == (e) || ((c) == 0 && endch == (e)))

// advance s past characters classified as ct_space
#define PUGI_IMPL_SKIPWS() { while (PUGI_IMPL_IS_CHARTYPE(*s, ct_space)) ++s; }

// non-zero if any of the OPT bits is set in optmsk
#define PUGI_IMPL_OPTSET(OPT) ( optmsk & (OPT) )

// append a node of the given type under cursor and descend into it; reports status_out_of_memory on failure
#define PUGI_IMPL_PUSHNODE(TYPE) \
	{ \
		cursor = append_new_node(cursor, *alloc, TYPE); \
		if (!cursor) PUGI_IMPL_THROW_ERROR(status_out_of_memory, s); \
	}

// move cursor back to its parent
#define PUGI_IMPL_POPNODE() { cursor = cursor->parent; }

// advance s until X holds or the zero terminator is reached
#define PUGI_IMPL_SCANFOR(X) { while (*s != 0 && !(X)) ++s; }

// advance s for as long as X holds
#define PUGI_IMPL_SCANWHILE(X) { while (X) ++s; }

// same as SCANWHILE for a condition X written in terms of ss, testing four characters per iteration
#define PUGI_IMPL_SCANWHILE_UNROLL(X) \
	{ \
		for (;;) \
		{ \
			char_t ss = s[0]; if (PUGI_IMPL_UNLIKELY(!(X))) { break; } \
			ss = s[1]; if (PUGI_IMPL_UNLIKELY(!(X))) { s += 1; break; } \
			ss = s[2]; if (PUGI_IMPL_UNLIKELY(!(X))) { s += 2; break; } \
			ss = s[3]; if (PUGI_IMPL_UNLIKELY(!(X))) { s += 3; break; } \
			s += 4; \
		} \
	}

// save the current character in ch, replace it with a zero terminator and step past it
#define PUGI_IMPL_ENDSEG() { ch = *s; *s = 0; ++s; }

// record the error position and status, then return a null pointer from the enclosing function
#define PUGI_IMPL_THROW_ERROR(err, m) return error_offset = m, error_status = err, static_cast<char_t*>(0)

// report err at position m if the end of the buffer has been reached
#define PUGI_IMPL_CHECK_ERROR(err, m) { if (*s == 0) PUGI_IMPL_THROW_ERROR(err, m); }
2630 | |
2631 PUGI_IMPL_FN char_t* strconv_comment(char_t* s, char_t endch) | |
2632 { | |
2633 gap g; | |
2634 | |
2635 while (true) | |
2636 { | |
2637 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_comment)); | |
2638 | |
2639 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair | |
2640 { | |
2641 *s++ = '\n'; // replace first one with 0x0a | |
2642 | |
2643 if (*s == '\n') g.push(s, 1); | |
2644 } | |
2645 else if (s[0] == '-' && s[1] == '-' && PUGI_IMPL_ENDSWITH(s[2], '>')) // comment ends here | |
2646 { | |
2647 *g.flush(s) = 0; | |
2648 | |
2649 return s + (s[2] == '>' ? 3 : 2); | |
2650 } | |
2651 else if (*s == 0) | |
2652 { | |
2653 return 0; | |
2654 } | |
2655 else ++s; | |
2656 } | |
2657 } | |
2658 | |
2659 PUGI_IMPL_FN char_t* strconv_cdata(char_t* s, char_t endch) | |
2660 { | |
2661 gap g; | |
2662 | |
2663 while (true) | |
2664 { | |
2665 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_cdata)); | |
2666 | |
2667 if (*s == '\r') // Either a single 0x0d or 0x0d 0x0a pair | |
2668 { | |
2669 *s++ = '\n'; // replace first one with 0x0a | |
2670 | |
2671 if (*s == '\n') g.push(s, 1); | |
2672 } | |
2673 else if (s[0] == ']' && s[1] == ']' && PUGI_IMPL_ENDSWITH(s[2], '>')) // CDATA ends here | |
2674 { | |
2675 *g.flush(s) = 0; | |
2676 | |
2677 return s + 1; | |
2678 } | |
2679 else if (*s == 0) | |
2680 { | |
2681 return 0; | |
2682 } | |
2683 else ++s; | |
2684 } | |
2685 } | |
2686 | |
2687 typedef char_t* (*strconv_pcdata_t)(char_t*); | |
2688 | |
2689 template <typename opt_trim, typename opt_eol, typename opt_escape> struct strconv_pcdata_impl | |
2690 { | |
2691 static char_t* parse(char_t* s) | |
2692 { | |
2693 gap g; | |
2694 | |
2695 char_t* begin = s; | |
2696 | |
2697 while (true) | |
2698 { | |
2699 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_pcdata)); | |
2700 | |
2701 if (*s == '<') // PCDATA ends here | |
2702 { | |
2703 char_t* end = g.flush(s); | |
2704 | |
2705 if (opt_trim::value) | |
2706 while (end > begin && PUGI_IMPL_IS_CHARTYPE(end[-1], ct_space)) | |
2707 --end; | |
2708 | |
2709 *end = 0; | |
2710 | |
2711 return s + 1; | |
2712 } | |
2713 else if (opt_eol::value && *s == '\r') // Either a single 0x0d or 0x0d 0x0a pair | |
2714 { | |
2715 *s++ = '\n'; // replace first one with 0x0a | |
2716 | |
2717 if (*s == '\n') g.push(s, 1); | |
2718 } | |
2719 else if (opt_escape::value && *s == '&') | |
2720 { | |
2721 s = strconv_escape(s, g); | |
2722 } | |
2723 else if (*s == 0) | |
2724 { | |
2725 char_t* end = g.flush(s); | |
2726 | |
2727 if (opt_trim::value) | |
2728 while (end > begin && PUGI_IMPL_IS_CHARTYPE(end[-1], ct_space)) | |
2729 --end; | |
2730 | |
2731 *end = 0; | |
2732 | |
2733 return s; | |
2734 } | |
2735 else ++s; | |
2736 } | |
2737 } | |
2738 }; | |
2739 | |
2740 PUGI_IMPL_FN strconv_pcdata_t get_strconv_pcdata(unsigned int optmask) | |
2741 { | |
2742 PUGI_IMPL_STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_trim_pcdata == 0x0800); | |
2743 | |
2744 switch (((optmask >> 4) & 3) | ((optmask >> 9) & 4)) // get bitmask for flags (trim eol escapes); this simultaneously checks 3 options from assertion above | |
2745 { | |
2746 case 0: return strconv_pcdata_impl<opt_false, opt_false, opt_false>::parse; | |
2747 case 1: return strconv_pcdata_impl<opt_false, opt_false, opt_true>::parse; | |
2748 case 2: return strconv_pcdata_impl<opt_false, opt_true, opt_false>::parse; | |
2749 case 3: return strconv_pcdata_impl<opt_false, opt_true, opt_true>::parse; | |
2750 case 4: return strconv_pcdata_impl<opt_true, opt_false, opt_false>::parse; | |
2751 case 5: return strconv_pcdata_impl<opt_true, opt_false, opt_true>::parse; | |
2752 case 6: return strconv_pcdata_impl<opt_true, opt_true, opt_false>::parse; | |
2753 case 7: return strconv_pcdata_impl<opt_true, opt_true, opt_true>::parse; | |
2754 default: assert(false); return 0; // unreachable | |
2755 } | |
2756 } | |
2757 | |
2758 typedef char_t* (*strconv_attribute_t)(char_t*, char_t); | |
2759 | |
2760 template <typename opt_escape> struct strconv_attribute_impl | |
2761 { | |
2762 static char_t* parse_wnorm(char_t* s, char_t end_quote) | |
2763 { | |
2764 gap g; | |
2765 | |
2766 // trim leading whitespaces | |
2767 if (PUGI_IMPL_IS_CHARTYPE(*s, ct_space)) | |
2768 { | |
2769 char_t* str = s; | |
2770 | |
2771 do ++str; | |
2772 while (PUGI_IMPL_IS_CHARTYPE(*str, ct_space)); | |
2773 | |
2774 g.push(s, str - s); | |
2775 } | |
2776 | |
2777 while (true) | |
2778 { | |
2779 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_attr_ws | ct_space)); | |
2780 | |
2781 if (*s == end_quote) | |
2782 { | |
2783 char_t* str = g.flush(s); | |
2784 | |
2785 do *str-- = 0; | |
2786 while (PUGI_IMPL_IS_CHARTYPE(*str, ct_space)); | |
2787 | |
2788 return s + 1; | |
2789 } | |
2790 else if (PUGI_IMPL_IS_CHARTYPE(*s, ct_space)) | |
2791 { | |
2792 *s++ = ' '; | |
2793 | |
2794 if (PUGI_IMPL_IS_CHARTYPE(*s, ct_space)) | |
2795 { | |
2796 char_t* str = s + 1; | |
2797 while (PUGI_IMPL_IS_CHARTYPE(*str, ct_space)) ++str; | |
2798 | |
2799 g.push(s, str - s); | |
2800 } | |
2801 } | |
2802 else if (opt_escape::value && *s == '&') | |
2803 { | |
2804 s = strconv_escape(s, g); | |
2805 } | |
2806 else if (!*s) | |
2807 { | |
2808 return 0; | |
2809 } | |
2810 else ++s; | |
2811 } | |
2812 } | |
2813 | |
2814 static char_t* parse_wconv(char_t* s, char_t end_quote) | |
2815 { | |
2816 gap g; | |
2817 | |
2818 while (true) | |
2819 { | |
2820 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_attr_ws)); | |
2821 | |
2822 if (*s == end_quote) | |
2823 { | |
2824 *g.flush(s) = 0; | |
2825 | |
2826 return s + 1; | |
2827 } | |
2828 else if (PUGI_IMPL_IS_CHARTYPE(*s, ct_space)) | |
2829 { | |
2830 if (*s == '\r') | |
2831 { | |
2832 *s++ = ' '; | |
2833 | |
2834 if (*s == '\n') g.push(s, 1); | |
2835 } | |
2836 else *s++ = ' '; | |
2837 } | |
2838 else if (opt_escape::value && *s == '&') | |
2839 { | |
2840 s = strconv_escape(s, g); | |
2841 } | |
2842 else if (!*s) | |
2843 { | |
2844 return 0; | |
2845 } | |
2846 else ++s; | |
2847 } | |
2848 } | |
2849 | |
2850 static char_t* parse_eol(char_t* s, char_t end_quote) | |
2851 { | |
2852 gap g; | |
2853 | |
2854 while (true) | |
2855 { | |
2856 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_attr)); | |
2857 | |
2858 if (*s == end_quote) | |
2859 { | |
2860 *g.flush(s) = 0; | |
2861 | |
2862 return s + 1; | |
2863 } | |
2864 else if (*s == '\r') | |
2865 { | |
2866 *s++ = '\n'; | |
2867 | |
2868 if (*s == '\n') g.push(s, 1); | |
2869 } | |
2870 else if (opt_escape::value && *s == '&') | |
2871 { | |
2872 s = strconv_escape(s, g); | |
2873 } | |
2874 else if (!*s) | |
2875 { | |
2876 return 0; | |
2877 } | |
2878 else ++s; | |
2879 } | |
2880 } | |
2881 | |
2882 static char_t* parse_simple(char_t* s, char_t end_quote) | |
2883 { | |
2884 gap g; | |
2885 | |
2886 while (true) | |
2887 { | |
2888 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPE(ss, ct_parse_attr)); | |
2889 | |
2890 if (*s == end_quote) | |
2891 { | |
2892 *g.flush(s) = 0; | |
2893 | |
2894 return s + 1; | |
2895 } | |
2896 else if (opt_escape::value && *s == '&') | |
2897 { | |
2898 s = strconv_escape(s, g); | |
2899 } | |
2900 else if (!*s) | |
2901 { | |
2902 return 0; | |
2903 } | |
2904 else ++s; | |
2905 } | |
2906 } | |
2907 }; | |
2908 | |
2909 PUGI_IMPL_FN strconv_attribute_t get_strconv_attribute(unsigned int optmask) | |
2910 { | |
2911 PUGI_IMPL_STATIC_ASSERT(parse_escapes == 0x10 && parse_eol == 0x20 && parse_wconv_attribute == 0x40 && parse_wnorm_attribute == 0x80); | |
2912 | |
2913 switch ((optmask >> 4) & 15) // get bitmask for flags (wnorm wconv eol escapes); this simultaneously checks 4 options from assertion above | |
2914 { | |
2915 case 0: return strconv_attribute_impl<opt_false>::parse_simple; | |
2916 case 1: return strconv_attribute_impl<opt_true>::parse_simple; | |
2917 case 2: return strconv_attribute_impl<opt_false>::parse_eol; | |
2918 case 3: return strconv_attribute_impl<opt_true>::parse_eol; | |
2919 case 4: return strconv_attribute_impl<opt_false>::parse_wconv; | |
2920 case 5: return strconv_attribute_impl<opt_true>::parse_wconv; | |
2921 case 6: return strconv_attribute_impl<opt_false>::parse_wconv; | |
2922 case 7: return strconv_attribute_impl<opt_true>::parse_wconv; | |
2923 case 8: return strconv_attribute_impl<opt_false>::parse_wnorm; | |
2924 case 9: return strconv_attribute_impl<opt_true>::parse_wnorm; | |
2925 case 10: return strconv_attribute_impl<opt_false>::parse_wnorm; | |
2926 case 11: return strconv_attribute_impl<opt_true>::parse_wnorm; | |
2927 case 12: return strconv_attribute_impl<opt_false>::parse_wnorm; | |
2928 case 13: return strconv_attribute_impl<opt_true>::parse_wnorm; | |
2929 case 14: return strconv_attribute_impl<opt_false>::parse_wnorm; | |
2930 case 15: return strconv_attribute_impl<opt_true>::parse_wnorm; | |
2931 default: assert(false); return 0; // unreachable | |
2932 } | |
2933 } | |
2934 | |
2935 inline xml_parse_result make_parse_result(xml_parse_status status, ptrdiff_t offset = 0) | |
2936 { | |
2937 xml_parse_result result; | |
2938 result.status = status; | |
2939 result.offset = offset; | |
2940 | |
2941 return result; | |
2942 } | |
2943 | |
2944 struct xml_parser | |
2945 { | |
2946 xml_allocator* alloc; | |
2947 char_t* error_offset; | |
2948 xml_parse_status error_status; | |
2949 | |
2950 xml_parser(xml_allocator* alloc_): alloc(alloc_), error_offset(0), error_status(status_ok) | |
2951 { | |
2952 } | |
2953 | |
2954 // DOCTYPE consists of nested sections of the following possible types: | |
2955 // <!-- ... -->, <? ... ?>, "...", '...' | |
2956 // <![...]]> | |
2957 // <!...> | |
2958 // First group can not contain nested groups | |
2959 // Second group can contain nested groups of the same type | |
2960 // Third group can contain all other groups | |
2961 char_t* parse_doctype_primitive(char_t* s) | |
2962 { | |
2963 if (*s == '"' || *s == '\'') | |
2964 { | |
2965 // quoted string | |
2966 char_t ch = *s++; | |
2967 PUGI_IMPL_SCANFOR(*s == ch); | |
2968 if (!*s) PUGI_IMPL_THROW_ERROR(status_bad_doctype, s); | |
2969 | |
2970 s++; | |
2971 } | |
2972 else if (s[0] == '<' && s[1] == '?') | |
2973 { | |
2974 // <? ... ?> | |
2975 s += 2; | |
2976 PUGI_IMPL_SCANFOR(s[0] == '?' && s[1] == '>'); // no need for ENDSWITH because ?> can't terminate proper doctype | |
2977 if (!*s) PUGI_IMPL_THROW_ERROR(status_bad_doctype, s); | |
2978 | |
2979 s += 2; | |
2980 } | |
2981 else if (s[0] == '<' && s[1] == '!' && s[2] == '-' && s[3] == '-') | |
2982 { | |
2983 s += 4; | |
2984 PUGI_IMPL_SCANFOR(s[0] == '-' && s[1] == '-' && s[2] == '>'); // no need for ENDSWITH because --> can't terminate proper doctype | |
2985 if (!*s) PUGI_IMPL_THROW_ERROR(status_bad_doctype, s); | |
2986 | |
2987 s += 3; | |
2988 } | |
2989 else PUGI_IMPL_THROW_ERROR(status_bad_doctype, s); | |
2990 | |
2991 return s; | |
2992 } | |
2993 | |
2994 char_t* parse_doctype_ignore(char_t* s) | |
2995 { | |
2996 size_t depth = 0; | |
2997 | |
2998 assert(s[0] == '<' && s[1] == '!' && s[2] == '['); | |
2999 s += 3; | |
3000 | |
3001 while (*s) | |
3002 { | |
3003 if (s[0] == '<' && s[1] == '!' && s[2] == '[') | |
3004 { | |
3005 // nested ignore section | |
3006 s += 3; | |
3007 depth++; | |
3008 } | |
3009 else if (s[0] == ']' && s[1] == ']' && s[2] == '>') | |
3010 { | |
3011 // ignore section end | |
3012 s += 3; | |
3013 | |
3014 if (depth == 0) | |
3015 return s; | |
3016 | |
3017 depth--; | |
3018 } | |
3019 else s++; | |
3020 } | |
3021 | |
3022 PUGI_IMPL_THROW_ERROR(status_bad_doctype, s); | |
3023 } | |
3024 | |
3025 char_t* parse_doctype_group(char_t* s, char_t endch) | |
3026 { | |
3027 size_t depth = 0; | |
3028 | |
3029 assert((s[0] == '<' || s[0] == 0) && s[1] == '!'); | |
3030 s += 2; | |
3031 | |
3032 while (*s) | |
3033 { | |
3034 if (s[0] == '<' && s[1] == '!' && s[2] != '-') | |
3035 { | |
3036 if (s[2] == '[') | |
3037 { | |
3038 // ignore | |
3039 s = parse_doctype_ignore(s); | |
3040 if (!s) return s; | |
3041 } | |
3042 else | |
3043 { | |
3044 // some control group | |
3045 s += 2; | |
3046 depth++; | |
3047 } | |
3048 } | |
3049 else if (s[0] == '<' || s[0] == '"' || s[0] == '\'') | |
3050 { | |
3051 // unknown tag (forbidden), or some primitive group | |
3052 s = parse_doctype_primitive(s); | |
3053 if (!s) return s; | |
3054 } | |
3055 else if (*s == '>') | |
3056 { | |
3057 if (depth == 0) | |
3058 return s; | |
3059 | |
3060 depth--; | |
3061 s++; | |
3062 } | |
3063 else s++; | |
3064 } | |
3065 | |
3066 if (depth != 0 || endch != '>') PUGI_IMPL_THROW_ERROR(status_bad_doctype, s); | |
3067 | |
3068 return s; | |
3069 } | |
3070 | |
// Parses a construct that starts with "<!": a comment ("<!--"), a CDATA section ("<![CDATA[")
// or a DOCTYPE declaration.
//   s      - points at the '!' on entry
//   cursor - node under which new nodes are created (taken by value here)
//   optmsk - parse option mask, tested through PUGI_IMPL_OPTSET
//   endch  - original last character of the buffer; parse() replaces that character with 0
//            before parsing starts
// Returns the position after the construct. Error paths go through PUGI_IMPL_THROW_ERROR /
// PUGI_IMPL_CHECK_ERROR (defined elsewhere; callers check the result for null).
3071 char_t* parse_exclamation(char_t* s, xml_node_struct* cursor, unsigned int optmsk, char_t endch) | |
3072 { | |
3073 // parse node contents, starting with exclamation mark | |
3074 ++s; | |
3075 | |
3076 if (*s == '-') // '<!-...' | |
3077 { | |
3078 ++s; | |
3079 | |
3080 if (*s == '-') // '<!--...' | |
3081 { | |
3082 ++s; | |
3083 | |
3084 if (PUGI_IMPL_OPTSET(parse_comments)) | |
3085 { | |
3086 PUGI_IMPL_PUSHNODE(node_comment); // Append a new node on the tree. | |
3087 cursor->value = s; // Save the offset. | |
3088 } | |
3089 | |
// End-of-line conversion only runs when the comment text is kept (parse_comments set);
// otherwise the comment is simply scanned past.
3090 if (PUGI_IMPL_OPTSET(parse_eol) && PUGI_IMPL_OPTSET(parse_comments)) | |
3091 { | |
3092 s = strconv_comment(s, endch); | |
3093 | |
3094 if (!s) PUGI_IMPL_THROW_ERROR(status_bad_comment, cursor->value); | |
3095 } | |
3096 else | |
3097 { | |
3098 // Scan for terminating '-->'. | |
3099 PUGI_IMPL_SCANFOR(s[0] == '-' && s[1] == '-' && PUGI_IMPL_ENDSWITH(s[2], '>')); | |
3100 PUGI_IMPL_CHECK_ERROR(status_bad_comment, s); | |
3101 | |
3102 if (PUGI_IMPL_OPTSET(parse_comments)) | |
3103 *s = 0; // Zero-terminate this segment at the first terminating '-'. | |
3104 | |
// s[2] is not '>' when the match ended at the buffer terminator
// (presumably ENDSWITH accepted it because endch was '>'); then only "--" is skipped.
3105 s += (s[2] == '>' ? 3 : 2); // Step over the '\0->'. | |
3106 } | |
3107 } | |
3108 else PUGI_IMPL_THROW_ERROR(status_bad_comment, s); | |
3109 } | |
3110 else if (*s == '[') | |
3111 { | |
3112 // '<![CDATA[...' | |
// Each *++s both advances and compares, so on a mismatch s is left at the offending character.
3113 if (*++s=='C' && *++s=='D' && *++s=='A' && *++s=='T' && *++s=='A' && *++s == '[') | |
3114 { | |
3115 ++s; | |
3116 | |
3117 if (PUGI_IMPL_OPTSET(parse_cdata)) | |
3118 { | |
3119 PUGI_IMPL_PUSHNODE(node_cdata); // Append a new node on the tree. | |
3120 cursor->value = s; // Save the offset. | |
3121 | |
3122 if (PUGI_IMPL_OPTSET(parse_eol)) | |
3123 { | |
3124 s = strconv_cdata(s, endch); | |
3125 | |
3126 if (!s) PUGI_IMPL_THROW_ERROR(status_bad_cdata, cursor->value); | |
3127 } | |
3128 else | |
3129 { | |
3130 // Scan for terminating ']]>'. | |
3131 PUGI_IMPL_SCANFOR(s[0] == ']' && s[1] == ']' && PUGI_IMPL_ENDSWITH(s[2], '>')); | |
3132 PUGI_IMPL_CHECK_ERROR(status_bad_cdata, s); | |
3133 | |
3134 *s++ = 0; // Zero-terminate this segment. | |
3135 } | |
3136 } | |
3137 else // Flagged for discard, but we still have to scan for the terminator. | |
3138 { | |
3139 // Scan for terminating ']]>'. | |
3140 PUGI_IMPL_SCANFOR(s[0] == ']' && s[1] == ']' && PUGI_IMPL_ENDSWITH(s[2], '>')); | |
3141 PUGI_IMPL_CHECK_ERROR(status_bad_cdata, s); | |
3142 | |
3143 ++s; | |
3144 } | |
3145 | |
3146 s += (s[1] == '>' ? 2 : 1); // Step over the last ']>'. | |
3147 } | |
3148 else PUGI_IMPL_THROW_ERROR(status_bad_cdata, s); | |
3149 } | |
3150 else if (s[0] == 'D' && s[1] == 'O' && s[2] == 'C' && s[3] == 'T' && s[4] == 'Y' && s[5] == 'P' && PUGI_IMPL_ENDSWITH(s[6], 'E')) | |
3151 { | |
// s was advanced past '!' above, so stepping back by two lands on the '<' of "<!DOCTYPE";
// parse_doctype_group asserts that it starts on "<!".
3152 s -= 2; | |
3153 | |
// DOCTYPE is only accepted while cursor has no parent
3154 if (cursor->parent) PUGI_IMPL_THROW_ERROR(status_bad_doctype, s); | |
3155 | |
// "<!DOCTYPE" is 9 characters long, so mark is the first character after the keyword
3156 char_t* mark = s + 9; | |
3157 | |
3158 s = parse_doctype_group(s, endch); | |
3159 if (!s) return s; | |
3160 | |
3161 assert((*s == 0 && endch == '>') || *s == '>'); | |
// overwrite the closing '>' with a terminator so the stored doctype text ends there;
// nothing to overwrite when the group ended at the buffer terminator
3162 if (*s) *s++ = 0; | |
3163 | |
3164 if (PUGI_IMPL_OPTSET(parse_doctype)) | |
3165 { | |
3166 while (PUGI_IMPL_IS_CHARTYPE(*mark, ct_space)) ++mark; | |
3167 | |
3168 PUGI_IMPL_PUSHNODE(node_doctype); | |
3169 | |
3170 cursor->value = mark; | |
3171 } | |
3172 } | |
// The buffer ended right after "<!": endch tells which construct was being started.
3173 else if (*s == 0 && endch == '-') PUGI_IMPL_THROW_ERROR(status_bad_comment, s); | |
3174 else if (*s == 0 && endch == '[') PUGI_IMPL_THROW_ERROR(status_bad_cdata, s); | |
3175 else PUGI_IMPL_THROW_ERROR(status_unrecognized_tag, s); | |
3176 | |
3177 return s; | |
3178 } | |
3179 | |
// Parses a construct that starts with "<?": either the XML declaration (target "xml",
// compared case-insensitively) or a processing instruction.
//   s          - points at the '?' on entry
//   ref_cursor - current node; read on entry and written back before the normal return
//   optmsk     - parse option mask, tested through PUGI_IMPL_OPTSET
//   endch      - original last character of the buffer (parse() replaces it with 0)
// Returns the position parsing should continue from. For a declaration with content this is
// the start of that content, and the cursor is left on the declaration node so that
// parse_tree continues at its attribute loop. Error paths go through
// PUGI_IMPL_THROW_ERROR / PUGI_IMPL_CHECK_ERROR (defined elsewhere).
3180 char_t* parse_question(char_t* s, xml_node_struct*& ref_cursor, unsigned int optmsk, char_t endch) | |
3181 { | |
3182 // load into registers | |
3183 xml_node_struct* cursor = ref_cursor; | |
3184 char_t ch = 0; | |
3185 | |
3186 // parse node contents, starting with question mark | |
3187 ++s; | |
3188 | |
3189 // read PI target | |
3190 char_t* target = s; | |
3191 | |
3192 if (!PUGI_IMPL_IS_CHARTYPE(*s, ct_start_symbol)) PUGI_IMPL_THROW_ERROR(status_bad_pi, s); | |
3193 | |
3194 PUGI_IMPL_SCANWHILE(PUGI_IMPL_IS_CHARTYPE(*s, ct_symbol)); | |
3195 PUGI_IMPL_CHECK_ERROR(status_bad_pi, s); | |
3196 | |
3197 // determine node type; stricmp / strcasecmp is not portable | |
// OR-ing with ' ' (0x20) folds ASCII upper-case letters onto lower-case ones;
// target + 3 == s requires the target to be exactly three characters long
3198 bool declaration = (target[0] | ' ') == 'x' && (target[1] | ' ') == 'm' && (target[2] | ' ') == 'l' && target + 3 == s; | |
3199 | |
// a node is only created when the matching option (parse_declaration / parse_pi) is set
3200 if (declaration ? PUGI_IMPL_OPTSET(parse_declaration) : PUGI_IMPL_OPTSET(parse_pi)) | |
3201 { | |
3202 if (declaration) | |
3203 { | |
3204 // disallow non top-level declarations | |
3205 if (cursor->parent) PUGI_IMPL_THROW_ERROR(status_bad_pi, s); | |
3206 | |
3207 PUGI_IMPL_PUSHNODE(node_declaration); | |
3208 } | |
3209 else | |
3210 { | |
3211 PUGI_IMPL_PUSHNODE(node_pi); | |
3212 } | |
3213 | |
3214 cursor->name = target; | |
3215 | |
// NOTE(review): per its use in parse_tree ("Save char in 'ch', terminate & step over"),
// ENDSEG stores the character at s in ch, zero-terminates the target and advances s
3216 PUGI_IMPL_ENDSEG(); | |
3217 | |
3218 // parse value/attributes | |
3219 if (ch == '?') | |
3220 { | |
3221 // empty node | |
3222 if (!PUGI_IMPL_ENDSWITH(*s, '>')) PUGI_IMPL_THROW_ERROR(status_bad_pi, s); | |
// step over '>' only if it is actually there; it is absent when the check above
// succeeded at the buffer terminator
3223 s += (*s == '>'); | |
3224 | |
3225 PUGI_IMPL_POPNODE(); | |
3226 } | |
3227 else if (PUGI_IMPL_IS_CHARTYPE(ch, ct_space)) | |
3228 { | |
3229 PUGI_IMPL_SKIPWS(); | |
3230 | |
3231 // scan for tag end | |
3232 char_t* value = s; | |
3233 | |
3234 PUGI_IMPL_SCANFOR(s[0] == '?' && PUGI_IMPL_ENDSWITH(s[1], '>')); | |
3235 PUGI_IMPL_CHECK_ERROR(status_bad_pi, s); | |
3236 | |
3237 if (declaration) | |
3238 { | |
3239 // replace ending ? with / so that 'element' terminates properly | |
3240 *s = '/'; | |
3241 | |
3242 // we exit from this function with cursor at node_declaration, which is a signal to parse() to go to LOC_ATTRIBUTES | |
3243 s = value; | |
3244 } | |
3245 else | |
3246 { | |
3247 // store value and step over > | |
3248 cursor->value = value; | |
3249 | |
3250 PUGI_IMPL_POPNODE(); | |
3251 | |
3252 PUGI_IMPL_ENDSEG(); | |
3253 | |
3254 s += (*s == '>'); | |
3255 } | |
3256 } | |
3257 else PUGI_IMPL_THROW_ERROR(status_bad_pi, s); | |
3258 } | |
3259 else | |
3260 { | |
// node is discarded by the options, but its end still has to be found
3261 // scan for tag end | |
3262 PUGI_IMPL_SCANFOR(s[0] == '?' && PUGI_IMPL_ENDSWITH(s[1], '>')); | |
3263 PUGI_IMPL_CHECK_ERROR(status_bad_pi, s); | |
3264 | |
3265 s += (s[1] == '>' ? 2 : 1); | |
3266 } | |
3267 | |
3268 // store from registers | |
3269 ref_cursor = cursor; | |
3270 | |
3271 return s; | |
3272 } | |
3273 | |
// Main parsing loop: walks the zero-terminated buffer starting at s and builds nodes and
// attributes under root. The buffer is modified in place: names and values are
// zero-terminated where they stand, and node/attribute pointers refer into it.
//   s      - start of the data to parse
//   root   - node that receives the parsed content; also the expected cursor at the end
//   optmsk - parse option mask, tested through PUGI_IMPL_OPTSET
//   endch  - original last character of the buffer (parse() replaces it with 0), used to
//            accept constructs whose final character was that last one
// Returns the final position on success. Errors go through PUGI_IMPL_THROW_ERROR (defined
// elsewhere); parse() reads the parser's error_status / error_offset afterwards.
3274 char_t* parse_tree(char_t* s, xml_node_struct* root, unsigned int optmsk, char_t endch) | |
3275 { | |
// conversion routines are selected once from the option mask and reused for every value
3276 strconv_attribute_t strconv_attribute = get_strconv_attribute(optmsk); | |
3277 strconv_pcdata_t strconv_pcdata = get_strconv_pcdata(optmsk); | |
3278 | |
3279 char_t ch = 0; | |
3280 xml_node_struct* cursor = root; | |
3281 char_t* mark = s; | |
3282 char_t* merged_pcdata = s; | |
3283 | |
3284 while (*s != 0) | |
3285 { | |
3286 if (*s == '<') | |
3287 { | |
3288 ++s; | |
3289 | |
// Entered with s just past a '<'; the PCDATA branch at the bottom of the loop jumps here too.
3290 LOC_TAG: | |
3291 if (PUGI_IMPL_IS_CHARTYPE(*s, ct_start_symbol)) // '<#...' | |
3292 { | |
3293 PUGI_IMPL_PUSHNODE(node_element); // Append a new node to the tree. | |
3294 | |
3295 cursor->name = s; | |
3296 | |
3297 PUGI_IMPL_SCANWHILE_UNROLL(PUGI_IMPL_IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. | |
3298 PUGI_IMPL_ENDSEG(); // Save char in 'ch', terminate & step over. | |
3299 | |
3300 if (ch == '>') | |
3301 { | |
3302 // end of tag | |
3303 } | |
3304 else if (PUGI_IMPL_IS_CHARTYPE(ch, ct_space)) | |
3305 { | |
// Attribute loop; also the jump target after parse_question leaves the cursor on a declaration node.
3306 LOC_ATTRIBUTES: | |
3307 while (true) | |
3308 { | |
3309 PUGI_IMPL_SKIPWS(); // Eat any whitespace. | |
3310 | |
3311 if (PUGI_IMPL_IS_CHARTYPE(*s, ct_start_symbol)) // <... #... | |
3312 { | |
3313 xml_attribute_struct* a = append_new_attribute(cursor, *alloc); // Make space for this attribute. | |
3314 if (!a) PUGI_IMPL_THROW_ERROR(status_out_of_memory, s); | |
3315 | |
3316 a->name = s; // Save the offset. | |
3317 | |
3318 PUGI_IMPL_SCANWHILE_UNROLL(PUGI_IMPL_IS_CHARTYPE(ss, ct_symbol)); // Scan for a terminator. | |
3319 PUGI_IMPL_ENDSEG(); // Save char in 'ch', terminate & step over. | |
3320 | |
// whitespace between the attribute name and '=' is allowed: skip it and re-read ch
3321 if (PUGI_IMPL_IS_CHARTYPE(ch, ct_space)) | |
3322 { | |
3323 PUGI_IMPL_SKIPWS(); // Eat any whitespace. | |
3324 | |
3325 ch = *s; | |
3326 ++s; | |
3327 } | |
3328 | |
3329 if (ch == '=') // '<... #=...' | |
3330 { | |
3331 PUGI_IMPL_SKIPWS(); // Eat any whitespace. | |
3332 | |
3333 if (*s == '"' || *s == '\'') // '<... #="...' | |
3334 { | |
3335 ch = *s; // Save quote char to avoid breaking on "''" -or- '""'. | |
3336 ++s; // Step over the quote. | |
3337 a->value = s; // Save the offset. | |
3338 | |
3339 s = strconv_attribute(s, ch); | |
3340 | |
3341 if (!s) PUGI_IMPL_THROW_ERROR(status_bad_attribute, a->value); | |
3342 | |
3343 // After this line the loop continues from the start; | |
3344 // Whitespaces, / and > are ok, symbols and EOF are wrong, | |
3345 // everything else will be detected | |
3346 if (PUGI_IMPL_IS_CHARTYPE(*s, ct_start_symbol)) PUGI_IMPL_THROW_ERROR(status_bad_attribute, s); | |
3347 } | |
3348 else PUGI_IMPL_THROW_ERROR(status_bad_attribute, s); | |
3349 } | |
3350 else PUGI_IMPL_THROW_ERROR(status_bad_attribute, s); | |
3351 } | |
3352 else if (*s == '/') | |
3353 { | |
3354 ++s; | |
3355 | |
3356 if (*s == '>') | |
3357 { | |
3358 PUGI_IMPL_POPNODE(); | |
3359 s++; | |
3360 break; | |
3361 } | |
// "/>" split by the end of the buffer: the '>' was the last character (now in endch)
3362 else if (*s == 0 && endch == '>') | |
3363 { | |
3364 PUGI_IMPL_POPNODE(); | |
3365 break; | |
3366 } | |
3367 else PUGI_IMPL_THROW_ERROR(status_bad_start_element, s); | |
3368 } | |
3369 else if (*s == '>') | |
3370 { | |
3371 ++s; | |
3372 | |
3373 break; | |
3374 } | |
3375 else if (*s == 0 && endch == '>') | |
3376 { | |
3377 break; | |
3378 } | |
3379 else PUGI_IMPL_THROW_ERROR(status_bad_start_element, s); | |
3380 } | |
3381 | |
3382 // !!! | |
3383 } | |
3384 else if (ch == '/') // '<#.../' | |
3385 { | |
3386 if (!PUGI_IMPL_ENDSWITH(*s, '>')) PUGI_IMPL_THROW_ERROR(status_bad_start_element, s); | |
3387 | |
3388 PUGI_IMPL_POPNODE(); // Pop. | |
3389 | |
3390 s += (*s == '>'); | |
3391 } | |
3392 else if (ch == 0) | |
3393 { | |
3394 // we stepped over null terminator, backtrack & handle closing tag | |
3395 --s; | |
3396 | |
3397 if (endch != '>') PUGI_IMPL_THROW_ERROR(status_bad_start_element, s); | |
3398 } | |
3399 else PUGI_IMPL_THROW_ERROR(status_bad_start_element, s); | |
3400 } | |
3401 else if (*s == '/') | |
3402 { | |
3403 ++s; | |
3404 | |
3405 mark = s; | |
3406 | |
3407 char_t* name = cursor->name; | |
3408 if (!name) PUGI_IMPL_THROW_ERROR(status_end_element_mismatch, mark); | |
3409 | |
// compare the closing tag's name with the name of the element being closed, character by character
3410 while (PUGI_IMPL_IS_CHARTYPE(*s, ct_symbol)) | |
3411 { | |
3412 if (*s++ != *name++) PUGI_IMPL_THROW_ERROR(status_end_element_mismatch, mark); | |
3413 } | |
3414 | |
3415 if (*name) | |
3416 { | |
// The element name has characters left. If exactly one remains, it equals endch and the buffer
// is exhausted, the closing tag's name was cut by the end of the buffer with no '>' after it:
// that is reported as a bad end element rather than as a mismatch.
3417 if (*s == 0 && name[0] == endch && name[1] == 0) PUGI_IMPL_THROW_ERROR(status_bad_end_element, s); | |
3418 else PUGI_IMPL_THROW_ERROR(status_end_element_mismatch, mark); | |
3419 } | |
3420 | |
3421 PUGI_IMPL_POPNODE(); // Pop. | |
3422 | |
3423 PUGI_IMPL_SKIPWS(); | |
3424 | |
3425 if (*s == 0) | |
3426 { | |
3427 if (endch != '>') PUGI_IMPL_THROW_ERROR(status_bad_end_element, s); | |
3428 } | |
3429 else | |
3430 { | |
3431 if (*s != '>') PUGI_IMPL_THROW_ERROR(status_bad_end_element, s); | |
3432 ++s; | |
3433 } | |
3434 } | |
3435 else if (*s == '?') // '<?...' | |
3436 { | |
3437 s = parse_question(s, cursor, optmsk, endch); | |
3438 if (!s) return s; | |
3439 | |
3440 assert(cursor); | |
// parse_question leaves the cursor on the declaration node when its content still has to be
// parsed as attributes
3441 if (PUGI_IMPL_NODETYPE(cursor) == node_declaration) goto LOC_ATTRIBUTES; | |
3442 } | |
3443 else if (*s == '!') // '<!...' | |
3444 { | |
3445 s = parse_exclamation(s, cursor, optmsk, endch); | |
3446 if (!s) return s; | |
3447 } | |
3448 else if (*s == 0 && endch == '?') PUGI_IMPL_THROW_ERROR(status_bad_pi, s); | |
3449 else PUGI_IMPL_THROW_ERROR(status_unrecognized_tag, s); | |
3450 } | |
3451 else | |
3452 { | |
3453 mark = s; // Save this offset while searching for a terminator. | |
3454 | |
3455 PUGI_IMPL_SKIPWS(); // Eat whitespace if no genuine PCDATA here. | |
3456 | |
// only whitespace up to the next tag or the end of the buffer: whether it is kept depends on
// the parse_ws_pcdata / parse_ws_pcdata_single / parse_trim_pcdata options
3457 if (*s == '<' || !*s) | |
3458 { | |
3459 // We skipped some whitespace characters because otherwise we would take the tag branch instead of PCDATA one | |
3460 assert(mark != s); | |
3461 | |
3462 if (!PUGI_IMPL_OPTSET(parse_ws_pcdata | parse_ws_pcdata_single) || PUGI_IMPL_OPTSET(parse_trim_pcdata)) | |
3463 { | |
3464 continue; | |
3465 } | |
3466 else if (PUGI_IMPL_OPTSET(parse_ws_pcdata_single)) | |
3467 { | |
// keep the whitespace only when it is directly followed by a closing tag and the current
// node has no children yet
3468 if (s[0] != '<' || s[1] != '/' || cursor->first_child) continue; | |
3469 } | |
3470 } | |
3471 | |
// unless trimming is requested, the PCDATA starts at the saved mark, i.e. it includes the
// leading whitespace skipped above
3472 if (!PUGI_IMPL_OPTSET(parse_trim_pcdata)) | |
3473 s = mark; | |
3474 | |
// PCDATA is stored only inside an element, or at the top level when parse_fragment is set;
// otherwise it is skipped in the else branch below
3475 if (cursor->parent || PUGI_IMPL_OPTSET(parse_fragment)) | |
3476 { | |
3477 char_t* parsed_pcdata = s; | |
3478 | |
3479 s = strconv_pcdata(s); | |
3480 | |
3481 if (PUGI_IMPL_OPTSET(parse_embed_pcdata) && cursor->parent && !cursor->first_child && !cursor->value) | |
3482 { | |
3483 cursor->value = parsed_pcdata; // Save the offset. | |
3484 } | |
// first_child->prev_sibling_c is used as the last child of cursor here: merge into it when it is PCDATA
3485 else if (PUGI_IMPL_OPTSET(parse_merge_pcdata) && cursor->first_child && PUGI_IMPL_NODETYPE(cursor->first_child->prev_sibling_c) == node_pcdata) | |
3486 { | |
3487 assert(merged_pcdata >= cursor->first_child->prev_sibling_c->value); | |
3488 | |
3489 // Catch up to the end of last parsed value; only needed for the first fragment. | |
3490 merged_pcdata += strlength(merged_pcdata); | |
3491 | |
3492 size_t length = strlength(parsed_pcdata); | |
3493 | |
3494 // Must use memmove instead of memcpy as this move may overlap | |
3495 memmove(merged_pcdata, parsed_pcdata, (length + 1) * sizeof(char_t)); | |
3496 merged_pcdata += length; | |
3497 } | |
3498 else | |
3499 { | |
3500 xml_node_struct* prev_cursor = cursor; | |
3501 PUGI_IMPL_PUSHNODE(node_pcdata); // Append a new node on the tree. | |
3502 | |
3503 cursor->value = parsed_pcdata; // Save the offset. | |
3504 merged_pcdata = parsed_pcdata; // Used for parse_merge_pcdata above, cheaper to save unconditionally | |
3505 | |
3506 cursor = prev_cursor; // Pop since this is a standalone. | |
3507 } | |
3508 | |
3509 if (!*s) break; | |
3510 } | |
3511 else | |
3512 { | |
3513 PUGI_IMPL_SCANFOR(*s == '<'); // '...<' | |
3514 if (!*s) break; | |
3515 | |
3516 ++s; | |
3517 } | |
3518 | |
3519 // We're after '<' | |
3520 goto LOC_TAG; | |
3521 } | |
3522 } | |
3523 | |
3524 // check that last tag is closed | |
3525 if (cursor != root) PUGI_IMPL_THROW_ERROR(status_end_element_mismatch, s); | |
3526 | |
3527 return s; | |
3528 } | |
3529 | |
3530 #ifdef PUGIXML_WCHAR_MODE | |
3531 static char_t* parse_skip_bom(char_t* s) | |
3532 { | |
3533 unsigned int bom = 0xfeff; | |
3534 return (s[0] == static_cast<wchar_t>(bom)) ? s + 1 : s; | |
3535 } | |
3536 #else | |
3537 static char_t* parse_skip_bom(char_t* s) | |
3538 { | |
3539 return (s[0] == '\xef' && s[1] == '\xbb' && s[2] == '\xbf') ? s + 3 : s; | |
3540 } | |
3541 #endif | |
3542 | |
3543 static bool has_element_node_siblings(xml_node_struct* node) | |
3544 { | |
3545 while (node) | |
3546 { | |
3547 if (PUGI_IMPL_NODETYPE(node) == node_element) return true; | |
3548 | |
3549 node = node->next_sibling; | |
3550 } | |
3551 | |
3552 return false; | |
3553 } | |
3554 | |
3555 static xml_parse_result parse(char_t* buffer, size_t length, xml_document_struct* xmldoc, xml_node_struct* root, unsigned int optmsk) | |
3556 { | |
3557 // early-out for empty documents | |
3558 if (length == 0) | |
3559 return make_parse_result(PUGI_IMPL_OPTSET(parse_fragment) ? status_ok : status_no_document_element); | |
3560 | |
3561 // get last child of the root before parsing | |
3562 xml_node_struct* last_root_child = root->first_child ? root->first_child->prev_sibling_c + 0 : 0; | |
3563 | |
3564 // create parser on stack | |
3565 xml_parser parser(static_cast<xml_allocator*>(xmldoc)); | |
3566 | |
3567 // save last character and make buffer zero-terminated (speeds up parsing) | |
3568 char_t endch = buffer[length - 1]; | |
3569 buffer[length - 1] = 0; | |
3570 | |
3571 // skip BOM to make sure it does not end up as part of parse output | |
3572 char_t* buffer_data = parse_skip_bom(buffer); | |
3573 | |
3574 // perform actual parsing | |
3575 parser.parse_tree(buffer_data, root, optmsk, endch); | |
3576 | |
3577 xml_parse_result result = make_parse_result(parser.error_status, parser.error_offset ? parser.error_offset - buffer : 0); | |
3578 assert(result.offset >= 0 && static_cast<size_t>(result.offset) <= length); | |
3579 | |
3580 if (result) | |
3581 { | |
3582 // since we removed last character, we have to handle the only possible false positive (stray <) | |
3583 if (endch == '<') | |
3584 return make_parse_result(status_unrecognized_tag, length - 1); | |
3585 | |
3586 // check if there are any element nodes parsed | |
3587 xml_node_struct* first_root_child_parsed = last_root_child ? last_root_child->next_sibling + 0 : root->first_child + 0; | |
3588 | |
3589 if (!PUGI_IMPL_OPTSET(parse_fragment) && !has_element_node_siblings(first_root_child_parsed)) | |
3590 return make_parse_result(status_no_document_element, length - 1); | |
3591 } | |
3592 else | |
3593 { | |
3594 // roll back offset if it occurs on a null terminator in the source buffer | |
3595 if (result.offset > 0 && static_cast<size_t>(result.offset) == length - 1 && endch == 0) | |
3596 result.offset--; | |
3597 } | |
3598 | |
3599 return result; | |
3600 } | |
3601 }; | |
3602 | |
3603 // Output facilities | |
3604 PUGI_IMPL_FN xml_encoding get_write_native_encoding() | |
3605 { | |
3606 #ifdef PUGIXML_WCHAR_MODE | |
3607 return get_wchar_encoding(); | |
3608 #else | |
3609 return encoding_utf8; | |
3610 #endif | |
3611 } | |
3612 | |
3613 PUGI_IMPL_FN xml_encoding get_write_encoding(xml_encoding encoding) | |
3614 { | |
3615 // replace wchar encoding with utf implementation | |
3616 if (encoding == encoding_wchar) return get_wchar_encoding(); | |
3617 | |
3618 // replace utf16 encoding with utf16 with specific endianness | |
3619 if (encoding == encoding_utf16) return is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
3620 | |
3621 // replace utf32 encoding with utf32 with specific endianness | |
3622 if (encoding == encoding_utf32) return is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
3623 | |
3624 // only do autodetection if no explicit encoding is requested | |
3625 if (encoding != encoding_auto) return encoding; | |
3626 | |
3627 // assume utf8 encoding | |
3628 return encoding_utf8; | |
3629 } | |
3630 | |
3631 template <typename D, typename T> PUGI_IMPL_FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T) | |
3632 { | |
3633 PUGI_IMPL_STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); | |
3634 | |
3635 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); | |
3636 | |
3637 return static_cast<size_t>(end - dest) * sizeof(*dest); | |
3638 } | |
3639 | |
3640 template <typename D, typename T> PUGI_IMPL_FN size_t convert_buffer_output_generic(typename T::value_type dest, const char_t* data, size_t length, D, T, bool opt_swap) | |
3641 { | |
3642 PUGI_IMPL_STATIC_ASSERT(sizeof(char_t) == sizeof(typename D::type)); | |
3643 | |
3644 typename T::value_type end = D::process(reinterpret_cast<const typename D::type*>(data), length, dest, T()); | |
3645 | |
3646 if (opt_swap) | |
3647 { | |
3648 for (typename T::value_type i = dest; i != end; ++i) | |
3649 *i = endian_swap(*i); | |
3650 } | |
3651 | |
3652 return static_cast<size_t>(end - dest) * sizeof(*dest); | |
3653 } | |
3654 | |
3655 #ifdef PUGIXML_WCHAR_MODE | |
3656 PUGI_IMPL_FN size_t get_valid_length(const char_t* data, size_t length) | |
3657 { | |
3658 if (length < 1) return 0; | |
3659 | |
3660 // discard last character if it's the lead of a surrogate pair | |
3661 return (sizeof(wchar_t) == 2 && static_cast<unsigned int>(static_cast<uint16_t>(data[length - 1]) - 0xD800) < 0x400) ? length - 1 : length; | |
3662 } | |
3663 | |
3664 PUGI_IMPL_FN size_t convert_buffer_output(char_t* r_char, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) | |
3665 { | |
3666 // only endian-swapping is required | |
3667 if (need_endian_swap_utf(encoding, get_wchar_encoding())) | |
3668 { | |
3669 convert_wchar_endian_swap(r_char, data, length); | |
3670 | |
3671 return length * sizeof(char_t); | |
3672 } | |
3673 | |
3674 // convert to utf8 | |
3675 if (encoding == encoding_utf8) | |
3676 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), utf8_writer()); | |
3677 | |
3678 // convert to utf16 | |
3679 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | |
3680 { | |
3681 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
3682 | |
3683 return convert_buffer_output_generic(r_u16, data, length, wchar_decoder(), utf16_writer(), native_encoding != encoding); | |
3684 } | |
3685 | |
3686 // convert to utf32 | |
3687 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | |
3688 { | |
3689 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
3690 | |
3691 return convert_buffer_output_generic(r_u32, data, length, wchar_decoder(), utf32_writer(), native_encoding != encoding); | |
3692 } | |
3693 | |
3694 // convert to latin1 | |
3695 if (encoding == encoding_latin1) | |
3696 return convert_buffer_output_generic(r_u8, data, length, wchar_decoder(), latin1_writer()); | |
3697 | |
3698 assert(false && "Invalid encoding"); // unreachable | |
3699 return 0; | |
3700 } | |
3701 #else | |
3702 PUGI_IMPL_FN size_t get_valid_length(const char_t* data, size_t length) | |
3703 { | |
3704 if (length < 5) return 0; | |
3705 | |
3706 for (size_t i = 1; i <= 4; ++i) | |
3707 { | |
3708 uint8_t ch = static_cast<uint8_t>(data[length - i]); | |
3709 | |
3710 // either a standalone character or a leading one | |
3711 if ((ch & 0xc0) != 0x80) return length - i; | |
3712 } | |
3713 | |
3714 // there are four non-leading characters at the end, sequence tail is broken so might as well process the whole chunk | |
3715 return length; | |
3716 } | |
3717 | |
3718 PUGI_IMPL_FN size_t convert_buffer_output(char_t* /* r_char */, uint8_t* r_u8, uint16_t* r_u16, uint32_t* r_u32, const char_t* data, size_t length, xml_encoding encoding) | |
3719 { | |
3720 if (encoding == encoding_utf16_be || encoding == encoding_utf16_le) | |
3721 { | |
3722 xml_encoding native_encoding = is_little_endian() ? encoding_utf16_le : encoding_utf16_be; | |
3723 | |
3724 return convert_buffer_output_generic(r_u16, data, length, utf8_decoder(), utf16_writer(), native_encoding != encoding); | |
3725 } | |
3726 | |
3727 if (encoding == encoding_utf32_be || encoding == encoding_utf32_le) | |
3728 { | |
3729 xml_encoding native_encoding = is_little_endian() ? encoding_utf32_le : encoding_utf32_be; | |
3730 | |
3731 return convert_buffer_output_generic(r_u32, data, length, utf8_decoder(), utf32_writer(), native_encoding != encoding); | |
3732 } | |
3733 | |
3734 if (encoding == encoding_latin1) | |
3735 return convert_buffer_output_generic(r_u8, data, length, utf8_decoder(), latin1_writer()); | |
3736 | |
3737 assert(false && "Invalid encoding"); // unreachable | |
3738 return 0; | |
3739 } | |
3740 #endif | |
3741 | |
3742 class xml_buffered_writer | |
3743 { | |
// Copy constructor and copy assignment are declared here, ahead of the public section, and no
// definition is visible in this part of the file.
// NOTE(review): presumably the pre-C++11 idiom for making the class non-copyable - confirm that
// no definition exists elsewhere.
3744 xml_buffered_writer(const xml_buffered_writer&); | |
3745 xml_buffered_writer& operator=(const xml_buffered_writer&); | |
3746 | |
3747 public: | |
3748 xml_buffered_writer(xml_writer& writer_, xml_encoding user_encoding): writer(writer_), bufsize(0), encoding(get_write_encoding(user_encoding)) | |
3749 { | |
3750 PUGI_IMPL_STATIC_ASSERT(bufcapacity >= 8); | |
3751 } | |
3752 | |
3753 size_t flush() | |
3754 { | |
3755 flush(buffer, bufsize); | |
3756 bufsize = 0; | |
3757 return 0; | |
3758 } | |
3759 | |
3760 void flush(const char_t* data, size_t size) | |
3761 { | |
3762 if (size == 0) return; | |
3763 | |
3764 // fast path, just write data | |
3765 if (encoding == get_write_native_encoding()) | |
3766 writer.write(data, size * sizeof(char_t)); | |
3767 else | |
3768 { | |
3769 // convert chunk | |
3770 size_t result = convert_buffer_output(scratch.data_char, scratch.data_u8, scratch.data_u16, scratch.data_u32, data, size, encoding); | |
3771 assert(result <= sizeof(scratch)); | |
3772 | |
3773 // write data | |
3774 writer.write(scratch.data_u8, result); | |
3775 } | |
3776 } | |
3777 | |
3778 void write_direct(const char_t* data, size_t length) | |
3779 { | |
3780 // flush the remaining buffer contents | |
3781 flush(); | |
3782 | |
3783 // handle large chunks | |
3784 if (length > bufcapacity) | |
3785 { | |
3786 if (encoding == get_write_native_encoding()) | |
3787 { | |
3788 // fast path, can just write data chunk | |
3789 writer.write(data, length * sizeof(char_t)); | |
3790 return; | |
3791 } | |
3792 | |
3793 // need to convert in suitable chunks | |
3794 while (length > bufcapacity) | |
3795 { | |
3796 // get chunk size by selecting such number of characters that are guaranteed to fit into scratch buffer | |
3797 // and form a complete codepoint sequence (i.e. discard start of last codepoint if necessary) | |
3798 size_t chunk_size = get_valid_length(data, bufcapacity); | |
3799 assert(chunk_size); | |
3800 | |
3801 // convert chunk and write | |
3802 flush(data, chunk_size); | |
3803 | |
3804 // iterate | |
3805 data += chunk_size; | |
3806 length -= chunk_size; | |
3807 } | |
3808 | |
3809 // small tail is copied below | |
3810 bufsize = 0; | |
3811 } | |
3812 | |
3813 memcpy(buffer + bufsize, data, length * sizeof(char_t)); | |
3814 bufsize += length; | |
3815 } | |
3816 | |
3817 void write_buffer(const char_t* data, size_t length) | |
3818 { | |
3819 size_t offset = bufsize; | |
3820 | |
3821 if (offset + length <= bufcapacity) | |
3822 { | |
3823 memcpy(buffer + offset, data, length * sizeof(char_t)); | |
3824 bufsize = offset + length; | |
3825 } | |
3826 else | |
3827 { | |
3828 write_direct(data, length); | |
3829 } | |
3830 } | |
3831 | |
3832 void write_string(const char_t* data) | |
3833 { | |
3834 // write the part of the string that fits in the buffer | |
3835 size_t offset = bufsize; | |
3836 | |
3837 while (*data && offset < bufcapacity) | |
3838 buffer[offset++] = *data++; | |
3839 | |
3840 // write the rest | |
3841 if (offset < bufcapacity) | |
3842 { | |
3843 bufsize = offset; | |
3844 } | |
3845 else | |
3846 { | |
3847 // backtrack a bit if we have split the codepoint | |
3848 size_t length = offset - bufsize; | |
3849 size_t extra = length - get_valid_length(data - length, length); | |
3850 | |
3851 bufsize = offset - extra; | |
3852 | |
3853 write_direct(data - extra, strlength(data) + extra); | |
3854 } | |
3855 } | |
3856 | |
3857 void write(char_t d0) | |
3858 { | |
3859 size_t offset = bufsize; | |
3860 if (offset > bufcapacity - 1) offset = flush(); | |
3861 | |
3862 buffer[offset + 0] = d0; | |
3863 bufsize = offset + 1; | |
3864 } | |
3865 | |
3866 void write(char_t d0, char_t d1) | |
3867 { | |
3868 size_t offset = bufsize; | |
3869 if (offset > bufcapacity - 2) offset = flush(); | |
3870 | |
3871 buffer[offset + 0] = d0; | |
3872 buffer[offset + 1] = d1; | |
3873 bufsize = offset + 2; | |
3874 } | |
3875 | |
3876 void write(char_t d0, char_t d1, char_t d2) | |
3877 { | |
3878 size_t offset = bufsize; | |
3879 if (offset > bufcapacity - 3) offset = flush(); | |
3880 | |
3881 buffer[offset + 0] = d0; | |
3882 buffer[offset + 1] = d1; | |
3883 buffer[offset + 2] = d2; | |
3884 bufsize = offset + 3; | |
3885 } | |
3886 | |
3887 void write(char_t d0, char_t d1, char_t d2, char_t d3) | |
3888 { | |
3889 size_t offset = bufsize; | |
3890 if (offset > bufcapacity - 4) offset = flush(); | |
3891 | |
3892 buffer[offset + 0] = d0; | |
3893 buffer[offset + 1] = d1; | |
3894 buffer[offset + 2] = d2; | |
3895 buffer[offset + 3] = d3; | |
3896 bufsize = offset + 4; | |
3897 } | |
3898 | |
3899 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4) | |
3900 { | |
3901 size_t offset = bufsize; | |
3902 if (offset > bufcapacity - 5) offset = flush(); | |
3903 | |
3904 buffer[offset + 0] = d0; | |
3905 buffer[offset + 1] = d1; | |
3906 buffer[offset + 2] = d2; | |
3907 buffer[offset + 3] = d3; | |
3908 buffer[offset + 4] = d4; | |
3909 bufsize = offset + 5; | |
3910 } | |
3911 | |
3912 void write(char_t d0, char_t d1, char_t d2, char_t d3, char_t d4, char_t d5) | |
3913 { | |
3914 size_t offset = bufsize; | |
3915 if (offset > bufcapacity - 6) offset = flush(); | |
3916 | |
3917 buffer[offset + 0] = d0; | |
3918 buffer[offset + 1] = d1; | |
3919 buffer[offset + 2] = d2; | |
3920 buffer[offset + 3] = d3; | |
3921 buffer[offset + 4] = d4; | |
3922 buffer[offset + 5] = d5; | |
3923 bufsize = offset + 6; | |
3924 } | |
3925 | |
3926 // utf8 maximum expansion: x4 (-> utf32) | |
3927 // utf16 maximum expansion: x2 (-> utf32) | |
3928 // utf32 maximum expansion: x1 | |
3929 enum | |
3930 { | |
3931 bufcapacitybytes = | |
3932 #ifdef PUGIXML_MEMORY_OUTPUT_STACK | |
3933 PUGIXML_MEMORY_OUTPUT_STACK | |
3934 #else | |
3935 10240 | |
3936 #endif | |
3937 , | |
3938 bufcapacity = bufcapacitybytes / (sizeof(char_t) + 4) | |
3939 }; | |
3940 | |
3941 char_t buffer[bufcapacity]; | |
3942 | |
3943 union | |
3944 { | |
3945 uint8_t data_u8[4 * bufcapacity]; | |
3946 uint16_t data_u16[2 * bufcapacity]; | |
3947 uint32_t data_u32[bufcapacity]; | |
3948 char_t data_char[bufcapacity]; | |
3949 } scratch; | |
3950 | |
3951 xml_writer& writer; | |
3952 size_t bufsize; | |
3953 xml_encoding encoding; | |
3954 }; | |
3955 | |
3956 PUGI_IMPL_FN void text_output_escaped(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) | |
3957 { | |
3958 while (*s) | |
3959 { | |
3960 const char_t* prev = s; | |
3961 | |
3962 // While *s is a usual symbol | |
3963 PUGI_IMPL_SCANWHILE_UNROLL(!PUGI_IMPL_IS_CHARTYPEX(ss, type)); | |
3964 | |
3965 writer.write_buffer(prev, static_cast<size_t>(s - prev)); | |
3966 | |
3967 switch (*s) | |
3968 { | |
3969 case 0: break; | |
3970 case '&': | |
3971 writer.write('&', 'a', 'm', 'p', ';'); | |
3972 ++s; | |
3973 break; | |
3974 case '<': | |
3975 writer.write('&', 'l', 't', ';'); | |
3976 ++s; | |
3977 break; | |
3978 case '>': | |
3979 writer.write('&', 'g', 't', ';'); | |
3980 ++s; | |
3981 break; | |
3982 case '"': | |
3983 if (flags & format_attribute_single_quote) | |
3984 writer.write('"'); | |
3985 else | |
3986 writer.write('&', 'q', 'u', 'o', 't', ';'); | |
3987 ++s; | |
3988 break; | |
3989 case '\'': | |
3990 if (flags & format_attribute_single_quote) | |
3991 writer.write('&', 'a', 'p', 'o', 's', ';'); | |
3992 else | |
3993 writer.write('\''); | |
3994 ++s; | |
3995 break; | |
3996 default: // s is not a usual symbol | |
3997 { | |
3998 unsigned int ch = static_cast<unsigned int>(*s++); | |
3999 assert(ch < 32); | |
4000 | |
4001 if (!(flags & format_skip_control_chars)) | |
4002 writer.write('&', '#', static_cast<char_t>((ch / 10) + '0'), static_cast<char_t>((ch % 10) + '0'), ';'); | |
4003 } | |
4004 } | |
4005 } | |
4006 } | |
4007 | |
4008 PUGI_IMPL_FN void text_output(xml_buffered_writer& writer, const char_t* s, chartypex_t type, unsigned int flags) | |
4009 { | |
4010 if (flags & format_no_escapes) | |
4011 writer.write_string(s); | |
4012 else | |
4013 text_output_escaped(writer, s, type, flags); | |
4014 } | |
4015 | |
4016 PUGI_IMPL_FN void text_output_cdata(xml_buffered_writer& writer, const char_t* s) | |
4017 { | |
4018 do | |
4019 { | |
4020 writer.write('<', '!', '[', 'C', 'D'); | |
4021 writer.write('A', 'T', 'A', '['); | |
4022 | |
4023 const char_t* prev = s; | |
4024 | |
4025 // look for ]]> sequence - we can't output it as is since it terminates CDATA | |
4026 while (*s && !(s[0] == ']' && s[1] == ']' && s[2] == '>')) ++s; | |
4027 | |
4028 // skip ]] if we stopped at ]]>, > will go to the next CDATA section | |
4029 if (*s) s += 2; | |
4030 | |
4031 writer.write_buffer(prev, static_cast<size_t>(s - prev)); | |
4032 | |
4033 writer.write(']', ']', '>'); | |
4034 } | |
4035 while (*s); | |
4036 } | |
4037 | |
4038 PUGI_IMPL_FN void text_output_indent(xml_buffered_writer& writer, const char_t* indent, size_t indent_length, unsigned int depth) | |
4039 { | |
4040 switch (indent_length) | |
4041 { | |
4042 case 1: | |
4043 { | |
4044 for (unsigned int i = 0; i < depth; ++i) | |
4045 writer.write(indent[0]); | |
4046 break; | |
4047 } | |
4048 | |
4049 case 2: | |
4050 { | |
4051 for (unsigned int i = 0; i < depth; ++i) | |
4052 writer.write(indent[0], indent[1]); | |
4053 break; | |
4054 } | |
4055 | |
4056 case 3: | |
4057 { | |
4058 for (unsigned int i = 0; i < depth; ++i) | |
4059 writer.write(indent[0], indent[1], indent[2]); | |
4060 break; | |
4061 } | |
4062 | |
4063 case 4: | |
4064 { | |
4065 for (unsigned int i = 0; i < depth; ++i) | |
4066 writer.write(indent[0], indent[1], indent[2], indent[3]); | |
4067 break; | |
4068 } | |
4069 | |
4070 default: | |
4071 { | |
4072 for (unsigned int i = 0; i < depth; ++i) | |
4073 writer.write_buffer(indent, indent_length); | |
4074 } | |
4075 } | |
4076 } | |
4077 | |
4078 PUGI_IMPL_FN void node_output_comment(xml_buffered_writer& writer, const char_t* s) | |
4079 { | |
4080 writer.write('<', '!', '-', '-'); | |
4081 | |
4082 while (*s) | |
4083 { | |
4084 const char_t* prev = s; | |
4085 | |
4086 // look for -\0 or -- sequence - we can't output it since -- is illegal in comment body | |
4087 while (*s && !(s[0] == '-' && (s[1] == '-' || s[1] == 0))) ++s; | |
4088 | |
4089 writer.write_buffer(prev, static_cast<size_t>(s - prev)); | |
4090 | |
4091 if (*s) | |
4092 { | |
4093 assert(*s == '-'); | |
4094 | |
4095 writer.write('-', ' '); | |
4096 ++s; | |
4097 } | |
4098 } | |
4099 | |
4100 writer.write('-', '-', '>'); | |
4101 } | |
4102 | |
4103 PUGI_IMPL_FN void node_output_pi_value(xml_buffered_writer& writer, const char_t* s) | |
4104 { | |
4105 while (*s) | |
4106 { | |
4107 const char_t* prev = s; | |
4108 | |
4109 // look for ?> sequence - we can't output it since ?> terminates PI | |
4110 while (*s && !(s[0] == '?' && s[1] == '>')) ++s; | |
4111 | |
4112 writer.write_buffer(prev, static_cast<size_t>(s - prev)); | |
4113 | |
4114 if (*s) | |
4115 { | |
4116 assert(s[0] == '?' && s[1] == '>'); | |
4117 | |
4118 writer.write('?', ' ', '>'); | |
4119 s += 2; | |
4120 } | |
4121 } | |
4122 } | |
4123 | |
4124 PUGI_IMPL_FN void node_output_attributes(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) | |
4125 { | |
4126 const char_t* default_name = PUGIXML_TEXT(":anonymous"); | |
4127 const char_t enquotation_char = (flags & format_attribute_single_quote) ? '\'' : '"'; | |
4128 | |
4129 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) | |
4130 { | |
4131 if ((flags & (format_indent_attributes | format_raw)) == format_indent_attributes) | |
4132 { | |
4133 writer.write('\n'); | |
4134 | |
4135 text_output_indent(writer, indent, indent_length, depth + 1); | |
4136 } | |
4137 else | |
4138 { | |
4139 writer.write(' '); | |
4140 } | |
4141 | |
4142 writer.write_string(a->name ? a->name + 0 : default_name); | |
4143 writer.write('=', enquotation_char); | |
4144 | |
4145 if (a->value) | |
4146 text_output(writer, a->value, ctx_special_attr, flags); | |
4147 | |
4148 writer.write(enquotation_char); | |
4149 } | |
4150 } | |
4151 | |
4152 PUGI_IMPL_FN bool node_output_start(xml_buffered_writer& writer, xml_node_struct* node, const char_t* indent, size_t indent_length, unsigned int flags, unsigned int depth) | |
4153 { | |
4154 const char_t* default_name = PUGIXML_TEXT(":anonymous"); | |
4155 const char_t* name = node->name ? node->name + 0 : default_name; | |
4156 | |
4157 writer.write('<'); | |
4158 writer.write_string(name); | |
4159 | |
4160 if (node->first_attribute) | |
4161 node_output_attributes(writer, node, indent, indent_length, flags, depth); | |
4162 | |
4163 // element nodes can have value if parse_embed_pcdata was used | |
4164 if (!node->value) | |
4165 { | |
4166 if (!node->first_child) | |
4167 { | |
4168 if (flags & format_no_empty_element_tags) | |
4169 { | |
4170 writer.write('>', '<', '/'); | |
4171 writer.write_string(name); | |
4172 writer.write('>'); | |
4173 | |
4174 return false; | |
4175 } | |
4176 else | |
4177 { | |
4178 if ((flags & format_raw) == 0) | |
4179 writer.write(' '); | |
4180 | |
4181 writer.write('/', '>'); | |
4182 | |
4183 return false; | |
4184 } | |
4185 } | |
4186 else | |
4187 { | |
4188 writer.write('>'); | |
4189 | |
4190 return true; | |
4191 } | |
4192 } | |
4193 else | |
4194 { | |
4195 writer.write('>'); | |
4196 | |
4197 text_output(writer, node->value, ctx_special_pcdata, flags); | |
4198 | |
4199 if (!node->first_child) | |
4200 { | |
4201 writer.write('<', '/'); | |
4202 writer.write_string(name); | |
4203 writer.write('>'); | |
4204 | |
4205 return false; | |
4206 } | |
4207 else | |
4208 { | |
4209 return true; | |
4210 } | |
4211 } | |
4212 } | |
4213 | |
4214 PUGI_IMPL_FN void node_output_end(xml_buffered_writer& writer, xml_node_struct* node) | |
4215 { | |
4216 const char_t* default_name = PUGIXML_TEXT(":anonymous"); | |
4217 const char_t* name = node->name ? node->name + 0 : default_name; | |
4218 | |
4219 writer.write('<', '/'); | |
4220 writer.write_string(name); | |
4221 writer.write('>'); | |
4222 } | |
4223 | |
4224 PUGI_IMPL_FN void node_output_simple(xml_buffered_writer& writer, xml_node_struct* node, unsigned int flags) | |
4225 { | |
4226 const char_t* default_name = PUGIXML_TEXT(":anonymous"); | |
4227 | |
4228 switch (PUGI_IMPL_NODETYPE(node)) | |
4229 { | |
4230 case node_pcdata: | |
4231 text_output(writer, node->value ? node->value + 0 : PUGIXML_TEXT(""), ctx_special_pcdata, flags); | |
4232 break; | |
4233 | |
4234 case node_cdata: | |
4235 text_output_cdata(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); | |
4236 break; | |
4237 | |
4238 case node_comment: | |
4239 node_output_comment(writer, node->value ? node->value + 0 : PUGIXML_TEXT("")); | |
4240 break; | |
4241 | |
4242 case node_pi: | |
4243 writer.write('<', '?'); | |
4244 writer.write_string(node->name ? node->name + 0 : default_name); | |
4245 | |
4246 if (node->value) | |
4247 { | |
4248 writer.write(' '); | |
4249 node_output_pi_value(writer, node->value); | |
4250 } | |
4251 | |
4252 writer.write('?', '>'); | |
4253 break; | |
4254 | |
4255 case node_declaration: | |
4256 writer.write('<', '?'); | |
4257 writer.write_string(node->name ? node->name + 0 : default_name); | |
4258 node_output_attributes(writer, node, PUGIXML_TEXT(""), 0, flags | format_raw, 0); | |
4259 writer.write('?', '>'); | |
4260 break; | |
4261 | |
4262 case node_doctype: | |
4263 writer.write('<', '!', 'D', 'O', 'C'); | |
4264 writer.write('T', 'Y', 'P', 'E'); | |
4265 | |
4266 if (node->value) | |
4267 { | |
4268 writer.write(' '); | |
4269 writer.write_string(node->value); | |
4270 } | |
4271 | |
4272 writer.write('>'); | |
4273 break; | |
4274 | |
4275 default: | |
4276 assert(false && "Invalid node type"); // unreachable | |
4277 } | |
4278 } | |
4279 | |
// Bit flags tracking what node_output has to emit before the next node.
enum indent_flags_t
{
	indent_newline = 1 << 0, // write a line break first
	indent_indent = 1 << 1   // write indentation for the current depth
};
4285 | |
4286 PUGI_IMPL_FN void node_output(xml_buffered_writer& writer, xml_node_struct* root, const char_t* indent, unsigned int flags, unsigned int depth) | |
4287 { | |
4288 size_t indent_length = ((flags & (format_indent | format_indent_attributes)) && (flags & format_raw) == 0) ? strlength(indent) : 0; | |
4289 unsigned int indent_flags = indent_indent; | |
4290 | |
4291 xml_node_struct* node = root; | |
4292 | |
4293 do | |
4294 { | |
4295 assert(node); | |
4296 | |
4297 // begin writing current node | |
4298 if (PUGI_IMPL_NODETYPE(node) == node_pcdata || PUGI_IMPL_NODETYPE(node) == node_cdata) | |
4299 { | |
4300 node_output_simple(writer, node, flags); | |
4301 | |
4302 indent_flags = 0; | |
4303 } | |
4304 else | |
4305 { | |
4306 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) | |
4307 writer.write('\n'); | |
4308 | |
4309 if ((indent_flags & indent_indent) && indent_length) | |
4310 text_output_indent(writer, indent, indent_length, depth); | |
4311 | |
4312 if (PUGI_IMPL_NODETYPE(node) == node_element) | |
4313 { | |
4314 indent_flags = indent_newline | indent_indent; | |
4315 | |
4316 if (node_output_start(writer, node, indent, indent_length, flags, depth)) | |
4317 { | |
4318 // element nodes can have value if parse_embed_pcdata was used | |
4319 if (node->value) | |
4320 indent_flags = 0; | |
4321 | |
4322 node = node->first_child; | |
4323 depth++; | |
4324 continue; | |
4325 } | |
4326 } | |
4327 else if (PUGI_IMPL_NODETYPE(node) == node_document) | |
4328 { | |
4329 indent_flags = indent_indent; | |
4330 | |
4331 if (node->first_child) | |
4332 { | |
4333 node = node->first_child; | |
4334 continue; | |
4335 } | |
4336 } | |
4337 else | |
4338 { | |
4339 node_output_simple(writer, node, flags); | |
4340 | |
4341 indent_flags = indent_newline | indent_indent; | |
4342 } | |
4343 } | |
4344 | |
4345 // continue to the next node | |
4346 while (node != root) | |
4347 { | |
4348 if (node->next_sibling) | |
4349 { | |
4350 node = node->next_sibling; | |
4351 break; | |
4352 } | |
4353 | |
4354 node = node->parent; | |
4355 | |
4356 // write closing node | |
4357 if (PUGI_IMPL_NODETYPE(node) == node_element) | |
4358 { | |
4359 depth--; | |
4360 | |
4361 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) | |
4362 writer.write('\n'); | |
4363 | |
4364 if ((indent_flags & indent_indent) && indent_length) | |
4365 text_output_indent(writer, indent, indent_length, depth); | |
4366 | |
4367 node_output_end(writer, node); | |
4368 | |
4369 indent_flags = indent_newline | indent_indent; | |
4370 } | |
4371 } | |
4372 } | |
4373 while (node != root); | |
4374 | |
4375 if ((indent_flags & indent_newline) && (flags & format_raw) == 0) | |
4376 writer.write('\n'); | |
4377 } | |
4378 | |
4379 PUGI_IMPL_FN bool has_declaration(xml_node_struct* node) | |
4380 { | |
4381 for (xml_node_struct* child = node->first_child; child; child = child->next_sibling) | |
4382 { | |
4383 xml_node_type type = PUGI_IMPL_NODETYPE(child); | |
4384 | |
4385 if (type == node_declaration) return true; | |
4386 if (type == node_element) return false; | |
4387 } | |
4388 | |
4389 return false; | |
4390 } | |
4391 | |
4392 PUGI_IMPL_FN bool is_attribute_of(xml_attribute_struct* attr, xml_node_struct* node) | |
4393 { | |
4394 for (xml_attribute_struct* a = node->first_attribute; a; a = a->next_attribute) | |
4395 if (a == attr) | |
4396 return true; | |
4397 | |
4398 return false; | |
4399 } | |
4400 | |
4401 PUGI_IMPL_FN bool allow_insert_attribute(xml_node_type parent) | |
4402 { | |
4403 return parent == node_element || parent == node_declaration; | |
4404 } | |
4405 | |
4406 PUGI_IMPL_FN bool allow_insert_child(xml_node_type parent, xml_node_type child) | |
4407 { | |
4408 if (parent != node_document && parent != node_element) return false; | |
4409 if (child == node_document || child == node_null) return false; | |
4410 if (parent != node_document && (child == node_declaration || child == node_doctype)) return false; | |
4411 | |
4412 return true; | |
4413 } | |
4414 | |
4415 PUGI_IMPL_FN bool allow_move(xml_node parent, xml_node child) | |
4416 { | |
4417 // check that child can be a child of parent | |
4418 if (!allow_insert_child(parent.type(), child.type())) | |
4419 return false; | |
4420 | |
4421 // check that node is not moved between documents | |
4422 if (parent.root() != child.root()) | |
4423 return false; | |
4424 | |
4425 // check that new parent is not in the child subtree | |
4426 xml_node cur = parent; | |
4427 | |
4428 while (cur) | |
4429 { | |
4430 if (cur == child) | |
4431 return false; | |
4432 | |
4433 cur = cur.parent(); | |
4434 } | |
4435 | |
4436 return true; | |
4437 } | |
4438 | |
4439 template <typename String, typename Header> | |
4440 PUGI_IMPL_FN void node_copy_string(String& dest, Header& header, uintptr_t header_mask, char_t* source, Header& source_header, xml_allocator* alloc) | |
4441 { | |
4442 assert(!dest && (header & header_mask) == 0); // copies are performed into fresh nodes | |
4443 | |
4444 if (source) | |
4445 { | |
4446 if (alloc && (source_header & header_mask) == 0) | |
4447 { | |
4448 dest = source; | |
4449 | |
4450 // since strcpy_insitu can reuse document buffer memory we need to mark both source and dest as shared | |
4451 header |= xml_memory_page_contents_shared_mask; | |
4452 source_header |= xml_memory_page_contents_shared_mask; | |
4453 } | |
4454 else | |
4455 strcpy_insitu(dest, header, header_mask, source, strlength(source)); | |
4456 } | |
4457 } | |
4458 | |
4459 PUGI_IMPL_FN void node_copy_contents(xml_node_struct* dn, xml_node_struct* sn, xml_allocator* shared_alloc) | |
4460 { | |
4461 node_copy_string(dn->name, dn->header, xml_memory_page_name_allocated_mask, sn->name, sn->header, shared_alloc); | |
4462 node_copy_string(dn->value, dn->header, xml_memory_page_value_allocated_mask, sn->value, sn->header, shared_alloc); | |
4463 | |
4464 for (xml_attribute_struct* sa = sn->first_attribute; sa; sa = sa->next_attribute) | |
4465 { | |
4466 xml_attribute_struct* da = append_new_attribute(dn, get_allocator(dn)); | |
4467 | |
4468 if (da) | |
4469 { | |
4470 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); | |
4471 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); | |
4472 } | |
4473 } | |
4474 } | |
4475 | |
4476 PUGI_IMPL_FN void node_copy_tree(xml_node_struct* dn, xml_node_struct* sn) | |
4477 { | |
4478 xml_allocator& alloc = get_allocator(dn); | |
4479 xml_allocator* shared_alloc = (&alloc == &get_allocator(sn)) ? &alloc : 0; | |
4480 | |
4481 node_copy_contents(dn, sn, shared_alloc); | |
4482 | |
4483 xml_node_struct* dit = dn; | |
4484 xml_node_struct* sit = sn->first_child; | |
4485 | |
4486 while (sit && sit != sn) | |
4487 { | |
4488 // loop invariant: dit is inside the subtree rooted at dn | |
4489 assert(dit); | |
4490 | |
4491 // when a tree is copied into one of the descendants, we need to skip that subtree to avoid an infinite loop | |
4492 if (sit != dn) | |
4493 { | |
4494 xml_node_struct* copy = append_new_node(dit, alloc, PUGI_IMPL_NODETYPE(sit)); | |
4495 | |
4496 if (copy) | |
4497 { | |
4498 node_copy_contents(copy, sit, shared_alloc); | |
4499 | |
4500 if (sit->first_child) | |
4501 { | |
4502 dit = copy; | |
4503 sit = sit->first_child; | |
4504 continue; | |
4505 } | |
4506 } | |
4507 } | |
4508 | |
4509 // continue to the next node | |
4510 do | |
4511 { | |
4512 if (sit->next_sibling) | |
4513 { | |
4514 sit = sit->next_sibling; | |
4515 break; | |
4516 } | |
4517 | |
4518 sit = sit->parent; | |
4519 dit = dit->parent; | |
4520 | |
4521 // loop invariant: dit is inside the subtree rooted at dn while sit is inside sn | |
4522 assert(sit == sn || dit); | |
4523 } | |
4524 while (sit != sn); | |
4525 } | |
4526 | |
4527 assert(!sit || dit == dn->parent); | |
4528 } | |
4529 | |
4530 PUGI_IMPL_FN void node_copy_attribute(xml_attribute_struct* da, xml_attribute_struct* sa) | |
4531 { | |
4532 xml_allocator& alloc = get_allocator(da); | |
4533 xml_allocator* shared_alloc = (&alloc == &get_allocator(sa)) ? &alloc : 0; | |
4534 | |
4535 node_copy_string(da->name, da->header, xml_memory_page_name_allocated_mask, sa->name, sa->header, shared_alloc); | |
4536 node_copy_string(da->value, da->header, xml_memory_page_value_allocated_mask, sa->value, sa->header, shared_alloc); | |
4537 } | |
4538 | |
4539 inline bool is_text_node(xml_node_struct* node) | |
4540 { | |
4541 xml_node_type type = PUGI_IMPL_NODETYPE(node); | |
4542 | |
4543 return type == node_pcdata || type == node_cdata; | |
4544 } | |
4545 | |
4546 // get value with conversion functions | |
4547 template <typename U> PUGI_IMPL_FN PUGI_IMPL_UNSIGNED_OVERFLOW U string_to_integer(const char_t* value, U minv, U maxv) | |
4548 { | |
4549 U result = 0; | |
4550 const char_t* s = value; | |
4551 | |
4552 while (PUGI_IMPL_IS_CHARTYPE(*s, ct_space)) | |
4553 s++; | |
4554 | |
4555 bool negative = (*s == '-'); | |
4556 | |
4557 s += (*s == '+' || *s == '-'); | |
4558 | |
4559 bool overflow = false; | |
4560 | |
4561 if (s[0] == '0' && (s[1] | ' ') == 'x') | |
4562 { | |
4563 s += 2; | |
4564 | |
4565 // since overflow detection relies on length of the sequence skip leading zeros | |
4566 while (*s == '0') | |
4567 s++; | |
4568 | |
4569 const char_t* start = s; | |
4570 | |
4571 for (;;) | |
4572 { | |
4573 if (static_cast<unsigned>(*s - '0') < 10) | |
4574 result = result * 16 + (*s - '0'); | |
4575 else if (static_cast<unsigned>((*s | ' ') - 'a') < 6) | |
4576 result = result * 16 + ((*s | ' ') - 'a' + 10); | |
4577 else | |
4578 break; | |
4579 | |
4580 s++; | |
4581 } | |
4582 | |
4583 size_t digits = static_cast<size_t>(s - start); | |
4584 | |
4585 overflow = digits > sizeof(U) * 2; | |
4586 } | |
4587 else | |
4588 { | |
4589 // since overflow detection relies on length of the sequence skip leading zeros | |
4590 while (*s == '0') | |
4591 s++; | |
4592 | |
4593 const char_t* start = s; | |
4594 | |
4595 for (;;) | |
4596 { | |
4597 if (static_cast<unsigned>(*s - '0') < 10) | |
4598 result = result * 10 + (*s - '0'); | |
4599 else | |
4600 break; | |
4601 | |
4602 s++; | |
4603 } | |
4604 | |
4605 size_t digits = static_cast<size_t>(s - start); | |
4606 | |
4607 PUGI_IMPL_STATIC_ASSERT(sizeof(U) == 8 || sizeof(U) == 4 || sizeof(U) == 2); | |
4608 | |
4609 const size_t max_digits10 = sizeof(U) == 8 ? 20 : sizeof(U) == 4 ? 10 : 5; | |
4610 const char_t max_lead = sizeof(U) == 8 ? '1' : sizeof(U) == 4 ? '4' : '6'; | |
4611 const size_t high_bit = sizeof(U) * 8 - 1; | |
4612 | |
4613 overflow = digits >= max_digits10 && !(digits == max_digits10 && (*start < max_lead || (*start == max_lead && result >> high_bit))); | |
4614 } | |
4615 | |
4616 if (negative) | |
4617 { | |
4618 // Workaround for crayc++ CC-3059: Expected no overflow in routine. | |
4619 #ifdef _CRAYC | |
4620 return (overflow || result > ~minv + 1) ? minv : ~result + 1; | |
4621 #else | |
4622 return (overflow || result > 0 - minv) ? minv : 0 - result; | |
4623 #endif | |
4624 } | |
4625 else | |
4626 return (overflow || result > maxv) ? maxv : result; | |
4627 } | |
4628 | |
4629 PUGI_IMPL_FN int get_value_int(const char_t* value) | |
4630 { | |
4631 return string_to_integer<unsigned int>(value, static_cast<unsigned int>(INT_MIN), INT_MAX); | |
4632 } | |
4633 | |
4634 PUGI_IMPL_FN unsigned int get_value_uint(const char_t* value) | |
4635 { | |
4636 return string_to_integer<unsigned int>(value, 0, UINT_MAX); | |
4637 } | |
4638 | |
4639 PUGI_IMPL_FN double get_value_double(const char_t* value) | |
4640 { | |
4641 #ifdef PUGIXML_WCHAR_MODE | |
4642 return wcstod(value, 0); | |
4643 #else | |
4644 return strtod(value, 0); | |
4645 #endif | |
4646 } | |
4647 | |
4648 PUGI_IMPL_FN float get_value_float(const char_t* value) | |
4649 { | |
4650 #ifdef PUGIXML_WCHAR_MODE | |
4651 return static_cast<float>(wcstod(value, 0)); | |
4652 #else | |
4653 return static_cast<float>(strtod(value, 0)); | |
4654 #endif | |
4655 } | |
4656 | |
4657 PUGI_IMPL_FN bool get_value_bool(const char_t* value) | |
4658 { | |
4659 // only look at first char | |
4660 char_t first = *value; | |
4661 | |
4662 // 1*, t* (true), T* (True), y* (yes), Y* (YES) | |
4663 return (first == '1' || first == 't' || first == 'T' || first == 'y' || first == 'Y'); | |
4664 } | |
4665 | |
#ifdef PUGIXML_HAS_LONG_LONG
	// Parses `value` as a long long, clamping to [LLONG_MIN, LLONG_MAX]. Parsing
	// is done in unsigned arithmetic and the result is converted on return.
	PUGI_IMPL_FN long long get_value_llong(const char_t* value)
	{
		const unsigned long long lowest = static_cast<unsigned long long>(LLONG_MIN);
		const unsigned long long highest = LLONG_MAX;

		return string_to_integer<unsigned long long>(value, lowest, highest);
	}

	// Parses `value` as an unsigned long long, clamping to [0, ULLONG_MAX].
	PUGI_IMPL_FN unsigned long long get_value_ullong(const char_t* value)
	{
		const unsigned long long lowest = 0;
		const unsigned long long highest = ULLONG_MAX;

		return string_to_integer<unsigned long long>(value, lowest, highest);
	}
#endif
4677 | |
4678 template <typename U> PUGI_IMPL_FN PUGI_IMPL_UNSIGNED_OVERFLOW char_t* integer_to_string(char_t* begin, char_t* end, U value, bool negative) | |
4679 { | |
4680 char_t* result = end - 1; | |
4681 U rest = negative ? 0 - value : value; | |
4682 | |
4683 do | |
4684 { | |
4685 *result-- = static_cast<char_t>('0' + (rest % 10)); | |
4686 rest /= 10; | |
4687 } | |
4688 while (rest); | |
4689 | |
4690 assert(result >= begin); | |
4691 (void)begin; | |
4692 | |
4693 *result = '-'; | |
4694 | |
4695 return result + !negative; | |
4696 } | |
4697 | |
4698 // set value with conversion functions | |
4699 template <typename String, typename Header> | |
4700 PUGI_IMPL_FN bool set_value_ascii(String& dest, Header& header, uintptr_t header_mask, char* buf) | |
4701 { | |
4702 #ifdef PUGIXML_WCHAR_MODE | |
4703 char_t wbuf[128]; | |
4704 assert(strlen(buf) < sizeof(wbuf) / sizeof(wbuf[0])); | |
4705 | |
4706 size_t offset = 0; | |
4707 for (; buf[offset]; ++offset) wbuf[offset] = buf[offset]; | |
4708 | |
4709 return strcpy_insitu(dest, header, header_mask, wbuf, offset); | |
4710 #else | |
4711 return strcpy_insitu(dest, header, header_mask, buf, strlen(buf)); | |
4712 #endif | |
4713 } | |
4714 | |
4715 template <typename U, typename String, typename Header> | |
4716 PUGI_IMPL_FN bool set_value_integer(String& dest, Header& header, uintptr_t header_mask, U value, bool negative) | |
4717 { | |
4718 char_t buf[64]; | |
4719 char_t* end = buf + sizeof(buf) / sizeof(buf[0]); | |
4720 char_t* begin = integer_to_string(buf, end, value, negative); | |
4721 | |
4722 return strcpy_insitu(dest, header, header_mask, begin, end - begin); | |
4723 } | |
4724 | |
4725 template <typename String, typename Header> | |
4726 PUGI_IMPL_FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, float value, int precision) | |
4727 { | |
4728 char buf[128]; | |
4729 PUGI_IMPL_SNPRINTF(buf, "%.*g", precision, double(value)); | |
4730 | |
4731 return set_value_ascii(dest, header, header_mask, buf); | |
4732 } | |
4733 | |
4734 template <typename String, typename Header> | |
4735 PUGI_IMPL_FN bool set_value_convert(String& dest, Header& header, uintptr_t header_mask, double value, int precision) | |
4736 { | |
4737 char buf[128]; | |
4738 PUGI_IMPL_SNPRINTF(buf, "%.*g", precision, value); | |
4739 | |
4740 return set_value_ascii(dest, header, header_mask, buf); | |
4741 } | |
4742 | |
4743 template <typename String, typename Header> | |
4744 PUGI_IMPL_FN bool set_value_bool(String& dest, Header& header, uintptr_t header_mask, bool value) | |
4745 { | |
4746 return strcpy_insitu(dest, header, header_mask, value ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false"), value ? 4 : 5); | |
4747 } | |
4748 | |
4749 PUGI_IMPL_FN xml_parse_result load_buffer_impl(xml_document_struct* doc, xml_node_struct* root, void* contents, size_t size, unsigned int options, xml_encoding encoding, bool is_mutable, bool own, char_t** out_buffer) | |
4750 { | |
4751 // check input buffer | |
4752 if (!contents && size) return make_parse_result(status_io_error); | |
4753 | |
4754 // get actual encoding | |
4755 xml_encoding buffer_encoding = impl::get_buffer_encoding(encoding, contents, size); | |
4756 | |
4757 // if convert_buffer below throws bad_alloc, we still need to deallocate contents if we own it | |
4758 auto_deleter<void> contents_guard(own ? contents : 0, xml_memory::deallocate); | |
4759 | |
4760 // get private buffer | |
4761 char_t* buffer = 0; | |
4762 size_t length = 0; | |
4763 | |
4764 // coverity[var_deref_model] | |
4765 if (!impl::convert_buffer(buffer, length, buffer_encoding, contents, size, is_mutable)) return impl::make_parse_result(status_out_of_memory); | |
4766 | |
4767 // after this we either deallocate contents (below) or hold on to it via doc->buffer, so we don't need to guard it | |
4768 contents_guard.release(); | |
4769 | |
4770 // delete original buffer if we performed a conversion | |
4771 if (own && buffer != contents && contents) impl::xml_memory::deallocate(contents); | |
4772 | |
4773 // grab onto buffer if it's our buffer, user is responsible for deallocating contents himself | |
4774 if (own || buffer != contents) *out_buffer = buffer; | |
4775 | |
4776 // store buffer for offset_debug | |
4777 doc->buffer = buffer; | |
4778 | |
4779 // parse | |
4780 xml_parse_result res = impl::xml_parser::parse(buffer, length, doc, root, options); | |
4781 | |
4782 // remember encoding | |
4783 res.encoding = buffer_encoding; | |
4784 | |
4785 return res; | |
4786 } | |
4787 | |
4788 // we need to get length of entire file to load it in memory; the only (relatively) sane way to do it is via seek/tell trick | |
4789 PUGI_IMPL_FN xml_parse_status get_file_size(FILE* file, size_t& out_result) | |
4790 { | |
4791 #if defined(__linux__) || defined(__APPLE__) | |
4792 // this simultaneously retrieves the file size and file mode (to guard against loading non-files) | |
4793 struct stat st; | |
4794 if (fstat(fileno(file), &st) != 0) return status_io_error; | |
4795 | |
4796 // anything that's not a regular file doesn't have a coherent length | |
4797 if (!S_ISREG(st.st_mode)) return status_io_error; | |
4798 | |
4799 typedef off_t length_type; | |
4800 length_type length = st.st_size; | |
4801 #elif defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400 | |
4802 // there are 64-bit versions of fseek/ftell, let's use them | |
4803 typedef __int64 length_type; | |
4804 | |
4805 _fseeki64(file, 0, SEEK_END); | |
4806 length_type length = _ftelli64(file); | |
4807 _fseeki64(file, 0, SEEK_SET); | |
4808 #elif defined(__MINGW32__) && !defined(__NO_MINGW_LFS) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR)) | |
4809 // there are 64-bit versions of fseek/ftell, let's use them | |
4810 typedef off64_t length_type; | |
4811 | |
4812 fseeko64(file, 0, SEEK_END); | |
4813 length_type length = ftello64(file); | |
4814 fseeko64(file, 0, SEEK_SET); | |
4815 #else | |
4816 // if this is a 32-bit OS, long is enough; if this is a unix system, long is 64-bit, which is enough; otherwise we can't do anything anyway. | |
4817 typedef long length_type; | |
4818 | |
4819 fseek(file, 0, SEEK_END); | |
4820 length_type length = ftell(file); | |
4821 fseek(file, 0, SEEK_SET); | |
4822 #endif | |
4823 | |
4824 // check for I/O errors | |
4825 if (length < 0) return status_io_error; | |
4826 | |
4827 // check for overflow | |
4828 size_t result = static_cast<size_t>(length); | |
4829 | |
4830 if (static_cast<length_type>(result) != length) return status_out_of_memory; | |
4831 | |
4832 // finalize | |
4833 out_result = result; | |
4834 | |
4835 return status_ok; | |
4836 } | |
4837 | |
4838 // This function assumes that buffer has extra sizeof(char_t) writable bytes after size | |
4839 PUGI_IMPL_FN size_t zero_terminate_buffer(void* buffer, size_t size, xml_encoding encoding) | |
4840 { | |
4841 // We only need to zero-terminate if encoding conversion does not do it for us | |
4842 #ifdef PUGIXML_WCHAR_MODE | |
4843 xml_encoding wchar_encoding = get_wchar_encoding(); | |
4844 | |
4845 if (encoding == wchar_encoding || need_endian_swap_utf(encoding, wchar_encoding)) | |
4846 { | |
4847 size_t length = size / sizeof(char_t); | |
4848 | |
4849 static_cast<char_t*>(buffer)[length] = 0; | |
4850 return (length + 1) * sizeof(char_t); | |
4851 } | |
4852 #else | |
4853 if (encoding == encoding_utf8) | |
4854 { | |
4855 static_cast<char*>(buffer)[size] = 0; | |
4856 return size + 1; | |
4857 } | |
4858 #endif | |
4859 | |
4860 return size; | |
4861 } | |
4862 | |
4863 PUGI_IMPL_FN xml_parse_result load_file_impl(xml_document_struct* doc, FILE* file, unsigned int options, xml_encoding encoding, char_t** out_buffer) | |
4864 { | |
4865 if (!file) return make_parse_result(status_file_not_found); | |
4866 | |
4867 // get file size (can result in I/O errors) | |
4868 size_t size = 0; | |
4869 xml_parse_status size_status = get_file_size(file, size); | |
4870 if (size_status != status_ok) return make_parse_result(size_status); | |
4871 | |
4872 size_t max_suffix_size = sizeof(char_t); | |
4873 | |
4874 // allocate buffer for the whole file | |
4875 char* contents = static_cast<char*>(xml_memory::allocate(size + max_suffix_size)); | |
4876 if (!contents) return make_parse_result(status_out_of_memory); | |
4877 | |
4878 // read file in memory | |
4879 size_t read_size = fread(contents, 1, size, file); | |
4880 | |
4881 if (read_size != size) | |
4882 { | |
4883 xml_memory::deallocate(contents); | |
4884 return make_parse_result(status_io_error); | |
4885 } | |
4886 | |
4887 xml_encoding real_encoding = get_buffer_encoding(encoding, contents, size); | |
4888 | |
4889 return load_buffer_impl(doc, doc, contents, zero_terminate_buffer(contents, size, real_encoding), options, real_encoding, true, true, out_buffer); | |
4890 } | |
4891 | |
4892 PUGI_IMPL_FN void close_file(FILE* file) | |
4893 { | |
4894 fclose(file); | |
4895 } | |
4896 | |
4897 #ifndef PUGIXML_NO_STL | |
4898 template <typename T> struct xml_stream_chunk | |
4899 { | |
4900 static xml_stream_chunk* create() | |
4901 { | |
4902 void* memory = xml_memory::allocate(sizeof(xml_stream_chunk)); | |
4903 if (!memory) return 0; | |
4904 | |
4905 return new (memory) xml_stream_chunk(); | |
4906 } | |
4907 | |
4908 static void destroy(xml_stream_chunk* chunk) | |
4909 { | |
4910 // free chunk chain | |
4911 while (chunk) | |
4912 { | |
4913 xml_stream_chunk* next_ = chunk->next; | |
4914 | |
4915 xml_memory::deallocate(chunk); | |
4916 | |
4917 chunk = next_; | |
4918 } | |
4919 } | |
4920 | |
4921 xml_stream_chunk(): next(0), size(0) | |
4922 { | |
4923 } | |
4924 | |
4925 xml_stream_chunk* next; | |
4926 size_t size; | |
4927 | |
4928 T data[xml_memory_page_size / sizeof(T)]; | |
4929 }; | |
4930 | |
4931 template <typename T> PUGI_IMPL_FN xml_parse_status load_stream_data_noseek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) | |
4932 { | |
4933 auto_deleter<xml_stream_chunk<T> > chunks(0, xml_stream_chunk<T>::destroy); | |
4934 | |
4935 // read file to a chunk list | |
4936 size_t total = 0; | |
4937 xml_stream_chunk<T>* last = 0; | |
4938 | |
4939 while (!stream.eof()) | |
4940 { | |
4941 // allocate new chunk | |
4942 xml_stream_chunk<T>* chunk = xml_stream_chunk<T>::create(); | |
4943 if (!chunk) return status_out_of_memory; | |
4944 | |
4945 // append chunk to list | |
4946 if (last) last = last->next = chunk; | |
4947 else chunks.data = last = chunk; | |
4948 | |
4949 // read data to chunk | |
4950 stream.read(chunk->data, static_cast<std::streamsize>(sizeof(chunk->data) / sizeof(T))); | |
4951 chunk->size = static_cast<size_t>(stream.gcount()) * sizeof(T); | |
4952 | |
4953 // read may set failbit | eofbit in case gcount() is less than read length, so check for other I/O errors | |
4954 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; | |
4955 | |
4956 // guard against huge files (chunk size is small enough to make this overflow check work) | |
4957 if (total + chunk->size < total) return status_out_of_memory; | |
4958 total += chunk->size; | |
4959 } | |
4960 | |
4961 size_t max_suffix_size = sizeof(char_t); | |
4962 | |
4963 // copy chunk list to a contiguous buffer | |
4964 char* buffer = static_cast<char*>(xml_memory::allocate(total + max_suffix_size)); | |
4965 if (!buffer) return status_out_of_memory; | |
4966 | |
4967 char* write = buffer; | |
4968 | |
4969 for (xml_stream_chunk<T>* chunk = chunks.data; chunk; chunk = chunk->next) | |
4970 { | |
4971 assert(write + chunk->size <= buffer + total); | |
4972 memcpy(write, chunk->data, chunk->size); | |
4973 write += chunk->size; | |
4974 } | |
4975 | |
4976 assert(write == buffer + total); | |
4977 | |
4978 // return buffer | |
4979 *out_buffer = buffer; | |
4980 *out_size = total; | |
4981 | |
4982 return status_ok; | |
4983 } | |
4984 | |
4985 template <typename T> PUGI_IMPL_FN xml_parse_status load_stream_data_seek(std::basic_istream<T>& stream, void** out_buffer, size_t* out_size) | |
4986 { | |
4987 // get length of remaining data in stream | |
4988 typename std::basic_istream<T>::pos_type pos = stream.tellg(); | |
4989 stream.seekg(0, std::ios::end); | |
4990 std::streamoff length = stream.tellg() - pos; | |
4991 stream.seekg(pos); | |
4992 | |
4993 if (stream.fail() || pos < 0) return status_io_error; | |
4994 | |
4995 // guard against huge files | |
4996 size_t read_length = static_cast<size_t>(length); | |
4997 | |
4998 if (static_cast<std::streamsize>(read_length) != length || length < 0) return status_out_of_memory; | |
4999 | |
5000 size_t max_suffix_size = sizeof(char_t); | |
5001 | |
5002 // read stream data into memory (guard against stream exceptions with buffer holder) | |
5003 auto_deleter<void> buffer(xml_memory::allocate(read_length * sizeof(T) + max_suffix_size), xml_memory::deallocate); | |
5004 if (!buffer.data) return status_out_of_memory; | |
5005 | |
5006 stream.read(static_cast<T*>(buffer.data), static_cast<std::streamsize>(read_length)); | |
5007 | |
5008 // read may set failbit | eofbit in case gcount() is less than read_length (i.e. line ending conversion), so check for other I/O errors | |
5009 if (stream.bad() || (!stream.eof() && stream.fail())) return status_io_error; | |
5010 | |
5011 // return buffer | |
5012 size_t actual_length = static_cast<size_t>(stream.gcount()); | |
5013 assert(actual_length <= read_length); | |
5014 | |
5015 *out_buffer = buffer.release(); | |
5016 *out_size = actual_length * sizeof(T); | |
5017 | |
5018 return status_ok; | |
5019 } | |
5020 | |
5021 template <typename T> PUGI_IMPL_FN xml_parse_result load_stream_impl(xml_document_struct* doc, std::basic_istream<T>& stream, unsigned int options, xml_encoding encoding, char_t** out_buffer) | |
5022 { | |
5023 void* buffer = 0; | |
5024 size_t size = 0; | |
5025 xml_parse_status status = status_ok; | |
5026 | |
5027 // if stream has an error bit set, bail out (otherwise tellg() can fail and we'll clear error bits) | |
5028 if (stream.fail()) return make_parse_result(status_io_error); | |
5029 | |
5030 // load stream to memory (using seek-based implementation if possible, since it's faster and takes less memory) | |
5031 if (stream.tellg() < 0) | |
5032 { | |
5033 stream.clear(); // clear error flags that could be set by a failing tellg | |
5034 status = load_stream_data_noseek(stream, &buffer, &size); | |
5035 } | |
5036 else | |
5037 status = load_stream_data_seek(stream, &buffer, &size); | |
5038 | |
5039 if (status != status_ok) return make_parse_result(status); | |
5040 | |
5041 xml_encoding real_encoding = get_buffer_encoding(encoding, buffer, size); | |
5042 | |
5043 return load_buffer_impl(doc, doc, buffer, zero_terminate_buffer(buffer, size, real_encoding), options, real_encoding, true, true, out_buffer); | |
5044 } | |
5045 #endif | |
5046 | |
5047 #if defined(PUGI_IMPL_MSVC_CRT_VERSION) || defined(__BORLANDC__) || (defined(__MINGW32__) && (!defined(__STRICT_ANSI__) || defined(__MINGW64_VERSION_MAJOR))) | |
5048 PUGI_IMPL_FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) | |
5049 { | |
5050 #if defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400 | |
5051 FILE* file = 0; | |
5052 return _wfopen_s(&file, path, mode) == 0 ? file : 0; | |
5053 #else | |
5054 return _wfopen(path, mode); | |
5055 #endif | |
5056 } | |
5057 #else | |
5058 PUGI_IMPL_FN char* convert_path_heap(const wchar_t* str) | |
5059 { | |
5060 assert(str); | |
5061 | |
5062 // first pass: get length in utf8 characters | |
5063 size_t length = strlength_wide(str); | |
5064 size_t size = as_utf8_begin(str, length); | |
5065 | |
5066 // allocate resulting string | |
5067 char* result = static_cast<char*>(xml_memory::allocate(size + 1)); | |
5068 if (!result) return 0; | |
5069 | |
5070 // second pass: convert to utf8 | |
5071 as_utf8_end(result, size, str, length); | |
5072 | |
5073 // zero-terminate | |
5074 result[size] = 0; | |
5075 | |
5076 return result; | |
5077 } | |
5078 | |
5079 PUGI_IMPL_FN FILE* open_file_wide(const wchar_t* path, const wchar_t* mode) | |
5080 { | |
5081 // there is no standard function to open wide paths, so our best bet is to try utf8 path | |
5082 char* path_utf8 = convert_path_heap(path); | |
5083 if (!path_utf8) return 0; | |
5084 | |
5085 // convert mode to ASCII (we mirror _wfopen interface) | |
5086 char mode_ascii[4] = {0}; | |
5087 for (size_t i = 0; mode[i]; ++i) mode_ascii[i] = static_cast<char>(mode[i]); | |
5088 | |
5089 // try to open the utf8 path | |
5090 FILE* result = fopen(path_utf8, mode_ascii); | |
5091 | |
5092 // free dummy buffer | |
5093 xml_memory::deallocate(path_utf8); | |
5094 | |
5095 return result; | |
5096 } | |
5097 #endif | |
5098 | |
5099 PUGI_IMPL_FN FILE* open_file(const char* path, const char* mode) | |
5100 { | |
5101 #if defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400 | |
5102 FILE* file = 0; | |
5103 return fopen_s(&file, path, mode) == 0 ? file : 0; | |
5104 #else | |
5105 return fopen(path, mode); | |
5106 #endif | |
5107 } | |
5108 | |
5109 PUGI_IMPL_FN bool save_file_impl(const xml_document& doc, FILE* file, const char_t* indent, unsigned int flags, xml_encoding encoding) | |
5110 { | |
5111 if (!file) return false; | |
5112 | |
5113 xml_writer_file writer(file); | |
5114 doc.save(writer, indent, flags, encoding); | |
5115 | |
5116 return fflush(file) == 0 && ferror(file) == 0; | |
5117 } | |
5118 | |
5119 struct name_null_sentry | |
5120 { | |
5121 xml_node_struct* node; | |
5122 char_t* name; | |
5123 | |
5124 name_null_sentry(xml_node_struct* node_): node(node_), name(node_->name) | |
5125 { | |
5126 node->name = 0; | |
5127 } | |
5128 | |
5129 ~name_null_sentry() | |
5130 { | |
5131 node->name = name; | |
5132 } | |
5133 }; | |
5134 PUGI_IMPL_NS_END | |
5135 | |
5136 namespace pugi | |
5137 { | |
5138 PUGI_IMPL_FN xml_writer::~xml_writer() | |
5139 { | |
5140 } | |
5141 | |
5142 PUGI_IMPL_FN xml_writer_file::xml_writer_file(void* file_): file(file_) | |
5143 { | |
5144 } | |
5145 | |
5146 PUGI_IMPL_FN void xml_writer_file::write(const void* data, size_t size) | |
5147 { | |
5148 size_t result = fwrite(data, 1, size, static_cast<FILE*>(file)); | |
5149 (void)!result; // unfortunately we can't do proper error handling here | |
5150 } | |
5151 | |
5152 #ifndef PUGIXML_NO_STL | |
5153 PUGI_IMPL_FN xml_writer_stream::xml_writer_stream(std::basic_ostream<char, std::char_traits<char> >& stream): narrow_stream(&stream), wide_stream(0) | |
5154 { | |
5155 } | |
5156 | |
5157 PUGI_IMPL_FN xml_writer_stream::xml_writer_stream(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream): narrow_stream(0), wide_stream(&stream) | |
5158 { | |
5159 } | |
5160 | |
5161 PUGI_IMPL_FN void xml_writer_stream::write(const void* data, size_t size) | |
5162 { | |
5163 if (narrow_stream) | |
5164 { | |
5165 assert(!wide_stream); | |
5166 narrow_stream->write(reinterpret_cast<const char*>(data), static_cast<std::streamsize>(size)); | |
5167 } | |
5168 else | |
5169 { | |
5170 assert(wide_stream); | |
5171 assert(size % sizeof(wchar_t) == 0); | |
5172 | |
5173 wide_stream->write(reinterpret_cast<const wchar_t*>(data), static_cast<std::streamsize>(size / sizeof(wchar_t))); | |
5174 } | |
5175 } | |
5176 #endif | |
5177 | |
5178 PUGI_IMPL_FN xml_tree_walker::xml_tree_walker(): _depth(0) | |
5179 { | |
5180 } | |
5181 | |
5182 PUGI_IMPL_FN xml_tree_walker::~xml_tree_walker() | |
5183 { | |
5184 } | |
5185 | |
5186 PUGI_IMPL_FN int xml_tree_walker::depth() const | |
5187 { | |
5188 return _depth; | |
5189 } | |
5190 | |
5191 PUGI_IMPL_FN bool xml_tree_walker::begin(xml_node&) | |
5192 { | |
5193 return true; | |
5194 } | |
5195 | |
5196 PUGI_IMPL_FN bool xml_tree_walker::end(xml_node&) | |
5197 { | |
5198 return true; | |
5199 } | |
5200 | |
5201 PUGI_IMPL_FN xml_attribute::xml_attribute(): _attr(0) | |
5202 { | |
5203 } | |
5204 | |
5205 PUGI_IMPL_FN xml_attribute::xml_attribute(xml_attribute_struct* attr): _attr(attr) | |
5206 { | |
5207 } | |
5208 | |
5209 PUGI_IMPL_FN static void unspecified_bool_xml_attribute(xml_attribute***) | |
5210 { | |
5211 } | |
5212 | |
5213 PUGI_IMPL_FN xml_attribute::operator xml_attribute::unspecified_bool_type() const | |
5214 { | |
5215 return _attr ? unspecified_bool_xml_attribute : 0; | |
5216 } | |
5217 | |
5218 PUGI_IMPL_FN bool xml_attribute::operator!() const | |
5219 { | |
5220 return !_attr; | |
5221 } | |
5222 | |
5223 PUGI_IMPL_FN bool xml_attribute::operator==(const xml_attribute& r) const | |
5224 { | |
5225 return (_attr == r._attr); | |
5226 } | |
5227 | |
5228 PUGI_IMPL_FN bool xml_attribute::operator!=(const xml_attribute& r) const | |
5229 { | |
5230 return (_attr != r._attr); | |
5231 } | |
5232 | |
5233 PUGI_IMPL_FN bool xml_attribute::operator<(const xml_attribute& r) const | |
5234 { | |
5235 return (_attr < r._attr); | |
5236 } | |
5237 | |
5238 PUGI_IMPL_FN bool xml_attribute::operator>(const xml_attribute& r) const | |
5239 { | |
5240 return (_attr > r._attr); | |
5241 } | |
5242 | |
5243 PUGI_IMPL_FN bool xml_attribute::operator<=(const xml_attribute& r) const | |
5244 { | |
5245 return (_attr <= r._attr); | |
5246 } | |
5247 | |
5248 PUGI_IMPL_FN bool xml_attribute::operator>=(const xml_attribute& r) const | |
5249 { | |
5250 return (_attr >= r._attr); | |
5251 } | |
5252 | |
5253 PUGI_IMPL_FN xml_attribute xml_attribute::next_attribute() const | |
5254 { | |
5255 if (!_attr) return xml_attribute(); | |
5256 return xml_attribute(_attr->next_attribute); | |
5257 } | |
5258 | |
5259 PUGI_IMPL_FN xml_attribute xml_attribute::previous_attribute() const | |
5260 { | |
5261 if (!_attr) return xml_attribute(); | |
5262 xml_attribute_struct* prev = _attr->prev_attribute_c; | |
5263 return prev->next_attribute ? xml_attribute(prev) : xml_attribute(); | |
5264 } | |
5265 | |
5266 PUGI_IMPL_FN const char_t* xml_attribute::as_string(const char_t* def) const | |
5267 { | |
5268 if (!_attr) return def; | |
5269 const char_t* value = _attr->value; | |
5270 return value ? value : def; | |
5271 } | |
5272 | |
5273 PUGI_IMPL_FN int xml_attribute::as_int(int def) const | |
5274 { | |
5275 if (!_attr) return def; | |
5276 const char_t* value = _attr->value; | |
5277 return value ? impl::get_value_int(value) : def; | |
5278 } | |
5279 | |
5280 PUGI_IMPL_FN unsigned int xml_attribute::as_uint(unsigned int def) const | |
5281 { | |
5282 if (!_attr) return def; | |
5283 const char_t* value = _attr->value; | |
5284 return value ? impl::get_value_uint(value) : def; | |
5285 } | |
5286 | |
5287 PUGI_IMPL_FN double xml_attribute::as_double(double def) const | |
5288 { | |
5289 if (!_attr) return def; | |
5290 const char_t* value = _attr->value; | |
5291 return value ? impl::get_value_double(value) : def; | |
5292 } | |
5293 | |
5294 PUGI_IMPL_FN float xml_attribute::as_float(float def) const | |
5295 { | |
5296 if (!_attr) return def; | |
5297 const char_t* value = _attr->value; | |
5298 return value ? impl::get_value_float(value) : def; | |
5299 } | |
5300 | |
5301 PUGI_IMPL_FN bool xml_attribute::as_bool(bool def) const | |
5302 { | |
5303 if (!_attr) return def; | |
5304 const char_t* value = _attr->value; | |
5305 return value ? impl::get_value_bool(value) : def; | |
5306 } | |
5307 | |
5308 #ifdef PUGIXML_HAS_LONG_LONG | |
5309 PUGI_IMPL_FN long long xml_attribute::as_llong(long long def) const | |
5310 { | |
5311 if (!_attr) return def; | |
5312 const char_t* value = _attr->value; | |
5313 return value ? impl::get_value_llong(value) : def; | |
5314 } | |
5315 | |
5316 PUGI_IMPL_FN unsigned long long xml_attribute::as_ullong(unsigned long long def) const | |
5317 { | |
5318 if (!_attr) return def; | |
5319 const char_t* value = _attr->value; | |
5320 return value ? impl::get_value_ullong(value) : def; | |
5321 } | |
5322 #endif | |
5323 | |
5324 PUGI_IMPL_FN bool xml_attribute::empty() const | |
5325 { | |
5326 return !_attr; | |
5327 } | |
5328 | |
5329 PUGI_IMPL_FN const char_t* xml_attribute::name() const | |
5330 { | |
5331 if (!_attr) return PUGIXML_TEXT(""); | |
5332 const char_t* name = _attr->name; | |
5333 return name ? name : PUGIXML_TEXT(""); | |
5334 } | |
5335 | |
5336 PUGI_IMPL_FN const char_t* xml_attribute::value() const | |
5337 { | |
5338 if (!_attr) return PUGIXML_TEXT(""); | |
5339 const char_t* value = _attr->value; | |
5340 return value ? value : PUGIXML_TEXT(""); | |
5341 } | |
5342 | |
5343 PUGI_IMPL_FN size_t xml_attribute::hash_value() const | |
5344 { | |
5345 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_attr) / sizeof(xml_attribute_struct)); | |
5346 } | |
5347 | |
5348 PUGI_IMPL_FN xml_attribute_struct* xml_attribute::internal_object() const | |
5349 { | |
5350 return _attr; | |
5351 } | |
5352 | |
5353 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(const char_t* rhs) | |
5354 { | |
5355 set_value(rhs); | |
5356 return *this; | |
5357 } | |
5358 | |
5359 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(int rhs) | |
5360 { | |
5361 set_value(rhs); | |
5362 return *this; | |
5363 } | |
5364 | |
5365 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(unsigned int rhs) | |
5366 { | |
5367 set_value(rhs); | |
5368 return *this; | |
5369 } | |
5370 | |
5371 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(long rhs) | |
5372 { | |
5373 set_value(rhs); | |
5374 return *this; | |
5375 } | |
5376 | |
5377 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(unsigned long rhs) | |
5378 { | |
5379 set_value(rhs); | |
5380 return *this; | |
5381 } | |
5382 | |
5383 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(double rhs) | |
5384 { | |
5385 set_value(rhs); | |
5386 return *this; | |
5387 } | |
5388 | |
5389 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(float rhs) | |
5390 { | |
5391 set_value(rhs); | |
5392 return *this; | |
5393 } | |
5394 | |
5395 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(bool rhs) | |
5396 { | |
5397 set_value(rhs); | |
5398 return *this; | |
5399 } | |
5400 | |
5401 #ifdef PUGIXML_HAS_LONG_LONG | |
5402 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(long long rhs) | |
5403 { | |
5404 set_value(rhs); | |
5405 return *this; | |
5406 } | |
5407 | |
5408 PUGI_IMPL_FN xml_attribute& xml_attribute::operator=(unsigned long long rhs) | |
5409 { | |
5410 set_value(rhs); | |
5411 return *this; | |
5412 } | |
5413 #endif | |
5414 | |
5415 PUGI_IMPL_FN bool xml_attribute::set_name(const char_t* rhs) | |
5416 { | |
5417 if (!_attr) return false; | |
5418 | |
5419 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); | |
5420 } | |
5421 | |
5422 PUGI_IMPL_FN bool xml_attribute::set_name(const char_t* rhs, size_t size) | |
5423 { | |
5424 if (!_attr) return false; | |
5425 | |
5426 return impl::strcpy_insitu(_attr->name, _attr->header, impl::xml_memory_page_name_allocated_mask, rhs, size); | |
5427 } | |
5428 | |
5429 PUGI_IMPL_FN bool xml_attribute::set_value(const char_t* rhs) | |
5430 { | |
5431 if (!_attr) return false; | |
5432 | |
5433 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); | |
5434 } | |
5435 | |
5436 PUGI_IMPL_FN bool xml_attribute::set_value(const char_t* rhs, size_t size) | |
5437 { | |
5438 if (!_attr) return false; | |
5439 | |
5440 return impl::strcpy_insitu(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, size); | |
5441 } | |
5442 | |
5443 PUGI_IMPL_FN bool xml_attribute::set_value(int rhs) | |
5444 { | |
5445 if (!_attr) return false; | |
5446 | |
5447 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); | |
5448 } | |
5449 | |
5450 PUGI_IMPL_FN bool xml_attribute::set_value(unsigned int rhs) | |
5451 { | |
5452 if (!_attr) return false; | |
5453 | |
5454 return impl::set_value_integer<unsigned int>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); | |
5455 } | |
5456 | |
5457 PUGI_IMPL_FN bool xml_attribute::set_value(long rhs) | |
5458 { | |
5459 if (!_attr) return false; | |
5460 | |
5461 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); | |
5462 } | |
5463 | |
5464 PUGI_IMPL_FN bool xml_attribute::set_value(unsigned long rhs) | |
5465 { | |
5466 if (!_attr) return false; | |
5467 | |
5468 return impl::set_value_integer<unsigned long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); | |
5469 } | |
5470 | |
5471 PUGI_IMPL_FN bool xml_attribute::set_value(double rhs) | |
5472 { | |
5473 if (!_attr) return false; | |
5474 | |
5475 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision); | |
5476 } | |
5477 | |
5478 PUGI_IMPL_FN bool xml_attribute::set_value(double rhs, int precision) | |
5479 { | |
5480 if (!_attr) return false; | |
5481 | |
5482 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision); | |
5483 } | |
5484 | |
5485 PUGI_IMPL_FN bool xml_attribute::set_value(float rhs) | |
5486 { | |
5487 if (!_attr) return false; | |
5488 | |
5489 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision); | |
5490 } | |
5491 | |
5492 PUGI_IMPL_FN bool xml_attribute::set_value(float rhs, int precision) | |
5493 { | |
5494 if (!_attr) return false; | |
5495 | |
5496 return impl::set_value_convert(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, precision); | |
5497 } | |
5498 | |
5499 PUGI_IMPL_FN bool xml_attribute::set_value(bool rhs) | |
5500 { | |
5501 if (!_attr) return false; | |
5502 | |
5503 return impl::set_value_bool(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs); | |
5504 } | |
5505 | |
5506 #ifdef PUGIXML_HAS_LONG_LONG | |
5507 PUGI_IMPL_FN bool xml_attribute::set_value(long long rhs) | |
5508 { | |
5509 if (!_attr) return false; | |
5510 | |
5511 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0); | |
5512 } | |
5513 | |
5514 PUGI_IMPL_FN bool xml_attribute::set_value(unsigned long long rhs) | |
5515 { | |
5516 if (!_attr) return false; | |
5517 | |
5518 return impl::set_value_integer<unsigned long long>(_attr->value, _attr->header, impl::xml_memory_page_value_allocated_mask, rhs, false); | |
5519 } | |
5520 #endif | |
5521 | |
5522 #ifdef __BORLANDC__ | |
5523 PUGI_IMPL_FN bool operator&&(const xml_attribute& lhs, bool rhs) | |
5524 { | |
5525 return (bool)lhs && rhs; | |
5526 } | |
5527 | |
5528 PUGI_IMPL_FN bool operator||(const xml_attribute& lhs, bool rhs) | |
5529 { | |
5530 return (bool)lhs || rhs; | |
5531 } | |
5532 #endif | |
5533 | |
5534 PUGI_IMPL_FN xml_node::xml_node(): _root(0) | |
5535 { | |
5536 } | |
5537 | |
5538 PUGI_IMPL_FN xml_node::xml_node(xml_node_struct* p): _root(p) | |
5539 { | |
5540 } | |
5541 | |
5542 PUGI_IMPL_FN static void unspecified_bool_xml_node(xml_node***) | |
5543 { | |
5544 } | |
5545 | |
5546 PUGI_IMPL_FN xml_node::operator xml_node::unspecified_bool_type() const | |
5547 { | |
5548 return _root ? unspecified_bool_xml_node : 0; | |
5549 } | |
5550 | |
5551 PUGI_IMPL_FN bool xml_node::operator!() const | |
5552 { | |
5553 return !_root; | |
5554 } | |
5555 | |
5556 PUGI_IMPL_FN xml_node::iterator xml_node::begin() const | |
5557 { | |
5558 return iterator(_root ? _root->first_child + 0 : 0, _root); | |
5559 } | |
5560 | |
5561 PUGI_IMPL_FN xml_node::iterator xml_node::end() const | |
5562 { | |
5563 return iterator(0, _root); | |
5564 } | |
5565 | |
5566 PUGI_IMPL_FN xml_node::attribute_iterator xml_node::attributes_begin() const | |
5567 { | |
5568 return attribute_iterator(_root ? _root->first_attribute + 0 : 0, _root); | |
5569 } | |
5570 | |
5571 PUGI_IMPL_FN xml_node::attribute_iterator xml_node::attributes_end() const | |
5572 { | |
5573 return attribute_iterator(0, _root); | |
5574 } | |
5575 | |
5576 PUGI_IMPL_FN xml_object_range<xml_node_iterator> xml_node::children() const | |
5577 { | |
5578 return xml_object_range<xml_node_iterator>(begin(), end()); | |
5579 } | |
5580 | |
5581 PUGI_IMPL_FN xml_object_range<xml_named_node_iterator> xml_node::children(const char_t* name_) const | |
5582 { | |
5583 return xml_object_range<xml_named_node_iterator>(xml_named_node_iterator(child(name_)._root, _root, name_), xml_named_node_iterator(0, _root, name_)); | |
5584 } | |
5585 | |
5586 PUGI_IMPL_FN xml_object_range<xml_attribute_iterator> xml_node::attributes() const | |
5587 { | |
5588 return xml_object_range<xml_attribute_iterator>(attributes_begin(), attributes_end()); | |
5589 } | |
5590 | |
5591 PUGI_IMPL_FN bool xml_node::operator==(const xml_node& r) const | |
5592 { | |
5593 return (_root == r._root); | |
5594 } | |
5595 | |
5596 PUGI_IMPL_FN bool xml_node::operator!=(const xml_node& r) const | |
5597 { | |
5598 return (_root != r._root); | |
5599 } | |
5600 | |
5601 PUGI_IMPL_FN bool xml_node::operator<(const xml_node& r) const | |
5602 { | |
5603 return (_root < r._root); | |
5604 } | |
5605 | |
5606 PUGI_IMPL_FN bool xml_node::operator>(const xml_node& r) const | |
5607 { | |
5608 return (_root > r._root); | |
5609 } | |
5610 | |
5611 PUGI_IMPL_FN bool xml_node::operator<=(const xml_node& r) const | |
5612 { | |
5613 return (_root <= r._root); | |
5614 } | |
5615 | |
5616 PUGI_IMPL_FN bool xml_node::operator>=(const xml_node& r) const | |
5617 { | |
5618 return (_root >= r._root); | |
5619 } | |
5620 | |
5621 PUGI_IMPL_FN bool xml_node::empty() const | |
5622 { | |
5623 return !_root; | |
5624 } | |
5625 | |
5626 PUGI_IMPL_FN const char_t* xml_node::name() const | |
5627 { | |
5628 if (!_root) return PUGIXML_TEXT(""); | |
5629 const char_t* name = _root->name; | |
5630 return name ? name : PUGIXML_TEXT(""); | |
5631 } | |
5632 | |
5633 PUGI_IMPL_FN xml_node_type xml_node::type() const | |
5634 { | |
5635 return _root ? PUGI_IMPL_NODETYPE(_root) : node_null; | |
5636 } | |
5637 | |
5638 PUGI_IMPL_FN const char_t* xml_node::value() const | |
5639 { | |
5640 if (!_root) return PUGIXML_TEXT(""); | |
5641 const char_t* value = _root->value; | |
5642 return value ? value : PUGIXML_TEXT(""); | |
5643 } | |
5644 | |
5645 PUGI_IMPL_FN xml_node xml_node::child(const char_t* name_) const | |
5646 { | |
5647 if (!_root) return xml_node(); | |
5648 | |
5649 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
5650 { | |
5651 const char_t* iname = i->name; | |
5652 if (iname && impl::strequal(name_, iname)) | |
5653 return xml_node(i); | |
5654 } | |
5655 | |
5656 return xml_node(); | |
5657 } | |
5658 | |
5659 PUGI_IMPL_FN xml_attribute xml_node::attribute(const char_t* name_) const | |
5660 { | |
5661 if (!_root) return xml_attribute(); | |
5662 | |
5663 for (xml_attribute_struct* i = _root->first_attribute; i; i = i->next_attribute) | |
5664 { | |
5665 const char_t* iname = i->name; | |
5666 if (iname && impl::strequal(name_, iname)) | |
5667 return xml_attribute(i); | |
5668 } | |
5669 | |
5670 return xml_attribute(); | |
5671 } | |
5672 | |
5673 PUGI_IMPL_FN xml_node xml_node::next_sibling(const char_t* name_) const | |
5674 { | |
5675 if (!_root) return xml_node(); | |
5676 | |
5677 for (xml_node_struct* i = _root->next_sibling; i; i = i->next_sibling) | |
5678 { | |
5679 const char_t* iname = i->name; | |
5680 if (iname && impl::strequal(name_, iname)) | |
5681 return xml_node(i); | |
5682 } | |
5683 | |
5684 return xml_node(); | |
5685 } | |
5686 | |
5687 PUGI_IMPL_FN xml_node xml_node::next_sibling() const | |
5688 { | |
5689 return _root ? xml_node(_root->next_sibling) : xml_node(); | |
5690 } | |
5691 | |
5692 PUGI_IMPL_FN xml_node xml_node::previous_sibling(const char_t* name_) const | |
5693 { | |
5694 if (!_root) return xml_node(); | |
5695 | |
5696 for (xml_node_struct* i = _root->prev_sibling_c; i->next_sibling; i = i->prev_sibling_c) | |
5697 { | |
5698 const char_t* iname = i->name; | |
5699 if (iname && impl::strequal(name_, iname)) | |
5700 return xml_node(i); | |
5701 } | |
5702 | |
5703 return xml_node(); | |
5704 } | |
5705 | |
5706 PUGI_IMPL_FN xml_attribute xml_node::attribute(const char_t* name_, xml_attribute& hint_) const | |
5707 { | |
5708 xml_attribute_struct* hint = hint_._attr; | |
5709 | |
5710 // if hint is not an attribute of node, behavior is not defined | |
5711 assert(!hint || (_root && impl::is_attribute_of(hint, _root))); | |
5712 | |
5713 if (!_root) return xml_attribute(); | |
5714 | |
5715 // optimistically search from hint up until the end | |
5716 for (xml_attribute_struct* i = hint; i; i = i->next_attribute) | |
5717 { | |
5718 const char_t* iname = i->name; | |
5719 if (iname && impl::strequal(name_, iname)) | |
5720 { | |
5721 // update hint to maximize efficiency of searching for consecutive attributes | |
5722 hint_._attr = i->next_attribute; | |
5723 | |
5724 return xml_attribute(i); | |
5725 } | |
5726 } | |
5727 | |
5728 // wrap around and search from the first attribute until the hint | |
5729 // 'j' null pointer check is technically redundant, but it prevents a crash in case the assertion above fails | |
5730 for (xml_attribute_struct* j = _root->first_attribute; j && j != hint; j = j->next_attribute) | |
5731 { | |
5732 const char_t* jname = j->name; | |
5733 if (jname && impl::strequal(name_, jname)) | |
5734 { | |
5735 // update hint to maximize efficiency of searching for consecutive attributes | |
5736 hint_._attr = j->next_attribute; | |
5737 | |
5738 return xml_attribute(j); | |
5739 } | |
5740 } | |
5741 | |
5742 return xml_attribute(); | |
5743 } | |
5744 | |
5745 PUGI_IMPL_FN xml_node xml_node::previous_sibling() const | |
5746 { | |
5747 if (!_root) return xml_node(); | |
5748 xml_node_struct* prev = _root->prev_sibling_c; | |
5749 return prev->next_sibling ? xml_node(prev) : xml_node(); | |
5750 } | |
5751 | |
5752 PUGI_IMPL_FN xml_node xml_node::parent() const | |
5753 { | |
5754 return _root ? xml_node(_root->parent) : xml_node(); | |
5755 } | |
5756 | |
5757 PUGI_IMPL_FN xml_node xml_node::root() const | |
5758 { | |
5759 return _root ? xml_node(&impl::get_document(_root)) : xml_node(); | |
5760 } | |
5761 | |
5762 PUGI_IMPL_FN xml_text xml_node::text() const | |
5763 { | |
5764 return xml_text(_root); | |
5765 } | |
5766 | |
5767 PUGI_IMPL_FN const char_t* xml_node::child_value() const | |
5768 { | |
5769 if (!_root) return PUGIXML_TEXT(""); | |
5770 | |
5771 // element nodes can have value if parse_embed_pcdata was used | |
5772 if (PUGI_IMPL_NODETYPE(_root) == node_element && _root->value) | |
5773 return _root->value; | |
5774 | |
5775 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
5776 { | |
5777 const char_t* ivalue = i->value; | |
5778 if (impl::is_text_node(i) && ivalue) | |
5779 return ivalue; | |
5780 } | |
5781 | |
5782 return PUGIXML_TEXT(""); | |
5783 } | |
5784 | |
5785 PUGI_IMPL_FN const char_t* xml_node::child_value(const char_t* name_) const | |
5786 { | |
5787 return child(name_).child_value(); | |
5788 } | |
5789 | |
5790 PUGI_IMPL_FN xml_attribute xml_node::first_attribute() const | |
5791 { | |
5792 if (!_root) return xml_attribute(); | |
5793 return xml_attribute(_root->first_attribute); | |
5794 } | |
5795 | |
5796 PUGI_IMPL_FN xml_attribute xml_node::last_attribute() const | |
5797 { | |
5798 if (!_root) return xml_attribute(); | |
5799 xml_attribute_struct* first = _root->first_attribute; | |
5800 return first ? xml_attribute(first->prev_attribute_c) : xml_attribute(); | |
5801 } | |
5802 | |
5803 PUGI_IMPL_FN xml_node xml_node::first_child() const | |
5804 { | |
5805 if (!_root) return xml_node(); | |
5806 return xml_node(_root->first_child); | |
5807 } | |
5808 | |
5809 PUGI_IMPL_FN xml_node xml_node::last_child() const | |
5810 { | |
5811 if (!_root) return xml_node(); | |
5812 xml_node_struct* first = _root->first_child; | |
5813 return first ? xml_node(first->prev_sibling_c) : xml_node(); | |
5814 } | |
5815 | |
5816 PUGI_IMPL_FN bool xml_node::set_name(const char_t* rhs) | |
5817 { | |
5818 xml_node_type type_ = _root ? PUGI_IMPL_NODETYPE(_root) : node_null; | |
5819 | |
5820 if (type_ != node_element && type_ != node_pi && type_ != node_declaration) | |
5821 return false; | |
5822 | |
5823 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, impl::strlength(rhs)); | |
5824 } | |
5825 | |
5826 PUGI_IMPL_FN bool xml_node::set_name(const char_t* rhs, size_t size) | |
5827 { | |
5828 xml_node_type type_ = _root ? PUGI_IMPL_NODETYPE(_root) : node_null; | |
5829 | |
5830 if (type_ != node_element && type_ != node_pi && type_ != node_declaration) | |
5831 return false; | |
5832 | |
5833 return impl::strcpy_insitu(_root->name, _root->header, impl::xml_memory_page_name_allocated_mask, rhs, size); | |
5834 } | |
5835 | |
5836 PUGI_IMPL_FN bool xml_node::set_value(const char_t* rhs) | |
5837 { | |
5838 xml_node_type type_ = _root ? PUGI_IMPL_NODETYPE(_root) : node_null; | |
5839 | |
5840 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) | |
5841 return false; | |
5842 | |
5843 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)); | |
5844 } | |
5845 | |
5846 PUGI_IMPL_FN bool xml_node::set_value(const char_t* rhs, size_t size) | |
5847 { | |
5848 xml_node_type type_ = _root ? PUGI_IMPL_NODETYPE(_root) : node_null; | |
5849 | |
5850 if (type_ != node_pcdata && type_ != node_cdata && type_ != node_comment && type_ != node_pi && type_ != node_doctype) | |
5851 return false; | |
5852 | |
5853 return impl::strcpy_insitu(_root->value, _root->header, impl::xml_memory_page_value_allocated_mask, rhs, size); | |
5854 } | |
5855 | |
5856 PUGI_IMPL_FN xml_attribute xml_node::append_attribute(const char_t* name_) | |
5857 { | |
5858 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5859 | |
5860 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5861 if (!alloc.reserve()) return xml_attribute(); | |
5862 | |
5863 xml_attribute a(impl::allocate_attribute(alloc)); | |
5864 if (!a) return xml_attribute(); | |
5865 | |
5866 impl::append_attribute(a._attr, _root); | |
5867 | |
5868 a.set_name(name_); | |
5869 | |
5870 return a; | |
5871 } | |
5872 | |
5873 PUGI_IMPL_FN xml_attribute xml_node::prepend_attribute(const char_t* name_) | |
5874 { | |
5875 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5876 | |
5877 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5878 if (!alloc.reserve()) return xml_attribute(); | |
5879 | |
5880 xml_attribute a(impl::allocate_attribute(alloc)); | |
5881 if (!a) return xml_attribute(); | |
5882 | |
5883 impl::prepend_attribute(a._attr, _root); | |
5884 | |
5885 a.set_name(name_); | |
5886 | |
5887 return a; | |
5888 } | |
5889 | |
5890 PUGI_IMPL_FN xml_attribute xml_node::insert_attribute_after(const char_t* name_, const xml_attribute& attr) | |
5891 { | |
5892 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5893 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); | |
5894 | |
5895 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5896 if (!alloc.reserve()) return xml_attribute(); | |
5897 | |
5898 xml_attribute a(impl::allocate_attribute(alloc)); | |
5899 if (!a) return xml_attribute(); | |
5900 | |
5901 impl::insert_attribute_after(a._attr, attr._attr, _root); | |
5902 | |
5903 a.set_name(name_); | |
5904 | |
5905 return a; | |
5906 } | |
5907 | |
5908 PUGI_IMPL_FN xml_attribute xml_node::insert_attribute_before(const char_t* name_, const xml_attribute& attr) | |
5909 { | |
5910 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5911 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); | |
5912 | |
5913 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5914 if (!alloc.reserve()) return xml_attribute(); | |
5915 | |
5916 xml_attribute a(impl::allocate_attribute(alloc)); | |
5917 if (!a) return xml_attribute(); | |
5918 | |
5919 impl::insert_attribute_before(a._attr, attr._attr, _root); | |
5920 | |
5921 a.set_name(name_); | |
5922 | |
5923 return a; | |
5924 } | |
5925 | |
5926 PUGI_IMPL_FN xml_attribute xml_node::append_copy(const xml_attribute& proto) | |
5927 { | |
5928 if (!proto) return xml_attribute(); | |
5929 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5930 | |
5931 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5932 if (!alloc.reserve()) return xml_attribute(); | |
5933 | |
5934 xml_attribute a(impl::allocate_attribute(alloc)); | |
5935 if (!a) return xml_attribute(); | |
5936 | |
5937 impl::append_attribute(a._attr, _root); | |
5938 impl::node_copy_attribute(a._attr, proto._attr); | |
5939 | |
5940 return a; | |
5941 } | |
5942 | |
5943 PUGI_IMPL_FN xml_attribute xml_node::prepend_copy(const xml_attribute& proto) | |
5944 { | |
5945 if (!proto) return xml_attribute(); | |
5946 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5947 | |
5948 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5949 if (!alloc.reserve()) return xml_attribute(); | |
5950 | |
5951 xml_attribute a(impl::allocate_attribute(alloc)); | |
5952 if (!a) return xml_attribute(); | |
5953 | |
5954 impl::prepend_attribute(a._attr, _root); | |
5955 impl::node_copy_attribute(a._attr, proto._attr); | |
5956 | |
5957 return a; | |
5958 } | |
5959 | |
5960 PUGI_IMPL_FN xml_attribute xml_node::insert_copy_after(const xml_attribute& proto, const xml_attribute& attr) | |
5961 { | |
5962 if (!proto) return xml_attribute(); | |
5963 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5964 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); | |
5965 | |
5966 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5967 if (!alloc.reserve()) return xml_attribute(); | |
5968 | |
5969 xml_attribute a(impl::allocate_attribute(alloc)); | |
5970 if (!a) return xml_attribute(); | |
5971 | |
5972 impl::insert_attribute_after(a._attr, attr._attr, _root); | |
5973 impl::node_copy_attribute(a._attr, proto._attr); | |
5974 | |
5975 return a; | |
5976 } | |
5977 | |
5978 PUGI_IMPL_FN xml_attribute xml_node::insert_copy_before(const xml_attribute& proto, const xml_attribute& attr) | |
5979 { | |
5980 if (!proto) return xml_attribute(); | |
5981 if (!impl::allow_insert_attribute(type())) return xml_attribute(); | |
5982 if (!attr || !impl::is_attribute_of(attr._attr, _root)) return xml_attribute(); | |
5983 | |
5984 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
5985 if (!alloc.reserve()) return xml_attribute(); | |
5986 | |
5987 xml_attribute a(impl::allocate_attribute(alloc)); | |
5988 if (!a) return xml_attribute(); | |
5989 | |
5990 impl::insert_attribute_before(a._attr, attr._attr, _root); | |
5991 impl::node_copy_attribute(a._attr, proto._attr); | |
5992 | |
5993 return a; | |
5994 } | |
5995 | |
5996 PUGI_IMPL_FN xml_node xml_node::append_child(xml_node_type type_) | |
5997 { | |
5998 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
5999 | |
6000 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6001 if (!alloc.reserve()) return xml_node(); | |
6002 | |
6003 xml_node n(impl::allocate_node(alloc, type_)); | |
6004 if (!n) return xml_node(); | |
6005 | |
6006 impl::append_node(n._root, _root); | |
6007 | |
6008 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | |
6009 | |
6010 return n; | |
6011 } | |
6012 | |
6013 PUGI_IMPL_FN xml_node xml_node::prepend_child(xml_node_type type_) | |
6014 { | |
6015 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
6016 | |
6017 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6018 if (!alloc.reserve()) return xml_node(); | |
6019 | |
6020 xml_node n(impl::allocate_node(alloc, type_)); | |
6021 if (!n) return xml_node(); | |
6022 | |
6023 impl::prepend_node(n._root, _root); | |
6024 | |
6025 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | |
6026 | |
6027 return n; | |
6028 } | |
6029 | |
6030 PUGI_IMPL_FN xml_node xml_node::insert_child_before(xml_node_type type_, const xml_node& node) | |
6031 { | |
6032 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
6033 if (!node._root || node._root->parent != _root) return xml_node(); | |
6034 | |
6035 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6036 if (!alloc.reserve()) return xml_node(); | |
6037 | |
6038 xml_node n(impl::allocate_node(alloc, type_)); | |
6039 if (!n) return xml_node(); | |
6040 | |
6041 impl::insert_node_before(n._root, node._root); | |
6042 | |
6043 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | |
6044 | |
6045 return n; | |
6046 } | |
6047 | |
6048 PUGI_IMPL_FN xml_node xml_node::insert_child_after(xml_node_type type_, const xml_node& node) | |
6049 { | |
6050 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
6051 if (!node._root || node._root->parent != _root) return xml_node(); | |
6052 | |
6053 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6054 if (!alloc.reserve()) return xml_node(); | |
6055 | |
6056 xml_node n(impl::allocate_node(alloc, type_)); | |
6057 if (!n) return xml_node(); | |
6058 | |
6059 impl::insert_node_after(n._root, node._root); | |
6060 | |
6061 if (type_ == node_declaration) n.set_name(PUGIXML_TEXT("xml")); | |
6062 | |
6063 return n; | |
6064 } | |
6065 | |
6066 PUGI_IMPL_FN xml_node xml_node::append_child(const char_t* name_) | |
6067 { | |
6068 xml_node result = append_child(node_element); | |
6069 | |
6070 result.set_name(name_); | |
6071 | |
6072 return result; | |
6073 } | |
6074 | |
6075 PUGI_IMPL_FN xml_node xml_node::prepend_child(const char_t* name_) | |
6076 { | |
6077 xml_node result = prepend_child(node_element); | |
6078 | |
6079 result.set_name(name_); | |
6080 | |
6081 return result; | |
6082 } | |
6083 | |
6084 PUGI_IMPL_FN xml_node xml_node::insert_child_after(const char_t* name_, const xml_node& node) | |
6085 { | |
6086 xml_node result = insert_child_after(node_element, node); | |
6087 | |
6088 result.set_name(name_); | |
6089 | |
6090 return result; | |
6091 } | |
6092 | |
6093 PUGI_IMPL_FN xml_node xml_node::insert_child_before(const char_t* name_, const xml_node& node) | |
6094 { | |
6095 xml_node result = insert_child_before(node_element, node); | |
6096 | |
6097 result.set_name(name_); | |
6098 | |
6099 return result; | |
6100 } | |
6101 | |
6102 PUGI_IMPL_FN xml_node xml_node::append_copy(const xml_node& proto) | |
6103 { | |
6104 xml_node_type type_ = proto.type(); | |
6105 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
6106 | |
6107 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6108 if (!alloc.reserve()) return xml_node(); | |
6109 | |
6110 xml_node n(impl::allocate_node(alloc, type_)); | |
6111 if (!n) return xml_node(); | |
6112 | |
6113 impl::append_node(n._root, _root); | |
6114 impl::node_copy_tree(n._root, proto._root); | |
6115 | |
6116 return n; | |
6117 } | |
6118 | |
6119 PUGI_IMPL_FN xml_node xml_node::prepend_copy(const xml_node& proto) | |
6120 { | |
6121 xml_node_type type_ = proto.type(); | |
6122 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
6123 | |
6124 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6125 if (!alloc.reserve()) return xml_node(); | |
6126 | |
6127 xml_node n(impl::allocate_node(alloc, type_)); | |
6128 if (!n) return xml_node(); | |
6129 | |
6130 impl::prepend_node(n._root, _root); | |
6131 impl::node_copy_tree(n._root, proto._root); | |
6132 | |
6133 return n; | |
6134 } | |
6135 | |
6136 PUGI_IMPL_FN xml_node xml_node::insert_copy_after(const xml_node& proto, const xml_node& node) | |
6137 { | |
6138 xml_node_type type_ = proto.type(); | |
6139 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
6140 if (!node._root || node._root->parent != _root) return xml_node(); | |
6141 | |
6142 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6143 if (!alloc.reserve()) return xml_node(); | |
6144 | |
6145 xml_node n(impl::allocate_node(alloc, type_)); | |
6146 if (!n) return xml_node(); | |
6147 | |
6148 impl::insert_node_after(n._root, node._root); | |
6149 impl::node_copy_tree(n._root, proto._root); | |
6150 | |
6151 return n; | |
6152 } | |
6153 | |
6154 PUGI_IMPL_FN xml_node xml_node::insert_copy_before(const xml_node& proto, const xml_node& node) | |
6155 { | |
6156 xml_node_type type_ = proto.type(); | |
6157 if (!impl::allow_insert_child(type(), type_)) return xml_node(); | |
6158 if (!node._root || node._root->parent != _root) return xml_node(); | |
6159 | |
6160 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6161 if (!alloc.reserve()) return xml_node(); | |
6162 | |
6163 xml_node n(impl::allocate_node(alloc, type_)); | |
6164 if (!n) return xml_node(); | |
6165 | |
6166 impl::insert_node_before(n._root, node._root); | |
6167 impl::node_copy_tree(n._root, proto._root); | |
6168 | |
6169 return n; | |
6170 } | |
6171 | |
6172 PUGI_IMPL_FN xml_node xml_node::append_move(const xml_node& moved) | |
6173 { | |
6174 if (!impl::allow_move(*this, moved)) return xml_node(); | |
6175 | |
6176 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6177 if (!alloc.reserve()) return xml_node(); | |
6178 | |
6179 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers | |
6180 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; | |
6181 | |
6182 impl::remove_node(moved._root); | |
6183 impl::append_node(moved._root, _root); | |
6184 | |
6185 return moved; | |
6186 } | |
6187 | |
6188 PUGI_IMPL_FN xml_node xml_node::prepend_move(const xml_node& moved) | |
6189 { | |
6190 if (!impl::allow_move(*this, moved)) return xml_node(); | |
6191 | |
6192 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6193 if (!alloc.reserve()) return xml_node(); | |
6194 | |
6195 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers | |
6196 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; | |
6197 | |
6198 impl::remove_node(moved._root); | |
6199 impl::prepend_node(moved._root, _root); | |
6200 | |
6201 return moved; | |
6202 } | |
6203 | |
6204 PUGI_IMPL_FN xml_node xml_node::insert_move_after(const xml_node& moved, const xml_node& node) | |
6205 { | |
6206 if (!impl::allow_move(*this, moved)) return xml_node(); | |
6207 if (!node._root || node._root->parent != _root) return xml_node(); | |
6208 if (moved._root == node._root) return xml_node(); | |
6209 | |
6210 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6211 if (!alloc.reserve()) return xml_node(); | |
6212 | |
6213 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers | |
6214 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; | |
6215 | |
6216 impl::remove_node(moved._root); | |
6217 impl::insert_node_after(moved._root, node._root); | |
6218 | |
6219 return moved; | |
6220 } | |
6221 | |
6222 PUGI_IMPL_FN xml_node xml_node::insert_move_before(const xml_node& moved, const xml_node& node) | |
6223 { | |
6224 if (!impl::allow_move(*this, moved)) return xml_node(); | |
6225 if (!node._root || node._root->parent != _root) return xml_node(); | |
6226 if (moved._root == node._root) return xml_node(); | |
6227 | |
6228 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6229 if (!alloc.reserve()) return xml_node(); | |
6230 | |
6231 // disable document_buffer_order optimization since moving nodes around changes document order without changing buffer pointers | |
6232 impl::get_document(_root).header |= impl::xml_memory_page_contents_shared_mask; | |
6233 | |
6234 impl::remove_node(moved._root); | |
6235 impl::insert_node_before(moved._root, node._root); | |
6236 | |
6237 return moved; | |
6238 } | |
6239 | |
6240 PUGI_IMPL_FN bool xml_node::remove_attribute(const char_t* name_) | |
6241 { | |
6242 return remove_attribute(attribute(name_)); | |
6243 } | |
6244 | |
6245 PUGI_IMPL_FN bool xml_node::remove_attribute(const xml_attribute& a) | |
6246 { | |
6247 if (!_root || !a._attr) return false; | |
6248 if (!impl::is_attribute_of(a._attr, _root)) return false; | |
6249 | |
6250 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6251 if (!alloc.reserve()) return false; | |
6252 | |
6253 impl::remove_attribute(a._attr, _root); | |
6254 impl::destroy_attribute(a._attr, alloc); | |
6255 | |
6256 return true; | |
6257 } | |
6258 | |
6259 PUGI_IMPL_FN bool xml_node::remove_attributes() | |
6260 { | |
6261 if (!_root) return false; | |
6262 | |
6263 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6264 if (!alloc.reserve()) return false; | |
6265 | |
6266 for (xml_attribute_struct* attr = _root->first_attribute; attr; ) | |
6267 { | |
6268 xml_attribute_struct* next = attr->next_attribute; | |
6269 | |
6270 impl::destroy_attribute(attr, alloc); | |
6271 | |
6272 attr = next; | |
6273 } | |
6274 | |
6275 _root->first_attribute = 0; | |
6276 | |
6277 return true; | |
6278 } | |
6279 | |
6280 PUGI_IMPL_FN bool xml_node::remove_child(const char_t* name_) | |
6281 { | |
6282 return remove_child(child(name_)); | |
6283 } | |
6284 | |
6285 PUGI_IMPL_FN bool xml_node::remove_child(const xml_node& n) | |
6286 { | |
6287 if (!_root || !n._root || n._root->parent != _root) return false; | |
6288 | |
6289 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6290 if (!alloc.reserve()) return false; | |
6291 | |
6292 impl::remove_node(n._root); | |
6293 impl::destroy_node(n._root, alloc); | |
6294 | |
6295 return true; | |
6296 } | |
6297 | |
6298 PUGI_IMPL_FN bool xml_node::remove_children() | |
6299 { | |
6300 if (!_root) return false; | |
6301 | |
6302 impl::xml_allocator& alloc = impl::get_allocator(_root); | |
6303 if (!alloc.reserve()) return false; | |
6304 | |
6305 for (xml_node_struct* cur = _root->first_child; cur; ) | |
6306 { | |
6307 xml_node_struct* next = cur->next_sibling; | |
6308 | |
6309 impl::destroy_node(cur, alloc); | |
6310 | |
6311 cur = next; | |
6312 } | |
6313 | |
6314 _root->first_child = 0; | |
6315 | |
6316 return true; | |
6317 } | |
6318 | |
6319 PUGI_IMPL_FN xml_parse_result xml_node::append_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) | |
6320 { | |
6321 // append_buffer is only valid for elements/documents | |
6322 if (!impl::allow_insert_child(type(), node_element)) return impl::make_parse_result(status_append_invalid_root); | |
6323 | |
6324 // append buffer can not merge PCDATA into existing PCDATA nodes | |
6325 if ((options & parse_merge_pcdata) != 0 && last_child().type() == node_pcdata) return impl::make_parse_result(status_append_invalid_root); | |
6326 | |
6327 // get document node | |
6328 impl::xml_document_struct* doc = &impl::get_document(_root); | |
6329 | |
6330 // disable document_buffer_order optimization since in a document with multiple buffers comparing buffer pointers does not make sense | |
6331 doc->header |= impl::xml_memory_page_contents_shared_mask; | |
6332 | |
6333 // get extra buffer element (we'll store the document fragment buffer there so that we can deallocate it later) | |
6334 impl::xml_memory_page* page = 0; | |
6335 impl::xml_extra_buffer* extra = static_cast<impl::xml_extra_buffer*>(doc->allocate_memory(sizeof(impl::xml_extra_buffer) + sizeof(void*), page)); | |
6336 (void)page; | |
6337 | |
6338 if (!extra) return impl::make_parse_result(status_out_of_memory); | |
6339 | |
6340 #ifdef PUGIXML_COMPACT | |
6341 // align the memory block to a pointer boundary; this is required for compact mode where memory allocations are only 4b aligned | |
6342 // note that this requires up to sizeof(void*)-1 additional memory, which the allocation above takes into account | |
6343 extra = reinterpret_cast<impl::xml_extra_buffer*>((reinterpret_cast<uintptr_t>(extra) + (sizeof(void*) - 1)) & ~(sizeof(void*) - 1)); | |
6344 #endif | |
6345 | |
6346 // add extra buffer to the list | |
6347 extra->buffer = 0; | |
6348 extra->next = doc->extra_buffers; | |
6349 doc->extra_buffers = extra; | |
6350 | |
6351 // name of the root has to be NULL before parsing - otherwise closing node mismatches will not be detected at the top level | |
6352 impl::name_null_sentry sentry(_root); | |
6353 | |
6354 return impl::load_buffer_impl(doc, _root, const_cast<void*>(contents), size, options, encoding, false, false, &extra->buffer); | |
6355 } | |
6356 | |
6357 PUGI_IMPL_FN xml_node xml_node::find_child_by_attribute(const char_t* name_, const char_t* attr_name, const char_t* attr_value) const | |
6358 { | |
6359 if (!_root) return xml_node(); | |
6360 | |
6361 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
6362 { | |
6363 const char_t* iname = i->name; | |
6364 if (iname && impl::strequal(name_, iname)) | |
6365 { | |
6366 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) | |
6367 { | |
6368 const char_t* aname = a->name; | |
6369 if (aname && impl::strequal(attr_name, aname)) | |
6370 { | |
6371 const char_t* avalue = a->value; | |
6372 if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT(""))) | |
6373 return xml_node(i); | |
6374 } | |
6375 } | |
6376 } | |
6377 } | |
6378 | |
6379 return xml_node(); | |
6380 } | |
6381 | |
6382 PUGI_IMPL_FN xml_node xml_node::find_child_by_attribute(const char_t* attr_name, const char_t* attr_value) const | |
6383 { | |
6384 if (!_root) return xml_node(); | |
6385 | |
6386 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
6387 for (xml_attribute_struct* a = i->first_attribute; a; a = a->next_attribute) | |
6388 { | |
6389 const char_t* aname = a->name; | |
6390 if (aname && impl::strequal(attr_name, aname)) | |
6391 { | |
6392 const char_t* avalue = a->value; | |
6393 if (impl::strequal(attr_value, avalue ? avalue : PUGIXML_TEXT(""))) | |
6394 return xml_node(i); | |
6395 } | |
6396 } | |
6397 | |
6398 return xml_node(); | |
6399 } | |
6400 | |
6401 #ifndef PUGIXML_NO_STL | |
6402 PUGI_IMPL_FN string_t xml_node::path(char_t delimiter) const | |
6403 { | |
6404 if (!_root) return string_t(); | |
6405 | |
6406 size_t offset = 0; | |
6407 | |
6408 for (xml_node_struct* i = _root; i; i = i->parent) | |
6409 { | |
6410 const char_t* iname = i->name; | |
6411 offset += (i != _root); | |
6412 offset += iname ? impl::strlength(iname) : 0; | |
6413 } | |
6414 | |
6415 string_t result; | |
6416 result.resize(offset); | |
6417 | |
6418 for (xml_node_struct* j = _root; j; j = j->parent) | |
6419 { | |
6420 if (j != _root) | |
6421 result[--offset] = delimiter; | |
6422 | |
6423 const char_t* jname = j->name; | |
6424 if (jname) | |
6425 { | |
6426 size_t length = impl::strlength(jname); | |
6427 | |
6428 offset -= length; | |
6429 memcpy(&result[offset], jname, length * sizeof(char_t)); | |
6430 } | |
6431 } | |
6432 | |
6433 assert(offset == 0); | |
6434 | |
6435 return result; | |
6436 } | |
6437 #endif | |
6438 | |
6439 PUGI_IMPL_FN xml_node xml_node::first_element_by_path(const char_t* path_, char_t delimiter) const | |
6440 { | |
6441 xml_node context = path_[0] == delimiter ? root() : *this; | |
6442 | |
6443 if (!context._root) return xml_node(); | |
6444 | |
6445 const char_t* path_segment = path_; | |
6446 | |
6447 while (*path_segment == delimiter) ++path_segment; | |
6448 | |
6449 const char_t* path_segment_end = path_segment; | |
6450 | |
6451 while (*path_segment_end && *path_segment_end != delimiter) ++path_segment_end; | |
6452 | |
6453 if (path_segment == path_segment_end) return context; | |
6454 | |
6455 const char_t* next_segment = path_segment_end; | |
6456 | |
6457 while (*next_segment == delimiter) ++next_segment; | |
6458 | |
6459 if (*path_segment == '.' && path_segment + 1 == path_segment_end) | |
6460 return context.first_element_by_path(next_segment, delimiter); | |
6461 else if (*path_segment == '.' && *(path_segment+1) == '.' && path_segment + 2 == path_segment_end) | |
6462 return context.parent().first_element_by_path(next_segment, delimiter); | |
6463 else | |
6464 { | |
6465 for (xml_node_struct* j = context._root->first_child; j; j = j->next_sibling) | |
6466 { | |
6467 const char_t* jname = j->name; | |
6468 if (jname && impl::strequalrange(jname, path_segment, static_cast<size_t>(path_segment_end - path_segment))) | |
6469 { | |
6470 xml_node subsearch = xml_node(j).first_element_by_path(next_segment, delimiter); | |
6471 | |
6472 if (subsearch) return subsearch; | |
6473 } | |
6474 } | |
6475 | |
6476 return xml_node(); | |
6477 } | |
6478 } | |
6479 | |
6480 PUGI_IMPL_FN bool xml_node::traverse(xml_tree_walker& walker) | |
6481 { | |
6482 walker._depth = -1; | |
6483 | |
6484 xml_node arg_begin(_root); | |
6485 if (!walker.begin(arg_begin)) return false; | |
6486 | |
6487 xml_node_struct* cur = _root ? _root->first_child + 0 : 0; | |
6488 | |
6489 if (cur) | |
6490 { | |
6491 ++walker._depth; | |
6492 | |
6493 do | |
6494 { | |
6495 xml_node arg_for_each(cur); | |
6496 if (!walker.for_each(arg_for_each)) | |
6497 return false; | |
6498 | |
6499 if (cur->first_child) | |
6500 { | |
6501 ++walker._depth; | |
6502 cur = cur->first_child; | |
6503 } | |
6504 else if (cur->next_sibling) | |
6505 cur = cur->next_sibling; | |
6506 else | |
6507 { | |
6508 while (!cur->next_sibling && cur != _root && cur->parent) | |
6509 { | |
6510 --walker._depth; | |
6511 cur = cur->parent; | |
6512 } | |
6513 | |
6514 if (cur != _root) | |
6515 cur = cur->next_sibling; | |
6516 } | |
6517 } | |
6518 while (cur && cur != _root); | |
6519 } | |
6520 | |
6521 assert(walker._depth == -1); | |
6522 | |
6523 xml_node arg_end(_root); | |
6524 return walker.end(arg_end); | |
6525 } | |
6526 | |
6527 PUGI_IMPL_FN size_t xml_node::hash_value() const | |
6528 { | |
6529 return static_cast<size_t>(reinterpret_cast<uintptr_t>(_root) / sizeof(xml_node_struct)); | |
6530 } | |
6531 | |
6532 PUGI_IMPL_FN xml_node_struct* xml_node::internal_object() const | |
6533 { | |
6534 return _root; | |
6535 } | |
6536 | |
6537 PUGI_IMPL_FN void xml_node::print(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const | |
6538 { | |
6539 if (!_root) return; | |
6540 | |
6541 impl::xml_buffered_writer buffered_writer(writer, encoding); | |
6542 | |
6543 impl::node_output(buffered_writer, _root, indent, flags, depth); | |
6544 | |
6545 buffered_writer.flush(); | |
6546 } | |
6547 | |
6548 #ifndef PUGIXML_NO_STL | |
6549 PUGI_IMPL_FN void xml_node::print(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding, unsigned int depth) const | |
6550 { | |
6551 xml_writer_stream writer(stream); | |
6552 | |
6553 print(writer, indent, flags, encoding, depth); | |
6554 } | |
6555 | |
6556 PUGI_IMPL_FN void xml_node::print(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags, unsigned int depth) const | |
6557 { | |
6558 xml_writer_stream writer(stream); | |
6559 | |
6560 print(writer, indent, flags, encoding_wchar, depth); | |
6561 } | |
6562 #endif | |
6563 | |
6564 PUGI_IMPL_FN ptrdiff_t xml_node::offset_debug() const | |
6565 { | |
6566 if (!_root) return -1; | |
6567 | |
6568 impl::xml_document_struct& doc = impl::get_document(_root); | |
6569 | |
6570 // we can determine the offset reliably only if there is exactly once parse buffer | |
6571 if (!doc.buffer || doc.extra_buffers) return -1; | |
6572 | |
6573 switch (type()) | |
6574 { | |
6575 case node_document: | |
6576 return 0; | |
6577 | |
6578 case node_element: | |
6579 case node_declaration: | |
6580 case node_pi: | |
6581 return _root->name && (_root->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0 ? _root->name - doc.buffer : -1; | |
6582 | |
6583 case node_pcdata: | |
6584 case node_cdata: | |
6585 case node_comment: | |
6586 case node_doctype: | |
6587 return _root->value && (_root->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0 ? _root->value - doc.buffer : -1; | |
6588 | |
6589 default: | |
6590 assert(false && "Invalid node type"); // unreachable | |
6591 return -1; | |
6592 } | |
6593 } | |
6594 | |
6595 #ifdef __BORLANDC__ | |
6596 PUGI_IMPL_FN bool operator&&(const xml_node& lhs, bool rhs) | |
6597 { | |
6598 return (bool)lhs && rhs; | |
6599 } | |
6600 | |
6601 PUGI_IMPL_FN bool operator||(const xml_node& lhs, bool rhs) | |
6602 { | |
6603 return (bool)lhs || rhs; | |
6604 } | |
6605 #endif | |
6606 | |
6607 PUGI_IMPL_FN xml_text::xml_text(xml_node_struct* root): _root(root) | |
6608 { | |
6609 } | |
6610 | |
6611 PUGI_IMPL_FN xml_node_struct* xml_text::_data() const | |
6612 { | |
6613 if (!_root || impl::is_text_node(_root)) return _root; | |
6614 | |
6615 // element nodes can have value if parse_embed_pcdata was used | |
6616 if (PUGI_IMPL_NODETYPE(_root) == node_element && _root->value) | |
6617 return _root; | |
6618 | |
6619 for (xml_node_struct* node = _root->first_child; node; node = node->next_sibling) | |
6620 if (impl::is_text_node(node)) | |
6621 return node; | |
6622 | |
6623 return 0; | |
6624 } | |
6625 | |
6626 PUGI_IMPL_FN xml_node_struct* xml_text::_data_new() | |
6627 { | |
6628 xml_node_struct* d = _data(); | |
6629 if (d) return d; | |
6630 | |
6631 return xml_node(_root).append_child(node_pcdata).internal_object(); | |
6632 } | |
6633 | |
6634 PUGI_IMPL_FN xml_text::xml_text(): _root(0) | |
6635 { | |
6636 } | |
6637 | |
6638 PUGI_IMPL_FN static void unspecified_bool_xml_text(xml_text***) | |
6639 { | |
6640 } | |
6641 | |
6642 PUGI_IMPL_FN xml_text::operator xml_text::unspecified_bool_type() const | |
6643 { | |
6644 return _data() ? unspecified_bool_xml_text : 0; | |
6645 } | |
6646 | |
6647 PUGI_IMPL_FN bool xml_text::operator!() const | |
6648 { | |
6649 return !_data(); | |
6650 } | |
6651 | |
6652 PUGI_IMPL_FN bool xml_text::empty() const | |
6653 { | |
6654 return _data() == 0; | |
6655 } | |
6656 | |
6657 PUGI_IMPL_FN const char_t* xml_text::get() const | |
6658 { | |
6659 xml_node_struct* d = _data(); | |
6660 if (!d) return PUGIXML_TEXT(""); | |
6661 const char_t* value = d->value; | |
6662 return value ? value : PUGIXML_TEXT(""); | |
6663 } | |
6664 | |
6665 PUGI_IMPL_FN const char_t* xml_text::as_string(const char_t* def) const | |
6666 { | |
6667 xml_node_struct* d = _data(); | |
6668 if (!d) return def; | |
6669 const char_t* value = d->value; | |
6670 return value ? value : def; | |
6671 } | |
6672 | |
6673 PUGI_IMPL_FN int xml_text::as_int(int def) const | |
6674 { | |
6675 xml_node_struct* d = _data(); | |
6676 if (!d) return def; | |
6677 const char_t* value = d->value; | |
6678 return value ? impl::get_value_int(value) : def; | |
6679 } | |
6680 | |
6681 PUGI_IMPL_FN unsigned int xml_text::as_uint(unsigned int def) const | |
6682 { | |
6683 xml_node_struct* d = _data(); | |
6684 if (!d) return def; | |
6685 const char_t* value = d->value; | |
6686 return value ? impl::get_value_uint(value) : def; | |
6687 } | |
6688 | |
6689 PUGI_IMPL_FN double xml_text::as_double(double def) const | |
6690 { | |
6691 xml_node_struct* d = _data(); | |
6692 if (!d) return def; | |
6693 const char_t* value = d->value; | |
6694 return value ? impl::get_value_double(value) : def; | |
6695 } | |
6696 | |
6697 PUGI_IMPL_FN float xml_text::as_float(float def) const | |
6698 { | |
6699 xml_node_struct* d = _data(); | |
6700 if (!d) return def; | |
6701 const char_t* value = d->value; | |
6702 return value ? impl::get_value_float(value) : def; | |
6703 } | |
6704 | |
6705 PUGI_IMPL_FN bool xml_text::as_bool(bool def) const | |
6706 { | |
6707 xml_node_struct* d = _data(); | |
6708 if (!d) return def; | |
6709 const char_t* value = d->value; | |
6710 return value ? impl::get_value_bool(value) : def; | |
6711 } | |
6712 | |
#ifdef PUGIXML_HAS_LONG_LONG
// Converts the text value with impl::get_value_llong; returns def when there is no
// data node or the data node has no value.
PUGI_IMPL_FN long long xml_text::as_llong(long long def) const
{
	if (xml_node_struct* holder = _data())
	{
		const char_t* text = holder->value;
		if (text) return impl::get_value_llong(text);
	}

	return def;
}

// Converts the text value with impl::get_value_ullong; returns def when there is no
// data node or the data node has no value.
PUGI_IMPL_FN unsigned long long xml_text::as_ullong(unsigned long long def) const
{
	if (xml_node_struct* holder = _data())
	{
		const char_t* text = holder->value;
		if (text) return impl::get_value_ullong(text);
	}

	return def;
}
#endif
6730 | |
6731 PUGI_IMPL_FN bool xml_text::set(const char_t* rhs) | |
6732 { | |
6733 xml_node_struct* dn = _data_new(); | |
6734 | |
6735 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, impl::strlength(rhs)) : false; | |
6736 } | |
6737 | |
6738 PUGI_IMPL_FN bool xml_text::set(const char_t* rhs, size_t size) | |
6739 { | |
6740 xml_node_struct* dn = _data_new(); | |
6741 | |
6742 return dn ? impl::strcpy_insitu(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, size) : false; | |
6743 } | |
6744 | |
6745 PUGI_IMPL_FN bool xml_text::set(int rhs) | |
6746 { | |
6747 xml_node_struct* dn = _data_new(); | |
6748 | |
6749 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; | |
6750 } | |
6751 | |
6752 PUGI_IMPL_FN bool xml_text::set(unsigned int rhs) | |
6753 { | |
6754 xml_node_struct* dn = _data_new(); | |
6755 | |
6756 return dn ? impl::set_value_integer<unsigned int>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; | |
6757 } | |
6758 | |
6759 PUGI_IMPL_FN bool xml_text::set(long rhs) | |
6760 { | |
6761 xml_node_struct* dn = _data_new(); | |
6762 | |
6763 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; | |
6764 } | |
6765 | |
6766 PUGI_IMPL_FN bool xml_text::set(unsigned long rhs) | |
6767 { | |
6768 xml_node_struct* dn = _data_new(); | |
6769 | |
6770 return dn ? impl::set_value_integer<unsigned long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; | |
6771 } | |
6772 | |
6773 PUGI_IMPL_FN bool xml_text::set(float rhs) | |
6774 { | |
6775 xml_node_struct* dn = _data_new(); | |
6776 | |
6777 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_float_precision) : false; | |
6778 } | |
6779 | |
6780 PUGI_IMPL_FN bool xml_text::set(float rhs, int precision) | |
6781 { | |
6782 xml_node_struct* dn = _data_new(); | |
6783 | |
6784 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false; | |
6785 } | |
6786 | |
6787 PUGI_IMPL_FN bool xml_text::set(double rhs) | |
6788 { | |
6789 xml_node_struct* dn = _data_new(); | |
6790 | |
6791 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, default_double_precision) : false; | |
6792 } | |
6793 | |
6794 PUGI_IMPL_FN bool xml_text::set(double rhs, int precision) | |
6795 { | |
6796 xml_node_struct* dn = _data_new(); | |
6797 | |
6798 return dn ? impl::set_value_convert(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, precision) : false; | |
6799 } | |
6800 | |
6801 PUGI_IMPL_FN bool xml_text::set(bool rhs) | |
6802 { | |
6803 xml_node_struct* dn = _data_new(); | |
6804 | |
6805 return dn ? impl::set_value_bool(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs) : false; | |
6806 } | |
6807 | |
6808 #ifdef PUGIXML_HAS_LONG_LONG | |
6809 PUGI_IMPL_FN bool xml_text::set(long long rhs) | |
6810 { | |
6811 xml_node_struct* dn = _data_new(); | |
6812 | |
6813 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, rhs < 0) : false; | |
6814 } | |
6815 | |
6816 PUGI_IMPL_FN bool xml_text::set(unsigned long long rhs) | |
6817 { | |
6818 xml_node_struct* dn = _data_new(); | |
6819 | |
6820 return dn ? impl::set_value_integer<unsigned long long>(dn->value, dn->header, impl::xml_memory_page_value_allocated_mask, rhs, false) : false; | |
6821 } | |
6822 #endif | |
6823 | |
6824 PUGI_IMPL_FN xml_text& xml_text::operator=(const char_t* rhs) | |
6825 { | |
6826 set(rhs); | |
6827 return *this; | |
6828 } | |
6829 | |
6830 PUGI_IMPL_FN xml_text& xml_text::operator=(int rhs) | |
6831 { | |
6832 set(rhs); | |
6833 return *this; | |
6834 } | |
6835 | |
6836 PUGI_IMPL_FN xml_text& xml_text::operator=(unsigned int rhs) | |
6837 { | |
6838 set(rhs); | |
6839 return *this; | |
6840 } | |
6841 | |
6842 PUGI_IMPL_FN xml_text& xml_text::operator=(long rhs) | |
6843 { | |
6844 set(rhs); | |
6845 return *this; | |
6846 } | |
6847 | |
6848 PUGI_IMPL_FN xml_text& xml_text::operator=(unsigned long rhs) | |
6849 { | |
6850 set(rhs); | |
6851 return *this; | |
6852 } | |
6853 | |
6854 PUGI_IMPL_FN xml_text& xml_text::operator=(double rhs) | |
6855 { | |
6856 set(rhs); | |
6857 return *this; | |
6858 } | |
6859 | |
6860 PUGI_IMPL_FN xml_text& xml_text::operator=(float rhs) | |
6861 { | |
6862 set(rhs); | |
6863 return *this; | |
6864 } | |
6865 | |
6866 PUGI_IMPL_FN xml_text& xml_text::operator=(bool rhs) | |
6867 { | |
6868 set(rhs); | |
6869 return *this; | |
6870 } | |
6871 | |
6872 #ifdef PUGIXML_HAS_LONG_LONG | |
6873 PUGI_IMPL_FN xml_text& xml_text::operator=(long long rhs) | |
6874 { | |
6875 set(rhs); | |
6876 return *this; | |
6877 } | |
6878 | |
6879 PUGI_IMPL_FN xml_text& xml_text::operator=(unsigned long long rhs) | |
6880 { | |
6881 set(rhs); | |
6882 return *this; | |
6883 } | |
6884 #endif | |
6885 | |
6886 PUGI_IMPL_FN xml_node xml_text::data() const | |
6887 { | |
6888 return xml_node(_data()); | |
6889 } | |
6890 | |
#ifdef __BORLANDC__
// Borland-only overload: logical AND of a text handle (converted to bool) and a bool.
PUGI_IMPL_FN bool operator&&(const xml_text& lhs, bool rhs)
{
	const bool lhs_valid = (bool)lhs;
	return lhs_valid && rhs;
}

// Borland-only overload: logical OR of a text handle (converted to bool) and a bool.
PUGI_IMPL_FN bool operator||(const xml_text& lhs, bool rhs)
{
	const bool lhs_valid = (bool)lhs;
	return lhs_valid || rhs;
}
#endif
6902 | |
6903 PUGI_IMPL_FN xml_node_iterator::xml_node_iterator() | |
6904 { | |
6905 } | |
6906 | |
6907 PUGI_IMPL_FN xml_node_iterator::xml_node_iterator(const xml_node& node): _wrap(node), _parent(node.parent()) | |
6908 { | |
6909 } | |
6910 | |
6911 PUGI_IMPL_FN xml_node_iterator::xml_node_iterator(xml_node_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) | |
6912 { | |
6913 } | |
6914 | |
6915 PUGI_IMPL_FN bool xml_node_iterator::operator==(const xml_node_iterator& rhs) const | |
6916 { | |
6917 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; | |
6918 } | |
6919 | |
6920 PUGI_IMPL_FN bool xml_node_iterator::operator!=(const xml_node_iterator& rhs) const | |
6921 { | |
6922 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; | |
6923 } | |
6924 | |
6925 PUGI_IMPL_FN xml_node& xml_node_iterator::operator*() const | |
6926 { | |
6927 assert(_wrap._root); | |
6928 return _wrap; | |
6929 } | |
6930 | |
6931 PUGI_IMPL_FN xml_node* xml_node_iterator::operator->() const | |
6932 { | |
6933 assert(_wrap._root); | |
6934 return const_cast<xml_node*>(&_wrap); // BCC5 workaround | |
6935 } | |
6936 | |
6937 PUGI_IMPL_FN xml_node_iterator& xml_node_iterator::operator++() | |
6938 { | |
6939 assert(_wrap._root); | |
6940 _wrap._root = _wrap._root->next_sibling; | |
6941 return *this; | |
6942 } | |
6943 | |
6944 PUGI_IMPL_FN xml_node_iterator xml_node_iterator::operator++(int) | |
6945 { | |
6946 xml_node_iterator temp = *this; | |
6947 ++*this; | |
6948 return temp; | |
6949 } | |
6950 | |
6951 PUGI_IMPL_FN xml_node_iterator& xml_node_iterator::operator--() | |
6952 { | |
6953 _wrap = _wrap._root ? _wrap.previous_sibling() : _parent.last_child(); | |
6954 return *this; | |
6955 } | |
6956 | |
6957 PUGI_IMPL_FN xml_node_iterator xml_node_iterator::operator--(int) | |
6958 { | |
6959 xml_node_iterator temp = *this; | |
6960 --*this; | |
6961 return temp; | |
6962 } | |
6963 | |
6964 PUGI_IMPL_FN xml_attribute_iterator::xml_attribute_iterator() | |
6965 { | |
6966 } | |
6967 | |
6968 PUGI_IMPL_FN xml_attribute_iterator::xml_attribute_iterator(const xml_attribute& attr, const xml_node& parent): _wrap(attr), _parent(parent) | |
6969 { | |
6970 } | |
6971 | |
6972 PUGI_IMPL_FN xml_attribute_iterator::xml_attribute_iterator(xml_attribute_struct* ref, xml_node_struct* parent): _wrap(ref), _parent(parent) | |
6973 { | |
6974 } | |
6975 | |
6976 PUGI_IMPL_FN bool xml_attribute_iterator::operator==(const xml_attribute_iterator& rhs) const | |
6977 { | |
6978 return _wrap._attr == rhs._wrap._attr && _parent._root == rhs._parent._root; | |
6979 } | |
6980 | |
6981 PUGI_IMPL_FN bool xml_attribute_iterator::operator!=(const xml_attribute_iterator& rhs) const | |
6982 { | |
6983 return _wrap._attr != rhs._wrap._attr || _parent._root != rhs._parent._root; | |
6984 } | |
6985 | |
6986 PUGI_IMPL_FN xml_attribute& xml_attribute_iterator::operator*() const | |
6987 { | |
6988 assert(_wrap._attr); | |
6989 return _wrap; | |
6990 } | |
6991 | |
6992 PUGI_IMPL_FN xml_attribute* xml_attribute_iterator::operator->() const | |
6993 { | |
6994 assert(_wrap._attr); | |
6995 return const_cast<xml_attribute*>(&_wrap); // BCC5 workaround | |
6996 } | |
6997 | |
6998 PUGI_IMPL_FN xml_attribute_iterator& xml_attribute_iterator::operator++() | |
6999 { | |
7000 assert(_wrap._attr); | |
7001 _wrap._attr = _wrap._attr->next_attribute; | |
7002 return *this; | |
7003 } | |
7004 | |
7005 PUGI_IMPL_FN xml_attribute_iterator xml_attribute_iterator::operator++(int) | |
7006 { | |
7007 xml_attribute_iterator temp = *this; | |
7008 ++*this; | |
7009 return temp; | |
7010 } | |
7011 | |
7012 PUGI_IMPL_FN xml_attribute_iterator& xml_attribute_iterator::operator--() | |
7013 { | |
7014 _wrap = _wrap._attr ? _wrap.previous_attribute() : _parent.last_attribute(); | |
7015 return *this; | |
7016 } | |
7017 | |
7018 PUGI_IMPL_FN xml_attribute_iterator xml_attribute_iterator::operator--(int) | |
7019 { | |
7020 xml_attribute_iterator temp = *this; | |
7021 --*this; | |
7022 return temp; | |
7023 } | |
7024 | |
7025 PUGI_IMPL_FN xml_named_node_iterator::xml_named_node_iterator(): _name(0) | |
7026 { | |
7027 } | |
7028 | |
7029 PUGI_IMPL_FN xml_named_node_iterator::xml_named_node_iterator(const xml_node& node, const char_t* name): _wrap(node), _parent(node.parent()), _name(name) | |
7030 { | |
7031 } | |
7032 | |
7033 PUGI_IMPL_FN xml_named_node_iterator::xml_named_node_iterator(xml_node_struct* ref, xml_node_struct* parent, const char_t* name): _wrap(ref), _parent(parent), _name(name) | |
7034 { | |
7035 } | |
7036 | |
7037 PUGI_IMPL_FN bool xml_named_node_iterator::operator==(const xml_named_node_iterator& rhs) const | |
7038 { | |
7039 return _wrap._root == rhs._wrap._root && _parent._root == rhs._parent._root; | |
7040 } | |
7041 | |
7042 PUGI_IMPL_FN bool xml_named_node_iterator::operator!=(const xml_named_node_iterator& rhs) const | |
7043 { | |
7044 return _wrap._root != rhs._wrap._root || _parent._root != rhs._parent._root; | |
7045 } | |
7046 | |
7047 PUGI_IMPL_FN xml_node& xml_named_node_iterator::operator*() const | |
7048 { | |
7049 assert(_wrap._root); | |
7050 return _wrap; | |
7051 } | |
7052 | |
7053 PUGI_IMPL_FN xml_node* xml_named_node_iterator::operator->() const | |
7054 { | |
7055 assert(_wrap._root); | |
7056 return const_cast<xml_node*>(&_wrap); // BCC5 workaround | |
7057 } | |
7058 | |
7059 PUGI_IMPL_FN xml_named_node_iterator& xml_named_node_iterator::operator++() | |
7060 { | |
7061 assert(_wrap._root); | |
7062 _wrap = _wrap.next_sibling(_name); | |
7063 return *this; | |
7064 } | |
7065 | |
7066 PUGI_IMPL_FN xml_named_node_iterator xml_named_node_iterator::operator++(int) | |
7067 { | |
7068 xml_named_node_iterator temp = *this; | |
7069 ++*this; | |
7070 return temp; | |
7071 } | |
7072 | |
7073 PUGI_IMPL_FN xml_named_node_iterator& xml_named_node_iterator::operator--() | |
7074 { | |
7075 if (_wrap._root) | |
7076 _wrap = _wrap.previous_sibling(_name); | |
7077 else | |
7078 { | |
7079 _wrap = _parent.last_child(); | |
7080 | |
7081 if (!impl::strequal(_wrap.name(), _name)) | |
7082 _wrap = _wrap.previous_sibling(_name); | |
7083 } | |
7084 | |
7085 return *this; | |
7086 } | |
7087 | |
7088 PUGI_IMPL_FN xml_named_node_iterator xml_named_node_iterator::operator--(int) | |
7089 { | |
7090 xml_named_node_iterator temp = *this; | |
7091 --*this; | |
7092 return temp; | |
7093 } | |
7094 | |
7095 PUGI_IMPL_FN xml_parse_result::xml_parse_result(): status(status_internal_error), offset(0), encoding(encoding_auto) | |
7096 { | |
7097 } | |
7098 | |
7099 PUGI_IMPL_FN xml_parse_result::operator bool() const | |
7100 { | |
7101 return status == status_ok; | |
7102 } | |
7103 | |
7104 PUGI_IMPL_FN const char* xml_parse_result::description() const | |
7105 { | |
7106 switch (status) | |
7107 { | |
7108 case status_ok: return "No error"; | |
7109 | |
7110 case status_file_not_found: return "File was not found"; | |
7111 case status_io_error: return "Error reading from file/stream"; | |
7112 case status_out_of_memory: return "Could not allocate memory"; | |
7113 case status_internal_error: return "Internal error occurred"; | |
7114 | |
7115 case status_unrecognized_tag: return "Could not determine tag type"; | |
7116 | |
7117 case status_bad_pi: return "Error parsing document declaration/processing instruction"; | |
7118 case status_bad_comment: return "Error parsing comment"; | |
7119 case status_bad_cdata: return "Error parsing CDATA section"; | |
7120 case status_bad_doctype: return "Error parsing document type declaration"; | |
7121 case status_bad_pcdata: return "Error parsing PCDATA section"; | |
7122 case status_bad_start_element: return "Error parsing start element tag"; | |
7123 case status_bad_attribute: return "Error parsing element attribute"; | |
7124 case status_bad_end_element: return "Error parsing end element tag"; | |
7125 case status_end_element_mismatch: return "Start-end tags mismatch"; | |
7126 | |
7127 case status_append_invalid_root: return "Unable to append nodes: root is not an element or document"; | |
7128 | |
7129 case status_no_document_element: return "No document element found"; | |
7130 | |
7131 default: return "Unknown error"; | |
7132 } | |
7133 } | |
7134 | |
7135 PUGI_IMPL_FN xml_document::xml_document(): _buffer(0) | |
7136 { | |
7137 _create(); | |
7138 } | |
7139 | |
7140 PUGI_IMPL_FN xml_document::~xml_document() | |
7141 { | |
7142 _destroy(); | |
7143 } | |
7144 | |
#ifdef PUGIXML_HAS_MOVE
// Move constructor: starts from an empty document, then takes over rhs via _move().
PUGI_IMPL_FN xml_document::xml_document(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
	: _buffer(0)
{
	_create();
	_move(rhs);
}

// Move assignment: self-assignment is a no-op; otherwise the current contents are
// destroyed, an empty document is recreated, and rhs is taken over via _move().
PUGI_IMPL_FN xml_document& xml_document::operator=(xml_document&& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
{
	if (this != &rhs)
	{
		_destroy();
		_create();
		_move(rhs);
	}

	return *this;
}
#endif
7163 | |
7164 PUGI_IMPL_FN void xml_document::reset() | |
7165 { | |
7166 _destroy(); | |
7167 _create(); | |
7168 } | |
7169 | |
7170 PUGI_IMPL_FN void xml_document::reset(const xml_document& proto) | |
7171 { | |
7172 reset(); | |
7173 | |
7174 impl::node_copy_tree(_root, proto._root); | |
7175 } | |
7176 | |
7177 PUGI_IMPL_FN void xml_document::_create() | |
7178 { | |
7179 assert(!_root); | |
7180 | |
7181 #ifdef PUGIXML_COMPACT | |
7182 // space for page marker for the first page (uint32_t), rounded up to pointer size; assumes pointers are at least 32-bit | |
7183 const size_t page_offset = sizeof(void*); | |
7184 #else | |
7185 const size_t page_offset = 0; | |
7186 #endif | |
7187 | |
7188 // initialize sentinel page | |
7189 PUGI_IMPL_STATIC_ASSERT(sizeof(impl::xml_memory_page) + sizeof(impl::xml_document_struct) + page_offset <= sizeof(_memory)); | |
7190 | |
7191 // prepare page structure | |
7192 impl::xml_memory_page* page = impl::xml_memory_page::construct(_memory); | |
7193 assert(page); | |
7194 | |
7195 page->busy_size = impl::xml_memory_page_size; | |
7196 | |
7197 // setup first page marker | |
7198 #ifdef PUGIXML_COMPACT | |
7199 // round-trip through void* to avoid 'cast increases required alignment of target type' warning | |
7200 page->compact_page_marker = reinterpret_cast<uint32_t*>(static_cast<void*>(reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page))); | |
7201 *page->compact_page_marker = sizeof(impl::xml_memory_page); | |
7202 #endif | |
7203 | |
7204 // allocate new root | |
7205 _root = new (reinterpret_cast<char*>(page) + sizeof(impl::xml_memory_page) + page_offset) impl::xml_document_struct(page); | |
7206 _root->prev_sibling_c = _root; | |
7207 | |
7208 // setup sentinel page | |
7209 page->allocator = static_cast<impl::xml_document_struct*>(_root); | |
7210 | |
7211 // setup hash table pointer in allocator | |
7212 #ifdef PUGIXML_COMPACT | |
7213 page->allocator->_hash = &static_cast<impl::xml_document_struct*>(_root)->hash; | |
7214 #endif | |
7215 | |
7216 // verify the document allocation | |
7217 assert(reinterpret_cast<char*>(_root) + sizeof(impl::xml_document_struct) <= _memory + sizeof(_memory)); | |
7218 } | |
7219 | |
7220 PUGI_IMPL_FN void xml_document::_destroy() | |
7221 { | |
7222 assert(_root); | |
7223 | |
7224 // destroy static storage | |
7225 if (_buffer) | |
7226 { | |
7227 impl::xml_memory::deallocate(_buffer); | |
7228 _buffer = 0; | |
7229 } | |
7230 | |
7231 // destroy extra buffers (note: no need to destroy linked list nodes, they're allocated using document allocator) | |
7232 for (impl::xml_extra_buffer* extra = static_cast<impl::xml_document_struct*>(_root)->extra_buffers; extra; extra = extra->next) | |
7233 { | |
7234 if (extra->buffer) impl::xml_memory::deallocate(extra->buffer); | |
7235 } | |
7236 | |
7237 // destroy dynamic storage, leave sentinel page (it's in static memory) | |
7238 impl::xml_memory_page* root_page = PUGI_IMPL_GETPAGE(_root); | |
7239 assert(root_page && !root_page->prev); | |
7240 assert(reinterpret_cast<char*>(root_page) >= _memory && reinterpret_cast<char*>(root_page) < _memory + sizeof(_memory)); | |
7241 | |
7242 for (impl::xml_memory_page* page = root_page->next; page; ) | |
7243 { | |
7244 impl::xml_memory_page* next = page->next; | |
7245 | |
7246 impl::xml_allocator::deallocate_page(page); | |
7247 | |
7248 page = next; | |
7249 } | |
7250 | |
7251 #ifdef PUGIXML_COMPACT | |
7252 // destroy hash table | |
7253 static_cast<impl::xml_document_struct*>(_root)->hash.clear(); | |
7254 #endif | |
7255 | |
7256 _root = 0; | |
7257 } | |
7258 | |
#ifdef PUGIXML_HAS_MOVE
// Transfers the contents of rhs into this document and leaves rhs empty.
// Asserts that this document has no dynamic pages and no children on entry; the move
// constructor and move assignment call _create() right before this function.
// In compact mode the only step that can fail is reserving hash table space; it is
// done first, and on failure the function throws std::bad_alloc, or returns without
// having modified either document when PUGIXML_NO_EXCEPTIONS is defined.
PUGI_IMPL_FN void xml_document::_move(xml_document& rhs) PUGIXML_NOEXCEPT_IF_NOT_COMPACT
{
	impl::xml_document_struct* dst = static_cast<impl::xml_document_struct*>(_root);
	impl::xml_document_struct* src = static_cast<impl::xml_document_struct*>(rhs._root);

	// save first child pointer for later; this needs hash access
	xml_node_struct* src_children = src->first_child;

#ifdef PUGIXML_COMPACT
	// reserve space for the hash table up front; this is the only operation that can fail
	// if it does, we have no choice but to throw (if we have exceptions)
	if (src_children)
	{
		size_t child_count = 0;

		xml_node_struct* cursor = src_children;

		while (cursor)
		{
			child_count++;
			cursor = cursor->next_sibling;
		}

		// in compact mode, each pointer assignment could result in a hash table request
		// during move, we have to relocate document first_child and parents of all children
		// normally there's just one child and its parent has a pointerless encoding but
		// we assume the worst here
		if (!src->_hash->reserve(child_count + 1))
		{
		#ifdef PUGIXML_NO_EXCEPTIONS
			return;
		#else
			throw std::bad_alloc();
		#endif
		}
	}
#endif

	// move allocation state
	// note that src->_root may point to the embedded document page, in which case we should keep original (empty) state
	if (src->_root != PUGI_IMPL_GETPAGE(src))
	{
		dst->_root = src->_root;
		dst->_busy_size = src->_busy_size;
	}

	// move buffer state
	dst->buffer = src->buffer;
	dst->extra_buffers = src->extra_buffers;
	_buffer = rhs._buffer;

#ifdef PUGIXML_COMPACT
	// move compact hash; note that the hash table can have pointers to src but they will be "inactive", similarly to nodes removed with remove_child
	dst->hash = src->hash;
	dst->_hash = &dst->hash;

	// make sure we don't access src hash up until the end when we reinitialize src document
	src->_hash = 0;
#endif

	// move page structure
	impl::xml_memory_page* dst_page = PUGI_IMPL_GETPAGE(dst);
	assert(dst_page && !dst_page->prev && !dst_page->next);

	impl::xml_memory_page* src_page = PUGI_IMPL_GETPAGE(src);
	assert(src_page && !src_page->prev);

	// relink pages since root page is embedded into xml_document
	impl::xml_memory_page* first_dynamic = src_page->next;

	if (first_dynamic)
	{
		assert(first_dynamic->prev == src_page);

		first_dynamic->prev = dst_page;

		dst_page->next = first_dynamic;
		src_page->next = 0;
	}

	// make sure pages point to the correct document state
	impl::xml_memory_page* moved_page = dst_page->next;

	while (moved_page)
	{
		assert(moved_page->allocator == src);

		moved_page->allocator = dst;

	#ifdef PUGIXML_COMPACT
		// this automatically migrates most children between documents and prevents ->parent assignment from allocating
		if (moved_page->compact_shared_parent == src)
			moved_page->compact_shared_parent = dst;
	#endif

		moved_page = moved_page->next;
	}

	// move tree structure
	assert(!dst->first_child);

	dst->first_child = src_children;

	xml_node_struct* child = src_children;

	while (child)
	{
	#ifdef PUGIXML_COMPACT
		// most children will have migrated when we reassigned compact_shared_parent
		assert(child->parent == src || child->parent == dst);
	#else
		assert(child->parent == src);
	#endif

		child->parent = dst;

		child = child->next_sibling;
	}

	// reset source document: reconstruct its document structure in place on its own page
	new (src) impl::xml_document_struct(PUGI_IMPL_GETPAGE(src));
	rhs._buffer = 0;
}
#endif
7369 | |
7370 #ifndef PUGIXML_NO_STL | |
7371 PUGI_IMPL_FN xml_parse_result xml_document::load(std::basic_istream<char, std::char_traits<char> >& stream, unsigned int options, xml_encoding encoding) | |
7372 { | |
7373 reset(); | |
7374 | |
7375 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding, &_buffer); | |
7376 } | |
7377 | |
7378 PUGI_IMPL_FN xml_parse_result xml_document::load(std::basic_istream<wchar_t, std::char_traits<wchar_t> >& stream, unsigned int options) | |
7379 { | |
7380 reset(); | |
7381 | |
7382 return impl::load_stream_impl(static_cast<impl::xml_document_struct*>(_root), stream, options, encoding_wchar, &_buffer); | |
7383 } | |
7384 #endif | |
7385 | |
7386 PUGI_IMPL_FN xml_parse_result xml_document::load_string(const char_t* contents, unsigned int options) | |
7387 { | |
7388 // Force native encoding (skip autodetection) | |
7389 #ifdef PUGIXML_WCHAR_MODE | |
7390 xml_encoding encoding = encoding_wchar; | |
7391 #else | |
7392 xml_encoding encoding = encoding_utf8; | |
7393 #endif | |
7394 | |
7395 return load_buffer(contents, impl::strlength(contents) * sizeof(char_t), options, encoding); | |
7396 } | |
7397 | |
7398 PUGI_IMPL_FN xml_parse_result xml_document::load(const char_t* contents, unsigned int options) | |
7399 { | |
7400 return load_string(contents, options); | |
7401 } | |
7402 | |
7403 PUGI_IMPL_FN xml_parse_result xml_document::load_file(const char* path_, unsigned int options, xml_encoding encoding) | |
7404 { | |
7405 reset(); | |
7406 | |
7407 using impl::auto_deleter; // MSVC7 workaround | |
7408 auto_deleter<FILE> file(impl::open_file(path_, "rb"), impl::close_file); | |
7409 | |
7410 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); | |
7411 } | |
7412 | |
7413 PUGI_IMPL_FN xml_parse_result xml_document::load_file(const wchar_t* path_, unsigned int options, xml_encoding encoding) | |
7414 { | |
7415 reset(); | |
7416 | |
7417 using impl::auto_deleter; // MSVC7 workaround | |
7418 auto_deleter<FILE> file(impl::open_file_wide(path_, L"rb"), impl::close_file); | |
7419 | |
7420 return impl::load_file_impl(static_cast<impl::xml_document_struct*>(_root), file.data, options, encoding, &_buffer); | |
7421 } | |
7422 | |
7423 PUGI_IMPL_FN xml_parse_result xml_document::load_buffer(const void* contents, size_t size, unsigned int options, xml_encoding encoding) | |
7424 { | |
7425 reset(); | |
7426 | |
7427 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, const_cast<void*>(contents), size, options, encoding, false, false, &_buffer); | |
7428 } | |
7429 | |
7430 PUGI_IMPL_FN xml_parse_result xml_document::load_buffer_inplace(void* contents, size_t size, unsigned int options, xml_encoding encoding) | |
7431 { | |
7432 reset(); | |
7433 | |
7434 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, false, &_buffer); | |
7435 } | |
7436 | |
7437 PUGI_IMPL_FN xml_parse_result xml_document::load_buffer_inplace_own(void* contents, size_t size, unsigned int options, xml_encoding encoding) | |
7438 { | |
7439 reset(); | |
7440 | |
7441 return impl::load_buffer_impl(static_cast<impl::xml_document_struct*>(_root), _root, contents, size, options, encoding, true, true, &_buffer); | |
7442 } | |
7443 | |
7444 PUGI_IMPL_FN void xml_document::save(xml_writer& writer, const char_t* indent, unsigned int flags, xml_encoding encoding) const | |
7445 { | |
7446 impl::xml_buffered_writer buffered_writer(writer, encoding); | |
7447 | |
7448 if ((flags & format_write_bom) && encoding != encoding_latin1) | |
7449 { | |
7450 // BOM always represents the codepoint U+FEFF, so just write it in native encoding | |
7451 #ifdef PUGIXML_WCHAR_MODE | |
7452 unsigned int bom = 0xfeff; | |
7453 buffered_writer.write(static_cast<wchar_t>(bom)); | |
7454 #else | |
7455 buffered_writer.write('\xef', '\xbb', '\xbf'); | |
7456 #endif | |
7457 } | |
7458 | |
7459 if (!(flags & format_no_declaration) && !impl::has_declaration(_root)) | |
7460 { | |
7461 buffered_writer.write_string(PUGIXML_TEXT("<?xml version=\"1.0\"")); | |
7462 if (encoding == encoding_latin1) buffered_writer.write_string(PUGIXML_TEXT(" encoding=\"ISO-8859-1\"")); | |
7463 buffered_writer.write('?', '>'); | |
7464 if (!(flags & format_raw)) buffered_writer.write('\n'); | |
7465 } | |
7466 | |
7467 impl::node_output(buffered_writer, _root, indent, flags, 0); | |
7468 | |
7469 buffered_writer.flush(); | |
7470 } | |
7471 | |
7472 #ifndef PUGIXML_NO_STL | |
7473 PUGI_IMPL_FN void xml_document::save(std::basic_ostream<char, std::char_traits<char> >& stream, const char_t* indent, unsigned int flags, xml_encoding encoding) const | |
7474 { | |
7475 xml_writer_stream writer(stream); | |
7476 | |
7477 save(writer, indent, flags, encoding); | |
7478 } | |
7479 | |
7480 PUGI_IMPL_FN void xml_document::save(std::basic_ostream<wchar_t, std::char_traits<wchar_t> >& stream, const char_t* indent, unsigned int flags) const | |
7481 { | |
7482 xml_writer_stream writer(stream); | |
7483 | |
7484 save(writer, indent, flags, encoding_wchar); | |
7485 } | |
7486 #endif | |
7487 | |
7488 PUGI_IMPL_FN bool xml_document::save_file(const char* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const | |
7489 { | |
7490 using impl::auto_deleter; // MSVC7 workaround | |
7491 auto_deleter<FILE> file(impl::open_file(path_, (flags & format_save_file_text) ? "w" : "wb"), impl::close_file); | |
7492 | |
7493 return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0; | |
7494 } | |
7495 | |
7496 PUGI_IMPL_FN bool xml_document::save_file(const wchar_t* path_, const char_t* indent, unsigned int flags, xml_encoding encoding) const | |
7497 { | |
7498 using impl::auto_deleter; // MSVC7 workaround | |
7499 auto_deleter<FILE> file(impl::open_file_wide(path_, (flags & format_save_file_text) ? L"w" : L"wb"), impl::close_file); | |
7500 | |
7501 return impl::save_file_impl(*this, file.data, indent, flags, encoding) && fclose(file.release()) == 0; | |
7502 } | |
7503 | |
7504 PUGI_IMPL_FN xml_node xml_document::document_element() const | |
7505 { | |
7506 assert(_root); | |
7507 | |
7508 for (xml_node_struct* i = _root->first_child; i; i = i->next_sibling) | |
7509 if (PUGI_IMPL_NODETYPE(i) == node_element) | |
7510 return xml_node(i); | |
7511 | |
7512 return xml_node(); | |
7513 } | |
7514 | |
7515 #ifndef PUGIXML_NO_STL | |
7516 PUGI_IMPL_FN std::string PUGIXML_FUNCTION as_utf8(const wchar_t* str) | |
7517 { | |
7518 assert(str); | |
7519 | |
7520 return impl::as_utf8_impl(str, impl::strlength_wide(str)); | |
7521 } | |
7522 | |
7523 PUGI_IMPL_FN std::string PUGIXML_FUNCTION as_utf8(const std::basic_string<wchar_t>& str) | |
7524 { | |
7525 return impl::as_utf8_impl(str.c_str(), str.size()); | |
7526 } | |
7527 | |
7528 PUGI_IMPL_FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const char* str) | |
7529 { | |
7530 assert(str); | |
7531 | |
7532 return impl::as_wide_impl(str, strlen(str)); | |
7533 } | |
7534 | |
7535 PUGI_IMPL_FN std::basic_string<wchar_t> PUGIXML_FUNCTION as_wide(const std::string& str) | |
7536 { | |
7537 return impl::as_wide_impl(str.c_str(), str.size()); | |
7538 } | |
7539 #endif | |
7540 | |
7541 PUGI_IMPL_FN void PUGIXML_FUNCTION set_memory_management_functions(allocation_function allocate, deallocation_function deallocate) | |
7542 { | |
7543 impl::xml_memory::allocate = allocate; | |
7544 impl::xml_memory::deallocate = deallocate; | |
7545 } | |
7546 | |
7547 PUGI_IMPL_FN allocation_function PUGIXML_FUNCTION get_memory_allocation_function() | |
7548 { | |
7549 return impl::xml_memory::allocate; | |
7550 } | |
7551 | |
7552 PUGI_IMPL_FN deallocation_function PUGIXML_FUNCTION get_memory_deallocation_function() | |
7553 { | |
7554 return impl::xml_memory::deallocate; | |
7555 } | |
7556 } | |
7557 | |
7558 #if !defined(PUGIXML_NO_STL) && (defined(_MSC_VER) || defined(__ICC)) | |
7559 namespace std | |
7560 { | |
7561 // Workarounds for (non-standard) iterator category detection for older versions (MSVC7/IC8 and earlier) | |
7562 PUGI_IMPL_FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_node_iterator&) | |
7563 { | |
7564 return std::bidirectional_iterator_tag(); | |
7565 } | |
7566 | |
7567 PUGI_IMPL_FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_attribute_iterator&) | |
7568 { | |
7569 return std::bidirectional_iterator_tag(); | |
7570 } | |
7571 | |
7572 PUGI_IMPL_FN std::bidirectional_iterator_tag _Iter_cat(const pugi::xml_named_node_iterator&) | |
7573 { | |
7574 return std::bidirectional_iterator_tag(); | |
7575 } | |
7576 } | |
7577 #endif | |
7578 | |
7579 #if !defined(PUGIXML_NO_STL) && defined(__SUNPRO_CC) | |
7580 namespace std | |
7581 { | |
7582 // Workarounds for (non-standard) iterator category detection | |
7583 PUGI_IMPL_FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_node_iterator&) | |
7584 { | |
7585 return std::bidirectional_iterator_tag(); | |
7586 } | |
7587 | |
7588 PUGI_IMPL_FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_attribute_iterator&) | |
7589 { | |
7590 return std::bidirectional_iterator_tag(); | |
7591 } | |
7592 | |
7593 PUGI_IMPL_FN std::bidirectional_iterator_tag __iterator_category(const pugi::xml_named_node_iterator&) | |
7594 { | |
7595 return std::bidirectional_iterator_tag(); | |
7596 } | |
7597 } | |
7598 #endif | |
7599 | |
7600 #ifndef PUGIXML_NO_XPATH | |
7601 // STL replacements | |
7602 PUGI_IMPL_NS_BEGIN | |
// Function object that compares two values of the same type with operator==.
struct equal_to
{
	template <typename T> bool operator()(const T& left, const T& right) const
	{
		const bool same = (left == right);

		return same;
	}
};
7610 | |
// Function object that compares two values of the same type with operator!=.
struct not_equal_to
{
	template <typename T> bool operator()(const T& left, const T& right) const
	{
		const bool differ = (left != right);

		return differ;
	}
};
7618 | |
// Function object that compares two values of the same type with operator<.
struct less
{
	template <typename T> bool operator()(const T& left, const T& right) const
	{
		const bool before = (left < right);

		return before;
	}
};
7626 | |
// Function object that compares two values of the same type with operator<=.
struct less_equal
{
	template <typename T> bool operator()(const T& left, const T& right) const
	{
		const bool not_after = (left <= right);

		return not_after;
	}
};
7634 | |
// Exchanges the values of lhs and rhs through a temporary copy.
template <typename T> inline void swap(T& lhs, T& rhs)
{
	T saved = lhs;

	lhs = rhs;
	rhs = saved;
}
7641 | |
7642 template <typename I, typename Pred> PUGI_IMPL_FN I min_element(I begin, I end, const Pred& pred) | |
7643 { | |
7644 I result = begin; | |
7645 | |
7646 for (I it = begin + 1; it != end; ++it) | |
7647 if (pred(*it, *result)) | |
7648 result = it; | |
7649 | |
7650 return result; | |
7651 } | |
7652 | |
7653 template <typename I> PUGI_IMPL_FN void reverse(I begin, I end) | |
7654 { | |
7655 while (end - begin > 1) | |
7656 swap(*begin++, *--end); | |
7657 } | |
7658 | |
7659 template <typename I> PUGI_IMPL_FN I unique(I begin, I end) | |
7660 { | |
7661 // fast skip head | |
7662 while (end - begin > 1 && *begin != *(begin + 1)) | |
7663 begin++; | |
7664 | |
7665 if (begin == end) | |
7666 return begin; | |
7667 | |
7668 // last written element | |
7669 I write = begin++; | |
7670 | |
7671 // merge unique elements | |
7672 while (begin != end) | |
7673 { | |
7674 if (*begin != *write) | |
7675 *++write = *begin++; | |
7676 else | |
7677 begin++; | |
7678 } | |
7679 | |
7680 // past-the-end (write points to live element) | |
7681 return write + 1; | |
7682 } | |
7683 | |
7684 template <typename T, typename Pred> PUGI_IMPL_FN void insertion_sort(T* begin, T* end, const Pred& pred) | |
7685 { | |
7686 if (begin == end) | |
7687 return; | |
7688 | |
7689 for (T* it = begin + 1; it != end; ++it) | |
7690 { | |
7691 T val = *it; | |
7692 T* hole = it; | |
7693 | |
7694 // move hole backwards | |
7695 while (hole > begin && pred(val, *(hole - 1))) | |
7696 { | |
7697 *hole = *(hole - 1); | |
7698 hole--; | |
7699 } | |
7700 | |
7701 // fill hole with element | |
7702 *hole = val; | |
7703 } | |
7704 } | |
7705 | |
// Returns whichever of the three iterators refers to the median element under pred.
// Only the local iterator copies are exchanged; the referenced elements are not moved.
template <typename I, typename Pred> inline I median3(I first, I middle, I last, const Pred& pred)
{
	// order (first, middle)
	if (pred(*middle, *first))
		swap(middle, first);

	// order (middle, last)
	if (pred(*last, *middle))
		swap(last, middle);

	// the previous exchange may have broken the (first, middle) order
	if (pred(*middle, *first))
		swap(middle, first);

	return middle;
}
7717 | |
7718 template <typename T, typename Pred> PUGI_IMPL_FN void partition3(T* begin, T* end, T pivot, const Pred& pred, T** out_eqbeg, T** out_eqend) | |
7719 { | |
7720 // invariant: array is split into 4 groups: = < ? > (each variable denotes the boundary between the groups) | |
7721 T* eq = begin; | |
7722 T* lt = begin; | |
7723 T* gt = end; | |
7724 | |
7725 while (lt < gt) | |
7726 { | |
7727 if (pred(*lt, pivot)) | |
7728 lt++; | |
7729 else if (*lt == pivot) | |
7730 swap(*eq++, *lt++); | |
7731 else | |
7732 swap(*lt, *--gt); | |
7733 } | |
7734 | |
7735 // we now have just 4 groups: = < >; move equal elements to the middle | |
7736 T* eqbeg = gt; | |
7737 | |
7738 for (T* it = begin; it != eq; ++it) | |
7739 swap(*it, *--eqbeg); | |
7740 | |
7741 *out_eqbeg = eqbeg; | |
7742 *out_eqend = gt; | |
7743 } | |
7744 | |
7745 template <typename I, typename Pred> PUGI_IMPL_FN void sort(I begin, I end, const Pred& pred) | |
7746 { | |
7747 // sort large chunks | |
7748 while (end - begin > 16) | |
7749 { | |
7750 // find median element | |
7751 I middle = begin + (end - begin) / 2; | |
7752 I median = median3(begin, middle, end - 1, pred); | |
7753 | |
7754 // partition in three chunks (< = >) | |
7755 I eqbeg, eqend; | |
7756 partition3(begin, end, *median, pred, &eqbeg, &eqend); | |
7757 | |
7758 // loop on larger half | |
7759 if (eqbeg - begin > end - eqend) | |
7760 { | |
7761 sort(eqend, end, pred); | |
7762 end = eqbeg; | |
7763 } | |
7764 else | |
7765 { | |
7766 sort(begin, eqbeg, pred); | |
7767 begin = eqend; | |
7768 } | |
7769 } | |
7770 | |
7771 // insertion sort small chunk | |
7772 insertion_sort(begin, end, pred); | |
7773 } | |
7774 | |
7775 PUGI_IMPL_FN bool hash_insert(const void** table, size_t size, const void* key) | |
7776 { | |
7777 assert(key); | |
7778 | |
7779 unsigned int h = static_cast<unsigned int>(reinterpret_cast<uintptr_t>(key)); | |
7780 | |
7781 // MurmurHash3 32-bit finalizer | |
7782 h ^= h >> 16; | |
7783 h *= 0x85ebca6bu; | |
7784 h ^= h >> 13; | |
7785 h *= 0xc2b2ae35u; | |
7786 h ^= h >> 16; | |
7787 | |
7788 size_t hashmod = size - 1; | |
7789 size_t bucket = h & hashmod; | |
7790 | |
7791 for (size_t probe = 0; probe <= hashmod; ++probe) | |
7792 { | |
7793 if (table[bucket] == 0) | |
7794 { | |
7795 table[bucket] = key; | |
7796 return true; | |
7797 } | |
7798 | |
7799 if (table[bucket] == key) | |
7800 return false; | |
7801 | |
7802 // hash collision, quadratic probing | |
7803 bucket = (bucket + probe + 1) & hashmod; | |
7804 } | |
7805 | |
7806 assert(false && "Hash table is full"); // unreachable | |
7807 return false; | |
7808 } | |
7809 PUGI_IMPL_NS_END | |
7810 | |
7811 // Allocator used for AST and evaluation stacks | |
7812 PUGI_IMPL_NS_BEGIN | |
// Size in bytes of the data area of one xpath_memory_block page.
// Can be overridden at build time by defining PUGIXML_MEMORY_XPATH_PAGE_SIZE.
#ifdef PUGIXML_MEMORY_XPATH_PAGE_SIZE
static const size_t xpath_memory_page_size = PUGIXML_MEMORY_XPATH_PAGE_SIZE;
#else
static const size_t xpath_memory_page_size = 4096;
#endif

// Allocation granularity of the XPath allocator: the larger of sizeof(double) and sizeof(void*).
static const uintptr_t xpath_memory_block_alignment = sizeof(void*) < sizeof(double) ? sizeof(double) : sizeof(void*);
7822 | |
7823 struct xpath_memory_block | |
7824 { | |
7825 xpath_memory_block* next; | |
7826 size_t capacity; | |
7827 | |
7828 union | |
7829 { | |
7830 char data[xpath_memory_page_size]; | |
7831 double alignment; | |
7832 }; | |
7833 }; | |
7834 | |
7835 struct xpath_allocator | |
7836 { | |
7837 xpath_memory_block* _root; | |
7838 size_t _root_size; | |
7839 bool* _error; | |
7840 | |
7841 xpath_allocator(xpath_memory_block* root, bool* error = 0): _root(root), _root_size(0), _error(error) | |
7842 { | |
7843 } | |
7844 | |
7845 void* allocate(size_t size) | |
7846 { | |
7847 // round size up to block alignment boundary | |
7848 size = (size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); | |
7849 | |
7850 if (_root_size + size <= _root->capacity) | |
7851 { | |
7852 void* buf = &_root->data[0] + _root_size; | |
7853 _root_size += size; | |
7854 return buf; | |
7855 } | |
7856 else | |
7857 { | |
7858 // make sure we have at least 1/4th of the page free after allocation to satisfy subsequent allocation requests | |
7859 size_t block_capacity_base = sizeof(_root->data); | |
7860 size_t block_capacity_req = size + block_capacity_base / 4; | |
7861 size_t block_capacity = (block_capacity_base > block_capacity_req) ? block_capacity_base : block_capacity_req; | |
7862 | |
7863 size_t block_size = block_capacity + offsetof(xpath_memory_block, data); | |
7864 | |
7865 xpath_memory_block* block = static_cast<xpath_memory_block*>(xml_memory::allocate(block_size)); | |
7866 if (!block) | |
7867 { | |
7868 if (_error) *_error = true; | |
7869 return 0; | |
7870 } | |
7871 | |
7872 block->next = _root; | |
7873 block->capacity = block_capacity; | |
7874 | |
7875 _root = block; | |
7876 _root_size = size; | |
7877 | |
7878 return block->data; | |
7879 } | |
7880 } | |
7881 | |
7882 void* reallocate(void* ptr, size_t old_size, size_t new_size) | |
7883 { | |
7884 // round size up to block alignment boundary | |
7885 old_size = (old_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); | |
7886 new_size = (new_size + xpath_memory_block_alignment - 1) & ~(xpath_memory_block_alignment - 1); | |
7887 | |
7888 // we can only reallocate the last object | |
7889 assert(ptr == 0 || static_cast<char*>(ptr) + old_size == &_root->data[0] + _root_size); | |
7890 | |
7891 // try to reallocate the object inplace | |
7892 if (ptr && _root_size - old_size + new_size <= _root->capacity) | |
7893 { | |
7894 _root_size = _root_size - old_size + new_size; | |
7895 return ptr; | |
7896 } | |
7897 | |
7898 // allocate a new block | |
7899 void* result = allocate(new_size); | |
7900 if (!result) return 0; | |
7901 | |
7902 // we have a new block | |
7903 if (ptr) | |
7904 { | |
7905 // copy old data (we only support growing) | |
7906 assert(new_size >= old_size); | |
7907 memcpy(result, ptr, old_size); | |
7908 | |
7909 // free the previous page if it had no other objects | |
7910 assert(_root->data == result); | |
7911 assert(_root->next); | |
7912 | |
7913 if (_root->next->data == ptr) | |
7914 { | |
7915 // deallocate the whole page, unless it was the first one | |
7916 xpath_memory_block* next = _root->next->next; | |
7917 | |
7918 if (next) | |
7919 { | |
7920 xml_memory::deallocate(_root->next); | |
7921 _root->next = next; | |
7922 } | |
7923 } | |
7924 } | |
7925 | |
7926 return result; | |
7927 } | |
7928 | |
7929 void revert(const xpath_allocator& state) | |
7930 { | |
7931 // free all new pages | |
7932 xpath_memory_block* cur = _root; | |
7933 | |
7934 while (cur != state._root) | |
7935 { | |
7936 xpath_memory_block* next = cur->next; | |
7937 | |
7938 xml_memory::deallocate(cur); | |
7939 | |
7940 cur = next; | |
7941 } | |
7942 | |
7943 // restore state | |
7944 _root = state._root; | |
7945 _root_size = state._root_size; | |
7946 } | |
7947 | |
7948 void release() | |
7949 { | |
7950 xpath_memory_block* cur = _root; | |
7951 assert(cur); | |
7952 | |
7953 while (cur->next) | |
7954 { | |
7955 xpath_memory_block* next = cur->next; | |
7956 | |
7957 xml_memory::deallocate(cur); | |
7958 | |
7959 cur = next; | |
7960 } | |
7961 } | |
7962 }; | |
7963 | |
7964 struct xpath_allocator_capture | |
7965 { | |
7966 xpath_allocator_capture(xpath_allocator* alloc): _target(alloc), _state(*alloc) | |
7967 { | |
7968 } | |
7969 | |
7970 ~xpath_allocator_capture() | |
7971 { | |
7972 _target->revert(_state); | |
7973 } | |
7974 | |
7975 xpath_allocator* _target; | |
7976 xpath_allocator _state; | |
7977 }; | |
7978 | |
7979 struct xpath_stack | |
7980 { | |
7981 xpath_allocator* result; | |
7982 xpath_allocator* temp; | |
7983 }; | |
7984 | |
7985 struct xpath_stack_data | |
7986 { | |
7987 xpath_memory_block blocks[2]; | |
7988 xpath_allocator result; | |
7989 xpath_allocator temp; | |
7990 xpath_stack stack; | |
7991 bool oom; | |
7992 | |
7993 xpath_stack_data(): result(blocks + 0, &oom), temp(blocks + 1, &oom), oom(false) | |
7994 { | |
7995 blocks[0].next = blocks[1].next = 0; | |
7996 blocks[0].capacity = blocks[1].capacity = sizeof(blocks[0].data); | |
7997 | |
7998 stack.result = &result; | |
7999 stack.temp = &temp; | |
8000 } | |
8001 | |
8002 ~xpath_stack_data() | |
8003 { | |
8004 result.release(); | |
8005 temp.release(); | |
8006 } | |
8007 }; | |
8008 PUGI_IMPL_NS_END | |
8009 | |
8010 // String class | |
8011 PUGI_IMPL_NS_BEGIN | |
8012 class xpath_string | |
8013 { | |
8014 const char_t* _buffer; | |
8015 bool _uses_heap; | |
8016 size_t _length_heap; | |
8017 | |
8018 static char_t* duplicate_string(const char_t* string, size_t length, xpath_allocator* alloc) | |
8019 { | |
8020 char_t* result = static_cast<char_t*>(alloc->allocate((length + 1) * sizeof(char_t))); | |
8021 if (!result) return 0; | |
8022 | |
8023 memcpy(result, string, length * sizeof(char_t)); | |
8024 result[length] = 0; | |
8025 | |
8026 return result; | |
8027 } | |
8028 | |
8029 xpath_string(const char_t* buffer, bool uses_heap_, size_t length_heap): _buffer(buffer), _uses_heap(uses_heap_), _length_heap(length_heap) | |
8030 { | |
8031 } | |
8032 | |
8033 public: | |
8034 static xpath_string from_const(const char_t* str) | |
8035 { | |
8036 return xpath_string(str, false, 0); | |
8037 } | |
8038 | |
8039 static xpath_string from_heap_preallocated(const char_t* begin, const char_t* end) | |
8040 { | |
8041 assert(begin <= end && *end == 0); | |
8042 | |
8043 return xpath_string(begin, true, static_cast<size_t>(end - begin)); | |
8044 } | |
8045 | |
8046 static xpath_string from_heap(const char_t* begin, const char_t* end, xpath_allocator* alloc) | |
8047 { | |
8048 assert(begin <= end); | |
8049 | |
8050 if (begin == end) | |
8051 return xpath_string(); | |
8052 | |
8053 size_t length = static_cast<size_t>(end - begin); | |
8054 const char_t* data = duplicate_string(begin, length, alloc); | |
8055 | |
8056 return data ? xpath_string(data, true, length) : xpath_string(); | |
8057 } | |
8058 | |
8059 xpath_string(): _buffer(PUGIXML_TEXT("")), _uses_heap(false), _length_heap(0) | |
8060 { | |
8061 } | |
8062 | |
8063 void append(const xpath_string& o, xpath_allocator* alloc) | |
8064 { | |
8065 // skip empty sources | |
8066 if (!*o._buffer) return; | |
8067 | |
8068 // fast append for constant empty target and constant source | |
8069 if (!*_buffer && !_uses_heap && !o._uses_heap) | |
8070 { | |
8071 _buffer = o._buffer; | |
8072 } | |
8073 else | |
8074 { | |
8075 // need to make heap copy | |
8076 size_t target_length = length(); | |
8077 size_t source_length = o.length(); | |
8078 size_t result_length = target_length + source_length; | |
8079 | |
8080 // allocate new buffer | |
8081 char_t* result = static_cast<char_t*>(alloc->reallocate(_uses_heap ? const_cast<char_t*>(_buffer) : 0, (target_length + 1) * sizeof(char_t), (result_length + 1) * sizeof(char_t))); | |
8082 if (!result) return; | |
8083 | |
8084 // append first string to the new buffer in case there was no reallocation | |
8085 if (!_uses_heap) memcpy(result, _buffer, target_length * sizeof(char_t)); | |
8086 | |
8087 // append second string to the new buffer | |
8088 memcpy(result + target_length, o._buffer, source_length * sizeof(char_t)); | |
8089 result[result_length] = 0; | |
8090 | |
8091 // finalize | |
8092 _buffer = result; | |
8093 _uses_heap = true; | |
8094 _length_heap = result_length; | |
8095 } | |
8096 } | |
8097 | |
8098 const char_t* c_str() const | |
8099 { | |
8100 return _buffer; | |
8101 } | |
8102 | |
8103 size_t length() const | |
8104 { | |
8105 return _uses_heap ? _length_heap : strlength(_buffer); | |
8106 } | |
8107 | |
8108 char_t* data(xpath_allocator* alloc) | |
8109 { | |
8110 // make private heap copy | |
8111 if (!_uses_heap) | |
8112 { | |
8113 size_t length_ = strlength(_buffer); | |
8114 const char_t* data_ = duplicate_string(_buffer, length_, alloc); | |
8115 | |
8116 if (!data_) return 0; | |
8117 | |
8118 _buffer = data_; | |
8119 _uses_heap = true; | |
8120 _length_heap = length_; | |
8121 } | |
8122 | |
8123 return const_cast<char_t*>(_buffer); | |
8124 } | |
8125 | |
8126 bool empty() const | |
8127 { | |
8128 return *_buffer == 0; | |
8129 } | |
8130 | |
8131 bool operator==(const xpath_string& o) const | |
8132 { | |
8133 return strequal(_buffer, o._buffer); | |
8134 } | |
8135 | |
8136 bool operator!=(const xpath_string& o) const | |
8137 { | |
8138 return !strequal(_buffer, o._buffer); | |
8139 } | |
8140 | |
8141 bool uses_heap() const | |
8142 { | |
8143 return _uses_heap; | |
8144 } | |
8145 }; | |
8146 PUGI_IMPL_NS_END | |
8147 | |
8148 PUGI_IMPL_NS_BEGIN | |
8149 PUGI_IMPL_FN bool starts_with(const char_t* string, const char_t* pattern) | |
8150 { | |
8151 while (*pattern && *string == *pattern) | |
8152 { | |
8153 string++; | |
8154 pattern++; | |
8155 } | |
8156 | |
8157 return *pattern == 0; | |
8158 } | |
8159 | |
8160 PUGI_IMPL_FN const char_t* find_char(const char_t* s, char_t c) | |
8161 { | |
8162 #ifdef PUGIXML_WCHAR_MODE | |
8163 return wcschr(s, c); | |
8164 #else | |
8165 return strchr(s, c); | |
8166 #endif | |
8167 } | |
8168 | |
8169 PUGI_IMPL_FN const char_t* find_substring(const char_t* s, const char_t* p) | |
8170 { | |
8171 #ifdef PUGIXML_WCHAR_MODE | |
8172 // MSVC6 wcsstr bug workaround (if s is empty it always returns 0) | |
8173 return (*p == 0) ? s : wcsstr(s, p); | |
8174 #else | |
8175 return strstr(s, p); | |
8176 #endif | |
8177 } | |
8178 | |
8179 // Converts symbol to lower case, if it is an ASCII one | |
8180 PUGI_IMPL_FN char_t tolower_ascii(char_t ch) | |
8181 { | |
8182 return static_cast<unsigned int>(ch - 'A') < 26 ? static_cast<char_t>(ch | ' ') : ch; | |
8183 } | |
8184 | |
8185 PUGI_IMPL_FN xpath_string string_value(const xpath_node& na, xpath_allocator* alloc) | |
8186 { | |
8187 if (na.attribute()) | |
8188 return xpath_string::from_const(na.attribute().value()); | |
8189 else | |
8190 { | |
8191 xml_node n = na.node(); | |
8192 | |
8193 switch (n.type()) | |
8194 { | |
8195 case node_pcdata: | |
8196 case node_cdata: | |
8197 case node_comment: | |
8198 case node_pi: | |
8199 return xpath_string::from_const(n.value()); | |
8200 | |
8201 case node_document: | |
8202 case node_element: | |
8203 { | |
8204 xpath_string result; | |
8205 | |
8206 // element nodes can have value if parse_embed_pcdata was used | |
8207 if (n.value()[0]) | |
8208 result.append(xpath_string::from_const(n.value()), alloc); | |
8209 | |
8210 xml_node cur = n.first_child(); | |
8211 | |
8212 while (cur && cur != n) | |
8213 { | |
8214 if (cur.type() == node_pcdata || cur.type() == node_cdata) | |
8215 result.append(xpath_string::from_const(cur.value()), alloc); | |
8216 | |
8217 if (cur.first_child()) | |
8218 cur = cur.first_child(); | |
8219 else if (cur.next_sibling()) | |
8220 cur = cur.next_sibling(); | |
8221 else | |
8222 { | |
8223 while (!cur.next_sibling() && cur != n) | |
8224 cur = cur.parent(); | |
8225 | |
8226 if (cur != n) cur = cur.next_sibling(); | |
8227 } | |
8228 } | |
8229 | |
8230 return result; | |
8231 } | |
8232 | |
8233 default: | |
8234 return xpath_string(); | |
8235 } | |
8236 } | |
8237 } | |
8238 | |
8239 PUGI_IMPL_FN bool node_is_before_sibling(xml_node_struct* ln, xml_node_struct* rn) | |
8240 { | |
8241 assert(ln->parent == rn->parent); | |
8242 | |
8243 // there is no common ancestor (the shared parent is null), nodes are from different documents | |
8244 if (!ln->parent) return ln < rn; | |
8245 | |
8246 // determine sibling order | |
8247 xml_node_struct* ls = ln; | |
8248 xml_node_struct* rs = rn; | |
8249 | |
8250 while (ls && rs) | |
8251 { | |
8252 if (ls == rn) return true; | |
8253 if (rs == ln) return false; | |
8254 | |
8255 ls = ls->next_sibling; | |
8256 rs = rs->next_sibling; | |
8257 } | |
8258 | |
8259 // if rn sibling chain ended ln must be before rn | |
8260 return !rs; | |
8261 } | |
8262 | |
8263 PUGI_IMPL_FN bool node_is_before(xml_node_struct* ln, xml_node_struct* rn) | |
8264 { | |
8265 // find common ancestor at the same depth, if any | |
8266 xml_node_struct* lp = ln; | |
8267 xml_node_struct* rp = rn; | |
8268 | |
8269 while (lp && rp && lp->parent != rp->parent) | |
8270 { | |
8271 lp = lp->parent; | |
8272 rp = rp->parent; | |
8273 } | |
8274 | |
8275 // parents are the same! | |
8276 if (lp && rp) return node_is_before_sibling(lp, rp); | |
8277 | |
8278 // nodes are at different depths, need to normalize heights | |
8279 bool left_higher = !lp; | |
8280 | |
8281 while (lp) | |
8282 { | |
8283 lp = lp->parent; | |
8284 ln = ln->parent; | |
8285 } | |
8286 | |
8287 while (rp) | |
8288 { | |
8289 rp = rp->parent; | |
8290 rn = rn->parent; | |
8291 } | |
8292 | |
8293 // one node is the ancestor of the other | |
8294 if (ln == rn) return left_higher; | |
8295 | |
8296 // find common ancestor... again | |
8297 while (ln->parent != rn->parent) | |
8298 { | |
8299 ln = ln->parent; | |
8300 rn = rn->parent; | |
8301 } | |
8302 | |
8303 return node_is_before_sibling(ln, rn); | |
8304 } | |
8305 | |
8306 PUGI_IMPL_FN bool node_is_ancestor(xml_node_struct* parent, xml_node_struct* node) | |
8307 { | |
8308 while (node && node != parent) node = node->parent; | |
8309 | |
8310 return parent && node == parent; | |
8311 } | |
8312 | |
8313 PUGI_IMPL_FN const void* document_buffer_order(const xpath_node& xnode) | |
8314 { | |
8315 xml_node_struct* node = xnode.node().internal_object(); | |
8316 | |
8317 if (node) | |
8318 { | |
8319 if ((get_document(node).header & xml_memory_page_contents_shared_mask) == 0) | |
8320 { | |
8321 if (node->name && (node->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return node->name; | |
8322 if (node->value && (node->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return node->value; | |
8323 } | |
8324 | |
8325 return 0; | |
8326 } | |
8327 | |
8328 xml_attribute_struct* attr = xnode.attribute().internal_object(); | |
8329 | |
8330 if (attr) | |
8331 { | |
8332 if ((get_document(attr).header & xml_memory_page_contents_shared_mask) == 0) | |
8333 { | |
8334 if ((attr->header & impl::xml_memory_page_name_allocated_or_shared_mask) == 0) return attr->name; | |
8335 if ((attr->header & impl::xml_memory_page_value_allocated_or_shared_mask) == 0) return attr->value; | |
8336 } | |
8337 | |
8338 return 0; | |
8339 } | |
8340 | |
8341 return 0; | |
8342 } | |
8343 | |
8344 struct document_order_comparator | |
8345 { | |
8346 bool operator()(const xpath_node& lhs, const xpath_node& rhs) const | |
8347 { | |
8348 // optimized document order based check | |
8349 const void* lo = document_buffer_order(lhs); | |
8350 const void* ro = document_buffer_order(rhs); | |
8351 | |
8352 if (lo && ro) return lo < ro; | |
8353 | |
8354 // slow comparison | |
8355 xml_node ln = lhs.node(), rn = rhs.node(); | |
8356 | |
8357 // compare attributes | |
8358 if (lhs.attribute() && rhs.attribute()) | |
8359 { | |
8360 // shared parent | |
8361 if (lhs.parent() == rhs.parent()) | |
8362 { | |
8363 // determine sibling order | |
8364 for (xml_attribute a = lhs.attribute(); a; a = a.next_attribute()) | |
8365 if (a == rhs.attribute()) | |
8366 return true; | |
8367 | |
8368 return false; | |
8369 } | |
8370 | |
8371 // compare attribute parents | |
8372 ln = lhs.parent(); | |
8373 rn = rhs.parent(); | |
8374 } | |
8375 else if (lhs.attribute()) | |
8376 { | |
8377 // attributes go after the parent element | |
8378 if (lhs.parent() == rhs.node()) return false; | |
8379 | |
8380 ln = lhs.parent(); | |
8381 } | |
8382 else if (rhs.attribute()) | |
8383 { | |
8384 // attributes go after the parent element | |
8385 if (rhs.parent() == lhs.node()) return true; | |
8386 | |
8387 rn = rhs.parent(); | |
8388 } | |
8389 | |
8390 if (ln == rn) return false; | |
8391 | |
8392 if (!ln || !rn) return ln < rn; | |
8393 | |
8394 return node_is_before(ln.internal_object(), rn.internal_object()); | |
8395 } | |
8396 }; | |
8397 | |
8398 PUGI_IMPL_FN double gen_nan() | |
8399 { | |
8400 #if defined(__STDC_IEC_559__) || ((FLT_RADIX - 0 == 2) && (FLT_MAX_EXP - 0 == 128) && (FLT_MANT_DIG - 0 == 24)) | |
8401 PUGI_IMPL_STATIC_ASSERT(sizeof(float) == sizeof(uint32_t)); | |
8402 typedef uint32_t UI; // BCC5 workaround | |
8403 union { float f; UI i; } u; | |
8404 u.i = 0x7fc00000; | |
8405 return double(u.f); | |
8406 #else | |
8407 // fallback | |
8408 const volatile double zero = 0.0; | |
8409 return zero / zero; | |
8410 #endif | |
8411 } | |
8412 | |
8413 PUGI_IMPL_FN bool is_nan(double value) | |
8414 { | |
8415 #if defined(PUGI_IMPL_MSVC_CRT_VERSION) || defined(__BORLANDC__) | |
8416 return !!_isnan(value); | |
8417 #elif defined(fpclassify) && defined(FP_NAN) | |
8418 return fpclassify(value) == FP_NAN; | |
8419 #else | |
8420 // fallback | |
8421 const volatile double v = value; | |
8422 return v != v; | |
8423 #endif | |
8424 } | |
8425 | |
8426 PUGI_IMPL_FN const char_t* convert_number_to_string_special(double value) | |
8427 { | |
8428 #if defined(PUGI_IMPL_MSVC_CRT_VERSION) || defined(__BORLANDC__) | |
8429 if (_finite(value)) return (value == 0) ? PUGIXML_TEXT("0") : 0; | |
8430 if (_isnan(value)) return PUGIXML_TEXT("NaN"); | |
8431 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); | |
8432 #elif defined(fpclassify) && defined(FP_NAN) && defined(FP_INFINITE) && defined(FP_ZERO) | |
8433 switch (fpclassify(value)) | |
8434 { | |
8435 case FP_NAN: | |
8436 return PUGIXML_TEXT("NaN"); | |
8437 | |
8438 case FP_INFINITE: | |
8439 return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); | |
8440 | |
8441 case FP_ZERO: | |
8442 return PUGIXML_TEXT("0"); | |
8443 | |
8444 default: | |
8445 return 0; | |
8446 } | |
8447 #else | |
8448 // fallback | |
8449 const volatile double v = value; | |
8450 | |
8451 if (v == 0) return PUGIXML_TEXT("0"); | |
8452 if (v != v) return PUGIXML_TEXT("NaN"); | |
8453 if (v * 2 == v) return value > 0 ? PUGIXML_TEXT("Infinity") : PUGIXML_TEXT("-Infinity"); | |
8454 return 0; | |
8455 #endif | |
8456 } | |
8457 | |
8458 PUGI_IMPL_FN bool convert_number_to_boolean(double value) | |
8459 { | |
8460 return (value != 0 && !is_nan(value)); | |
8461 } | |
8462 | |
8463 PUGI_IMPL_FN void truncate_zeros(char* begin, char* end) | |
8464 { | |
8465 while (begin != end && end[-1] == '0') end--; | |
8466 | |
8467 *end = 0; | |
8468 } | |
8469 | |
8470 // gets mantissa digits in the form of 0.xxxxx with 0. implied and the exponent | |
8471 #if defined(PUGI_IMPL_MSVC_CRT_VERSION) && PUGI_IMPL_MSVC_CRT_VERSION >= 1400 | |
8472 PUGI_IMPL_FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) | |
8473 { | |
8474 // get base values | |
8475 int sign, exponent; | |
8476 _ecvt_s(buffer, sizeof(buffer), value, DBL_DIG + 1, &exponent, &sign); | |
8477 | |
8478 // truncate redundant zeros | |
8479 truncate_zeros(buffer, buffer + strlen(buffer)); | |
8480 | |
8481 // fill results | |
8482 *out_mantissa = buffer; | |
8483 *out_exponent = exponent; | |
8484 } | |
8485 #else | |
8486 PUGI_IMPL_FN void convert_number_to_mantissa_exponent(double value, char (&buffer)[32], char** out_mantissa, int* out_exponent) | |
8487 { | |
8488 // get a scientific notation value with IEEE DBL_DIG decimals | |
8489 PUGI_IMPL_SNPRINTF(buffer, "%.*e", DBL_DIG, value); | |
8490 | |
8491 // get the exponent (possibly negative) | |
8492 char* exponent_string = strchr(buffer, 'e'); | |
8493 assert(exponent_string); | |
8494 | |
8495 int exponent = atoi(exponent_string + 1); | |
8496 | |
8497 // extract mantissa string: skip sign | |
8498 char* mantissa = buffer[0] == '-' ? buffer + 1 : buffer; | |
8499 assert(mantissa[0] != '0' && (mantissa[1] == '.' || mantissa[1] == ',')); | |
8500 | |
8501 // divide mantissa by 10 to eliminate integer part | |
8502 mantissa[1] = mantissa[0]; | |
8503 mantissa++; | |
8504 exponent++; | |
8505 | |
8506 // remove extra mantissa digits and zero-terminate mantissa | |
8507 truncate_zeros(mantissa, exponent_string); | |
8508 | |
8509 // fill results | |
8510 *out_mantissa = mantissa; | |
8511 *out_exponent = exponent; | |
8512 } | |
8513 #endif | |
8514 | |
8515 PUGI_IMPL_FN xpath_string convert_number_to_string(double value, xpath_allocator* alloc) | |
8516 { | |
8517 // try special number conversion | |
8518 const char_t* special = convert_number_to_string_special(value); | |
8519 if (special) return xpath_string::from_const(special); | |
8520 | |
8521 // get mantissa + exponent form | |
8522 char mantissa_buffer[32]; | |
8523 | |
8524 char* mantissa; | |
8525 int exponent; | |
8526 convert_number_to_mantissa_exponent(value, mantissa_buffer, &mantissa, &exponent); | |
8527 | |
8528 // allocate a buffer of suitable length for the number | |
8529 size_t result_size = strlen(mantissa_buffer) + (exponent > 0 ? exponent : -exponent) + 4; | |
8530 char_t* result = static_cast<char_t*>(alloc->allocate(sizeof(char_t) * result_size)); | |
8531 if (!result) return xpath_string(); | |
8532 | |
8533 // make the number! | |
8534 char_t* s = result; | |
8535 | |
8536 // sign | |
8537 if (value < 0) *s++ = '-'; | |
8538 | |
8539 // integer part | |
8540 if (exponent <= 0) | |
8541 { | |
8542 *s++ = '0'; | |
8543 } | |
8544 else | |
8545 { | |
8546 while (exponent > 0) | |
8547 { | |
8548 assert(*mantissa == 0 || static_cast<unsigned int>(*mantissa - '0') <= 9); | |
8549 *s++ = *mantissa ? *mantissa++ : '0'; | |
8550 exponent--; | |
8551 } | |
8552 } | |
8553 | |
8554 // fractional part | |
8555 if (*mantissa) | |
8556 { | |
8557 // decimal point | |
8558 *s++ = '.'; | |
8559 | |
8560 // extra zeroes from negative exponent | |
8561 while (exponent < 0) | |
8562 { | |
8563 *s++ = '0'; | |
8564 exponent++; | |
8565 } | |
8566 | |
8567 // extra mantissa digits | |
8568 while (*mantissa) | |
8569 { | |
8570 assert(static_cast<unsigned int>(*mantissa - '0') <= 9); | |
8571 *s++ = *mantissa++; | |
8572 } | |
8573 } | |
8574 | |
8575 // zero-terminate | |
8576 assert(s < result + result_size); | |
8577 *s = 0; | |
8578 | |
8579 return xpath_string::from_heap_preallocated(result, s); | |
8580 } | |
8581 | |
8582 PUGI_IMPL_FN bool check_string_to_number_format(const char_t* string) | |
8583 { | |
8584 // parse leading whitespace | |
8585 while (PUGI_IMPL_IS_CHARTYPE(*string, ct_space)) ++string; | |
8586 | |
8587 // parse sign | |
8588 if (*string == '-') ++string; | |
8589 | |
8590 if (!*string) return false; | |
8591 | |
8592 // if there is no integer part, there should be a decimal part with at least one digit | |
8593 if (!PUGI_IMPL_IS_CHARTYPEX(string[0], ctx_digit) && (string[0] != '.' || !PUGI_IMPL_IS_CHARTYPEX(string[1], ctx_digit))) return false; | |
8594 | |
8595 // parse integer part | |
8596 while (PUGI_IMPL_IS_CHARTYPEX(*string, ctx_digit)) ++string; | |
8597 | |
8598 // parse decimal part | |
8599 if (*string == '.') | |
8600 { | |
8601 ++string; | |
8602 | |
8603 while (PUGI_IMPL_IS_CHARTYPEX(*string, ctx_digit)) ++string; | |
8604 } | |
8605 | |
8606 // parse trailing whitespace | |
8607 while (PUGI_IMPL_IS_CHARTYPE(*string, ct_space)) ++string; | |
8608 | |
8609 return *string == 0; | |
8610 } | |
8611 | |
8612 PUGI_IMPL_FN double convert_string_to_number(const char_t* string) | |
8613 { | |
8614 // check string format | |
8615 if (!check_string_to_number_format(string)) return gen_nan(); | |
8616 | |
8617 // parse string | |
8618 #ifdef PUGIXML_WCHAR_MODE | |
8619 return wcstod(string, 0); | |
8620 #else | |
8621 return strtod(string, 0); | |
8622 #endif | |
8623 } | |
8624 | |
8625 PUGI_IMPL_FN bool convert_string_to_number_scratch(char_t (&buffer)[32], const char_t* begin, const char_t* end, double* out_result) | |
8626 { | |
8627 size_t length = static_cast<size_t>(end - begin); | |
8628 char_t* scratch = buffer; | |
8629 | |
8630 if (length >= sizeof(buffer) / sizeof(buffer[0])) | |
8631 { | |
8632 // need to make dummy on-heap copy | |
8633 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
8634 if (!scratch) return false; | |
8635 } | |
8636 | |
8637 // copy string to zero-terminated buffer and perform conversion | |
8638 memcpy(scratch, begin, length * sizeof(char_t)); | |
8639 scratch[length] = 0; | |
8640 | |
8641 *out_result = convert_string_to_number(scratch); | |
8642 | |
8643 // free dummy buffer | |
8644 if (scratch != buffer) xml_memory::deallocate(scratch); | |
8645 | |
8646 return true; | |
8647 } | |
8648 | |
8649 PUGI_IMPL_FN double round_nearest(double value) | |
8650 { | |
8651 return floor(value + 0.5); | |
8652 } | |
8653 | |
8654 PUGI_IMPL_FN double round_nearest_nzero(double value) | |
8655 { | |
8656 // same as round_nearest, but returns -0 for [-0.5, -0] | |
8657 // ceil is used to differentiate between +0 and -0 (we return -0 for [-0.5, -0] and +0 for +0) | |
8658 return (value >= -0.5 && value <= 0) ? ceil(value) : floor(value + 0.5); | |
8659 } | |
8660 | |
8661 PUGI_IMPL_FN const char_t* qualified_name(const xpath_node& node) | |
8662 { | |
8663 return node.attribute() ? node.attribute().name() : node.node().name(); | |
8664 } | |
8665 | |
8666 PUGI_IMPL_FN const char_t* local_name(const xpath_node& node) | |
8667 { | |
8668 const char_t* name = qualified_name(node); | |
8669 const char_t* p = find_char(name, ':'); | |
8670 | |
8671 return p ? p + 1 : name; | |
8672 } | |
8673 | |
8674 struct namespace_uri_predicate | |
8675 { | |
8676 const char_t* prefix; | |
8677 size_t prefix_length; | |
8678 | |
8679 namespace_uri_predicate(const char_t* name) | |
8680 { | |
8681 const char_t* pos = find_char(name, ':'); | |
8682 | |
8683 prefix = pos ? name : 0; | |
8684 prefix_length = pos ? static_cast<size_t>(pos - name) : 0; | |
8685 } | |
8686 | |
8687 bool operator()(xml_attribute a) const | |
8688 { | |
8689 const char_t* name = a.name(); | |
8690 | |
8691 if (!starts_with(name, PUGIXML_TEXT("xmlns"))) return false; | |
8692 | |
8693 return prefix ? name[5] == ':' && strequalrange(name + 6, prefix, prefix_length) : name[5] == 0; | |
8694 } | |
8695 }; | |
8696 | |
8697 PUGI_IMPL_FN const char_t* namespace_uri(xml_node node) | |
8698 { | |
8699 namespace_uri_predicate pred = node.name(); | |
8700 | |
8701 xml_node p = node; | |
8702 | |
8703 while (p) | |
8704 { | |
8705 xml_attribute a = p.find_attribute(pred); | |
8706 | |
8707 if (a) return a.value(); | |
8708 | |
8709 p = p.parent(); | |
8710 } | |
8711 | |
8712 return PUGIXML_TEXT(""); | |
8713 } | |
8714 | |
8715 PUGI_IMPL_FN const char_t* namespace_uri(xml_attribute attr, xml_node parent) | |
8716 { | |
8717 namespace_uri_predicate pred = attr.name(); | |
8718 | |
8719 // Default namespace does not apply to attributes | |
8720 if (!pred.prefix) return PUGIXML_TEXT(""); | |
8721 | |
8722 xml_node p = parent; | |
8723 | |
8724 while (p) | |
8725 { | |
8726 xml_attribute a = p.find_attribute(pred); | |
8727 | |
8728 if (a) return a.value(); | |
8729 | |
8730 p = p.parent(); | |
8731 } | |
8732 | |
8733 return PUGIXML_TEXT(""); | |
8734 } | |
8735 | |
8736 PUGI_IMPL_FN const char_t* namespace_uri(const xpath_node& node) | |
8737 { | |
8738 return node.attribute() ? namespace_uri(node.attribute(), node.parent()) : namespace_uri(node.node()); | |
8739 } | |
8740 | |
8741 PUGI_IMPL_FN char_t* normalize_space(char_t* buffer) | |
8742 { | |
8743 char_t* write = buffer; | |
8744 | |
8745 for (char_t* it = buffer; *it; ) | |
8746 { | |
8747 char_t ch = *it++; | |
8748 | |
8749 if (PUGI_IMPL_IS_CHARTYPE(ch, ct_space)) | |
8750 { | |
8751 // replace whitespace sequence with single space | |
8752 while (PUGI_IMPL_IS_CHARTYPE(*it, ct_space)) it++; | |
8753 | |
8754 // avoid leading spaces | |
8755 if (write != buffer) *write++ = ' '; | |
8756 } | |
8757 else *write++ = ch; | |
8758 } | |
8759 | |
8760 // remove trailing space | |
8761 if (write != buffer && PUGI_IMPL_IS_CHARTYPE(write[-1], ct_space)) write--; | |
8762 | |
8763 // zero-terminate | |
8764 *write = 0; | |
8765 | |
8766 return write; | |
8767 } | |
8768 | |
8769 PUGI_IMPL_FN char_t* translate(char_t* buffer, const char_t* from, const char_t* to, size_t to_length) | |
8770 { | |
8771 char_t* write = buffer; | |
8772 | |
8773 while (*buffer) | |
8774 { | |
8775 PUGI_IMPL_DMC_VOLATILE char_t ch = *buffer++; | |
8776 | |
8777 const char_t* pos = find_char(from, ch); | |
8778 | |
8779 if (!pos) | |
8780 *write++ = ch; // do not process | |
8781 else if (static_cast<size_t>(pos - from) < to_length) | |
8782 *write++ = to[pos - from]; // replace | |
8783 } | |
8784 | |
8785 // zero-terminate | |
8786 *write = 0; | |
8787 | |
8788 return write; | |
8789 } | |
8790 | |
8791 PUGI_IMPL_FN unsigned char* translate_table_generate(xpath_allocator* alloc, const char_t* from, const char_t* to) | |
8792 { | |
8793 unsigned char table[128] = {0}; | |
8794 | |
8795 while (*from) | |
8796 { | |
8797 unsigned int fc = static_cast<unsigned int>(*from); | |
8798 unsigned int tc = static_cast<unsigned int>(*to); | |
8799 | |
8800 if (fc >= 128 || tc >= 128) | |
8801 return 0; | |
8802 | |
8803 // code=128 means "skip character" | |
8804 if (!table[fc]) | |
8805 table[fc] = static_cast<unsigned char>(tc ? tc : 128); | |
8806 | |
8807 from++; | |
8808 if (tc) to++; | |
8809 } | |
8810 | |
8811 for (int i = 0; i < 128; ++i) | |
8812 if (!table[i]) | |
8813 table[i] = static_cast<unsigned char>(i); | |
8814 | |
8815 void* result = alloc->allocate(sizeof(table)); | |
8816 if (!result) return 0; | |
8817 | |
8818 memcpy(result, table, sizeof(table)); | |
8819 | |
8820 return static_cast<unsigned char*>(result); | |
8821 } | |
8822 | |
8823 PUGI_IMPL_FN char_t* translate_table(char_t* buffer, const unsigned char* table) | |
8824 { | |
8825 char_t* write = buffer; | |
8826 | |
8827 while (*buffer) | |
8828 { | |
8829 char_t ch = *buffer++; | |
8830 unsigned int index = static_cast<unsigned int>(ch); | |
8831 | |
8832 if (index < 128) | |
8833 { | |
8834 unsigned char code = table[index]; | |
8835 | |
8836 // code=128 means "skip character" (table size is 128 so 128 can be a special value) | |
8837 // this code skips these characters without extra branches | |
8838 *write = static_cast<char_t>(code); | |
8839 write += 1 - (code >> 7); | |
8840 } | |
8841 else | |
8842 { | |
8843 *write++ = ch; | |
8844 } | |
8845 } | |
8846 | |
8847 // zero-terminate | |
8848 *write = 0; | |
8849 | |
8850 return write; | |
8851 } | |
8852 | |
8853 inline bool is_xpath_attribute(const char_t* name) | |
8854 { | |
8855 return !(starts_with(name, PUGIXML_TEXT("xmlns")) && (name[5] == 0 || name[5] == ':')); | |
8856 } | |
8857 | |
8858 struct xpath_variable_boolean: xpath_variable | |
8859 { | |
8860 xpath_variable_boolean(): xpath_variable(xpath_type_boolean), value(false) | |
8861 { | |
8862 } | |
8863 | |
8864 bool value; | |
8865 char_t name[1]; | |
8866 }; | |
8867 | |
8868 struct xpath_variable_number: xpath_variable | |
8869 { | |
8870 xpath_variable_number(): xpath_variable(xpath_type_number), value(0) | |
8871 { | |
8872 } | |
8873 | |
8874 double value; | |
8875 char_t name[1]; | |
8876 }; | |
8877 | |
8878 struct xpath_variable_string: xpath_variable | |
8879 { | |
8880 xpath_variable_string(): xpath_variable(xpath_type_string), value(0) | |
8881 { | |
8882 } | |
8883 | |
8884 ~xpath_variable_string() | |
8885 { | |
8886 if (value) xml_memory::deallocate(value); | |
8887 } | |
8888 | |
8889 char_t* value; | |
8890 char_t name[1]; | |
8891 }; | |
8892 | |
8893 struct xpath_variable_node_set: xpath_variable | |
8894 { | |
8895 xpath_variable_node_set(): xpath_variable(xpath_type_node_set) | |
8896 { | |
8897 } | |
8898 | |
8899 xpath_node_set value; | |
8900 char_t name[1]; | |
8901 }; | |
8902 | |
8903 static const xpath_node_set dummy_node_set; | |
8904 | |
8905 PUGI_IMPL_FN PUGI_IMPL_UNSIGNED_OVERFLOW unsigned int hash_string(const char_t* str) | |
8906 { | |
8907 // Jenkins one-at-a-time hash (http://en.wikipedia.org/wiki/Jenkins_hash_function#one-at-a-time) | |
8908 unsigned int result = 0; | |
8909 | |
8910 while (*str) | |
8911 { | |
8912 result += static_cast<unsigned int>(*str++); | |
8913 result += result << 10; | |
8914 result ^= result >> 6; | |
8915 } | |
8916 | |
8917 result += result << 3; | |
8918 result ^= result >> 11; | |
8919 result += result << 15; | |
8920 | |
8921 return result; | |
8922 } | |
8923 | |
8924 template <typename T> PUGI_IMPL_FN T* new_xpath_variable(const char_t* name) | |
8925 { | |
8926 size_t length = strlength(name); | |
8927 if (length == 0) return 0; // empty variable names are invalid | |
8928 | |
8929 // $$ we can't use offsetof(T, name) because T is non-POD, so we just allocate additional length characters | |
8930 void* memory = xml_memory::allocate(sizeof(T) + length * sizeof(char_t)); | |
8931 if (!memory) return 0; | |
8932 | |
8933 T* result = new (memory) T(); | |
8934 | |
8935 memcpy(result->name, name, (length + 1) * sizeof(char_t)); | |
8936 | |
8937 return result; | |
8938 } | |
8939 | |
8940 PUGI_IMPL_FN xpath_variable* new_xpath_variable(xpath_value_type type, const char_t* name) | |
8941 { | |
8942 switch (type) | |
8943 { | |
8944 case xpath_type_node_set: | |
8945 return new_xpath_variable<xpath_variable_node_set>(name); | |
8946 | |
8947 case xpath_type_number: | |
8948 return new_xpath_variable<xpath_variable_number>(name); | |
8949 | |
8950 case xpath_type_string: | |
8951 return new_xpath_variable<xpath_variable_string>(name); | |
8952 | |
8953 case xpath_type_boolean: | |
8954 return new_xpath_variable<xpath_variable_boolean>(name); | |
8955 | |
8956 default: | |
8957 return 0; | |
8958 } | |
8959 } | |
8960 | |
8961 template <typename T> PUGI_IMPL_FN void delete_xpath_variable(T* var) | |
8962 { | |
8963 var->~T(); | |
8964 xml_memory::deallocate(var); | |
8965 } | |
8966 | |
8967 PUGI_IMPL_FN void delete_xpath_variable(xpath_value_type type, xpath_variable* var) | |
8968 { | |
8969 switch (type) | |
8970 { | |
8971 case xpath_type_node_set: | |
8972 delete_xpath_variable(static_cast<xpath_variable_node_set*>(var)); | |
8973 break; | |
8974 | |
8975 case xpath_type_number: | |
8976 delete_xpath_variable(static_cast<xpath_variable_number*>(var)); | |
8977 break; | |
8978 | |
8979 case xpath_type_string: | |
8980 delete_xpath_variable(static_cast<xpath_variable_string*>(var)); | |
8981 break; | |
8982 | |
8983 case xpath_type_boolean: | |
8984 delete_xpath_variable(static_cast<xpath_variable_boolean*>(var)); | |
8985 break; | |
8986 | |
8987 default: | |
8988 assert(false && "Invalid variable type"); // unreachable | |
8989 } | |
8990 } | |
8991 | |
8992 PUGI_IMPL_FN bool copy_xpath_variable(xpath_variable* lhs, const xpath_variable* rhs) | |
8993 { | |
8994 switch (rhs->type()) | |
8995 { | |
8996 case xpath_type_node_set: | |
8997 return lhs->set(static_cast<const xpath_variable_node_set*>(rhs)->value); | |
8998 | |
8999 case xpath_type_number: | |
9000 return lhs->set(static_cast<const xpath_variable_number*>(rhs)->value); | |
9001 | |
9002 case xpath_type_string: | |
9003 return lhs->set(static_cast<const xpath_variable_string*>(rhs)->value); | |
9004 | |
9005 case xpath_type_boolean: | |
9006 return lhs->set(static_cast<const xpath_variable_boolean*>(rhs)->value); | |
9007 | |
9008 default: | |
9009 assert(false && "Invalid variable type"); // unreachable | |
9010 return false; | |
9011 } | |
9012 } | |
9013 | |
9014 PUGI_IMPL_FN bool get_variable_scratch(char_t (&buffer)[32], xpath_variable_set* set, const char_t* begin, const char_t* end, xpath_variable** out_result) | |
9015 { | |
9016 size_t length = static_cast<size_t>(end - begin); | |
9017 char_t* scratch = buffer; | |
9018 | |
9019 if (length >= sizeof(buffer) / sizeof(buffer[0])) | |
9020 { | |
9021 // need to make dummy on-heap copy | |
9022 scratch = static_cast<char_t*>(xml_memory::allocate((length + 1) * sizeof(char_t))); | |
9023 if (!scratch) return false; | |
9024 } | |
9025 | |
9026 // copy string to zero-terminated buffer and perform lookup | |
9027 memcpy(scratch, begin, length * sizeof(char_t)); | |
9028 scratch[length] = 0; | |
9029 | |
9030 *out_result = set->get(scratch); | |
9031 | |
9032 // free dummy buffer | |
9033 if (scratch != buffer) xml_memory::deallocate(scratch); | |
9034 | |
9035 return true; | |
9036 } | |
9037 PUGI_IMPL_NS_END | |
9038 | |
9039 // Internal node set class | |
9040 PUGI_IMPL_NS_BEGIN | |
9041 PUGI_IMPL_FN xpath_node_set::type_t xpath_get_order(const xpath_node* begin, const xpath_node* end) | |
9042 { | |
9043 if (end - begin < 2) | |
9044 return xpath_node_set::type_sorted; | |
9045 | |
9046 document_order_comparator cmp; | |
9047 | |
9048 bool first = cmp(begin[0], begin[1]); | |
9049 | |
9050 for (const xpath_node* it = begin + 1; it + 1 < end; ++it) | |
9051 if (cmp(it[0], it[1]) != first) | |
9052 return xpath_node_set::type_unsorted; | |
9053 | |
9054 return first ? xpath_node_set::type_sorted : xpath_node_set::type_sorted_reverse; | |
9055 } | |
9056 | |
9057 PUGI_IMPL_FN xpath_node_set::type_t xpath_sort(xpath_node* begin, xpath_node* end, xpath_node_set::type_t type, bool rev) | |
9058 { | |
9059 xpath_node_set::type_t order = rev ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; | |
9060 | |
9061 if (type == xpath_node_set::type_unsorted) | |
9062 { | |
9063 xpath_node_set::type_t sorted = xpath_get_order(begin, end); | |
9064 | |
9065 if (sorted == xpath_node_set::type_unsorted) | |
9066 { | |
9067 sort(begin, end, document_order_comparator()); | |
9068 | |
9069 type = xpath_node_set::type_sorted; | |
9070 } | |
9071 else | |
9072 type = sorted; | |
9073 } | |
9074 | |
9075 if (type != order) reverse(begin, end); | |
9076 | |
9077 return order; | |
9078 } | |
9079 | |
9080 PUGI_IMPL_FN xpath_node xpath_first(const xpath_node* begin, const xpath_node* end, xpath_node_set::type_t type) | |
9081 { | |
9082 if (begin == end) return xpath_node(); | |
9083 | |
9084 switch (type) | |
9085 { | |
9086 case xpath_node_set::type_sorted: | |
9087 return *begin; | |
9088 | |
9089 case xpath_node_set::type_sorted_reverse: | |
9090 return *(end - 1); | |
9091 | |
9092 case xpath_node_set::type_unsorted: | |
9093 return *min_element(begin, end, document_order_comparator()); | |
9094 | |
9095 default: | |
9096 assert(false && "Invalid node set type"); // unreachable | |
9097 return xpath_node(); | |
9098 } | |
9099 } | |
9100 | |
9101 class xpath_node_set_raw | |
9102 { | |
9103 xpath_node_set::type_t _type; | |
9104 | |
9105 xpath_node* _begin; | |
9106 xpath_node* _end; | |
9107 xpath_node* _eos; | |
9108 | |
9109 public: | |
9110 xpath_node_set_raw(): _type(xpath_node_set::type_unsorted), _begin(0), _end(0), _eos(0) | |
9111 { | |
9112 } | |
9113 | |
9114 xpath_node* begin() const | |
9115 { | |
9116 return _begin; | |
9117 } | |
9118 | |
9119 xpath_node* end() const | |
9120 { | |
9121 return _end; | |
9122 } | |
9123 | |
9124 bool empty() const | |
9125 { | |
9126 return _begin == _end; | |
9127 } | |
9128 | |
9129 size_t size() const | |
9130 { | |
9131 return static_cast<size_t>(_end - _begin); | |
9132 } | |
9133 | |
9134 xpath_node first() const | |
9135 { | |
9136 return xpath_first(_begin, _end, _type); | |
9137 } | |
9138 | |
9139 void push_back_grow(const xpath_node& node, xpath_allocator* alloc); | |
9140 | |
9141 void push_back(const xpath_node& node, xpath_allocator* alloc) | |
9142 { | |
9143 if (_end != _eos) | |
9144 *_end++ = node; | |
9145 else | |
9146 push_back_grow(node, alloc); | |
9147 } | |
9148 | |
9149 void append(const xpath_node* begin_, const xpath_node* end_, xpath_allocator* alloc) | |
9150 { | |
9151 if (begin_ == end_) return; | |
9152 | |
9153 size_t size_ = static_cast<size_t>(_end - _begin); | |
9154 size_t capacity = static_cast<size_t>(_eos - _begin); | |
9155 size_t count = static_cast<size_t>(end_ - begin_); | |
9156 | |
9157 if (size_ + count > capacity) | |
9158 { | |
9159 // reallocate the old array or allocate a new one | |
9160 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), (size_ + count) * sizeof(xpath_node))); | |
9161 if (!data) return; | |
9162 | |
9163 // finalize | |
9164 _begin = data; | |
9165 _end = data + size_; | |
9166 _eos = data + size_ + count; | |
9167 } | |
9168 | |
9169 memcpy(_end, begin_, count * sizeof(xpath_node)); | |
9170 _end += count; | |
9171 } | |
9172 | |
9173 void sort_do() | |
9174 { | |
9175 _type = xpath_sort(_begin, _end, _type, false); | |
9176 } | |
9177 | |
9178 void truncate(xpath_node* pos) | |
9179 { | |
9180 assert(_begin <= pos && pos <= _end); | |
9181 | |
9182 _end = pos; | |
9183 } | |
9184 | |
9185 void remove_duplicates(xpath_allocator* alloc) | |
9186 { | |
9187 if (_type == xpath_node_set::type_unsorted && _end - _begin > 2) | |
9188 { | |
9189 xpath_allocator_capture cr(alloc); | |
9190 | |
9191 size_t size_ = static_cast<size_t>(_end - _begin); | |
9192 | |
9193 size_t hash_size = 1; | |
9194 while (hash_size < size_ + size_ / 2) hash_size *= 2; | |
9195 | |
9196 const void** hash_data = static_cast<const void**>(alloc->allocate(hash_size * sizeof(void**))); | |
9197 if (!hash_data) return; | |
9198 | |
9199 memset(hash_data, 0, hash_size * sizeof(const void**)); | |
9200 | |
9201 xpath_node* write = _begin; | |
9202 | |
9203 for (xpath_node* it = _begin; it != _end; ++it) | |
9204 { | |
9205 const void* attr = it->attribute().internal_object(); | |
9206 const void* node = it->node().internal_object(); | |
9207 const void* key = attr ? attr : node; | |
9208 | |
9209 if (key && hash_insert(hash_data, hash_size, key)) | |
9210 { | |
9211 *write++ = *it; | |
9212 } | |
9213 } | |
9214 | |
9215 _end = write; | |
9216 } | |
9217 else | |
9218 { | |
9219 _end = unique(_begin, _end); | |
9220 } | |
9221 } | |
9222 | |
9223 xpath_node_set::type_t type() const | |
9224 { | |
9225 return _type; | |
9226 } | |
9227 | |
9228 void set_type(xpath_node_set::type_t value) | |
9229 { | |
9230 _type = value; | |
9231 } | |
9232 }; | |
9233 | |
9234 PUGI_IMPL_FN_NO_INLINE void xpath_node_set_raw::push_back_grow(const xpath_node& node, xpath_allocator* alloc) | |
9235 { | |
9236 size_t capacity = static_cast<size_t>(_eos - _begin); | |
9237 | |
9238 // get new capacity (1.5x rule) | |
9239 size_t new_capacity = capacity + capacity / 2 + 1; | |
9240 | |
9241 // reallocate the old array or allocate a new one | |
9242 xpath_node* data = static_cast<xpath_node*>(alloc->reallocate(_begin, capacity * sizeof(xpath_node), new_capacity * sizeof(xpath_node))); | |
9243 if (!data) return; | |
9244 | |
9245 // finalize | |
9246 _begin = data; | |
9247 _end = data + capacity; | |
9248 _eos = data + new_capacity; | |
9249 | |
9250 // push | |
9251 *_end++ = node; | |
9252 } | |
9253 PUGI_IMPL_NS_END | |
9254 | |
9255 PUGI_IMPL_NS_BEGIN | |
9256 struct xpath_context | |
9257 { | |
9258 xpath_node n; | |
9259 size_t position, size; | |
9260 | |
9261 xpath_context(const xpath_node& n_, size_t position_, size_t size_): n(n_), position(position_), size(size_) | |
9262 { | |
9263 } | |
9264 }; | |
9265 | |
// Token kinds produced by xpath_lexer.
enum lexeme_t
{
    lex_none = 0,            // unrecognized or malformed input
    lex_equal,               // =
    lex_not_equal,           // !=
    lex_less,                // <
    lex_greater,             // >
    lex_less_or_equal,       // <=
    lex_greater_or_equal,    // >=
    lex_plus,                // +
    lex_minus,               // -
    lex_multiply,            // *
    lex_union,               // |
    lex_var_ref,             // $name or $prefix:name (contents exclude the '$')
    lex_open_brace,          // (
    lex_close_brace,         // )
    lex_quoted_string,       // "..." or '...' (contents exclude the quotes)
    lex_number,              // digits with optional fraction, or '.' followed by digits
    lex_slash,               // /
    lex_double_slash,        // //
    lex_open_square_brace,   // [
    lex_close_square_brace,  // ]
    lex_string,              // name, prefix:name or prefix:*
    lex_comma,               // ,
    lex_axis_attribute,      // @
    lex_dot,                 // .
    lex_double_dot,          // ..
    lex_double_colon,        // ::
    lex_eof                  // end of input
};
9296 | |
9297 struct xpath_lexer_string | |
9298 { | |
9299 const char_t* begin; | |
9300 const char_t* end; | |
9301 | |
9302 xpath_lexer_string(): begin(0), end(0) | |
9303 { | |
9304 } | |
9305 | |
9306 bool operator==(const char_t* other) const | |
9307 { | |
9308 size_t length = static_cast<size_t>(end - begin); | |
9309 | |
9310 return strequalrange(other, begin, length); | |
9311 } | |
9312 }; | |
9313 | |
9314 class xpath_lexer | |
9315 { | |
9316 const char_t* _cur; | |
9317 const char_t* _cur_lexeme_pos; | |
9318 xpath_lexer_string _cur_lexeme_contents; | |
9319 | |
9320 lexeme_t _cur_lexeme; | |
9321 | |
9322 public: | |
9323 explicit xpath_lexer(const char_t* query): _cur(query) | |
9324 { | |
9325 next(); | |
9326 } | |
9327 | |
9328 const char_t* state() const | |
9329 { | |
9330 return _cur; | |
9331 } | |
9332 | |
9333 void next() | |
9334 { | |
9335 const char_t* cur = _cur; | |
9336 | |
9337 while (PUGI_IMPL_IS_CHARTYPE(*cur, ct_space)) ++cur; | |
9338 | |
9339 // save lexeme position for error reporting | |
9340 _cur_lexeme_pos = cur; | |
9341 | |
9342 switch (*cur) | |
9343 { | |
9344 case 0: | |
9345 _cur_lexeme = lex_eof; | |
9346 break; | |
9347 | |
9348 case '>': | |
9349 if (*(cur+1) == '=') | |
9350 { | |
9351 cur += 2; | |
9352 _cur_lexeme = lex_greater_or_equal; | |
9353 } | |
9354 else | |
9355 { | |
9356 cur += 1; | |
9357 _cur_lexeme = lex_greater; | |
9358 } | |
9359 break; | |
9360 | |
9361 case '<': | |
9362 if (*(cur+1) == '=') | |
9363 { | |
9364 cur += 2; | |
9365 _cur_lexeme = lex_less_or_equal; | |
9366 } | |
9367 else | |
9368 { | |
9369 cur += 1; | |
9370 _cur_lexeme = lex_less; | |
9371 } | |
9372 break; | |
9373 | |
9374 case '!': | |
9375 if (*(cur+1) == '=') | |
9376 { | |
9377 cur += 2; | |
9378 _cur_lexeme = lex_not_equal; | |
9379 } | |
9380 else | |
9381 { | |
9382 _cur_lexeme = lex_none; | |
9383 } | |
9384 break; | |
9385 | |
9386 case '=': | |
9387 cur += 1; | |
9388 _cur_lexeme = lex_equal; | |
9389 | |
9390 break; | |
9391 | |
9392 case '+': | |
9393 cur += 1; | |
9394 _cur_lexeme = lex_plus; | |
9395 | |
9396 break; | |
9397 | |
9398 case '-': | |
9399 cur += 1; | |
9400 _cur_lexeme = lex_minus; | |
9401 | |
9402 break; | |
9403 | |
9404 case '*': | |
9405 cur += 1; | |
9406 _cur_lexeme = lex_multiply; | |
9407 | |
9408 break; | |
9409 | |
9410 case '|': | |
9411 cur += 1; | |
9412 _cur_lexeme = lex_union; | |
9413 | |
9414 break; | |
9415 | |
9416 case '$': | |
9417 cur += 1; | |
9418 | |
9419 if (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_start_symbol)) | |
9420 { | |
9421 _cur_lexeme_contents.begin = cur; | |
9422 | |
9423 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | |
9424 | |
9425 if (cur[0] == ':' && PUGI_IMPL_IS_CHARTYPEX(cur[1], ctx_symbol)) // qname | |
9426 { | |
9427 cur++; // : | |
9428 | |
9429 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | |
9430 } | |
9431 | |
9432 _cur_lexeme_contents.end = cur; | |
9433 | |
9434 _cur_lexeme = lex_var_ref; | |
9435 } | |
9436 else | |
9437 { | |
9438 _cur_lexeme = lex_none; | |
9439 } | |
9440 | |
9441 break; | |
9442 | |
9443 case '(': | |
9444 cur += 1; | |
9445 _cur_lexeme = lex_open_brace; | |
9446 | |
9447 break; | |
9448 | |
9449 case ')': | |
9450 cur += 1; | |
9451 _cur_lexeme = lex_close_brace; | |
9452 | |
9453 break; | |
9454 | |
9455 case '[': | |
9456 cur += 1; | |
9457 _cur_lexeme = lex_open_square_brace; | |
9458 | |
9459 break; | |
9460 | |
9461 case ']': | |
9462 cur += 1; | |
9463 _cur_lexeme = lex_close_square_brace; | |
9464 | |
9465 break; | |
9466 | |
9467 case ',': | |
9468 cur += 1; | |
9469 _cur_lexeme = lex_comma; | |
9470 | |
9471 break; | |
9472 | |
9473 case '/': | |
9474 if (*(cur+1) == '/') | |
9475 { | |
9476 cur += 2; | |
9477 _cur_lexeme = lex_double_slash; | |
9478 } | |
9479 else | |
9480 { | |
9481 cur += 1; | |
9482 _cur_lexeme = lex_slash; | |
9483 } | |
9484 break; | |
9485 | |
9486 case '.': | |
9487 if (*(cur+1) == '.') | |
9488 { | |
9489 cur += 2; | |
9490 _cur_lexeme = lex_double_dot; | |
9491 } | |
9492 else if (PUGI_IMPL_IS_CHARTYPEX(*(cur+1), ctx_digit)) | |
9493 { | |
9494 _cur_lexeme_contents.begin = cur; // . | |
9495 | |
9496 ++cur; | |
9497 | |
9498 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_digit)) cur++; | |
9499 | |
9500 _cur_lexeme_contents.end = cur; | |
9501 | |
9502 _cur_lexeme = lex_number; | |
9503 } | |
9504 else | |
9505 { | |
9506 cur += 1; | |
9507 _cur_lexeme = lex_dot; | |
9508 } | |
9509 break; | |
9510 | |
9511 case '@': | |
9512 cur += 1; | |
9513 _cur_lexeme = lex_axis_attribute; | |
9514 | |
9515 break; | |
9516 | |
9517 case '"': | |
9518 case '\'': | |
9519 { | |
9520 char_t terminator = *cur; | |
9521 | |
9522 ++cur; | |
9523 | |
9524 _cur_lexeme_contents.begin = cur; | |
9525 while (*cur && *cur != terminator) cur++; | |
9526 _cur_lexeme_contents.end = cur; | |
9527 | |
9528 if (!*cur) | |
9529 _cur_lexeme = lex_none; | |
9530 else | |
9531 { | |
9532 cur += 1; | |
9533 _cur_lexeme = lex_quoted_string; | |
9534 } | |
9535 | |
9536 break; | |
9537 } | |
9538 | |
9539 case ':': | |
9540 if (*(cur+1) == ':') | |
9541 { | |
9542 cur += 2; | |
9543 _cur_lexeme = lex_double_colon; | |
9544 } | |
9545 else | |
9546 { | |
9547 _cur_lexeme = lex_none; | |
9548 } | |
9549 break; | |
9550 | |
9551 default: | |
9552 if (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_digit)) | |
9553 { | |
9554 _cur_lexeme_contents.begin = cur; | |
9555 | |
9556 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_digit)) cur++; | |
9557 | |
9558 if (*cur == '.') | |
9559 { | |
9560 cur++; | |
9561 | |
9562 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_digit)) cur++; | |
9563 } | |
9564 | |
9565 _cur_lexeme_contents.end = cur; | |
9566 | |
9567 _cur_lexeme = lex_number; | |
9568 } | |
9569 else if (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_start_symbol)) | |
9570 { | |
9571 _cur_lexeme_contents.begin = cur; | |
9572 | |
9573 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | |
9574 | |
9575 if (cur[0] == ':') | |
9576 { | |
9577 if (cur[1] == '*') // namespace test ncname:* | |
9578 { | |
9579 cur += 2; // :* | |
9580 } | |
9581 else if (PUGI_IMPL_IS_CHARTYPEX(cur[1], ctx_symbol)) // namespace test qname | |
9582 { | |
9583 cur++; // : | |
9584 | |
9585 while (PUGI_IMPL_IS_CHARTYPEX(*cur, ctx_symbol)) cur++; | |
9586 } | |
9587 } | |
9588 | |
9589 _cur_lexeme_contents.end = cur; | |
9590 | |
9591 _cur_lexeme = lex_string; | |
9592 } | |
9593 else | |
9594 { | |
9595 _cur_lexeme = lex_none; | |
9596 } | |
9597 } | |
9598 | |
9599 _cur = cur; | |
9600 } | |
9601 | |
9602 lexeme_t current() const | |
9603 { | |
9604 return _cur_lexeme; | |
9605 } | |
9606 | |
9607 const char_t* current_pos() const | |
9608 { | |
9609 return _cur_lexeme_pos; | |
9610 } | |
9611 | |
9612 const xpath_lexer_string& contents() const | |
9613 { | |
9614 assert(_cur_lexeme == lex_var_ref || _cur_lexeme == lex_number || _cur_lexeme == lex_string || _cur_lexeme == lex_quoted_string); | |
9615 | |
9616 return _cur_lexeme_contents; | |
9617 } | |
9618 }; | |
9619 | |
// Node kinds of the XPath abstract syntax tree.
// Enumerator order and values are unchanged; only the comments were corrected
// (unary minus, the 'mod' operator and the 'starts-with' function name).
enum ast_type_t
{
    ast_unknown,
    ast_op_or,                      // left or right
    ast_op_and,                     // left and right
    ast_op_equal,                   // left = right
    ast_op_not_equal,               // left != right
    ast_op_less,                    // left < right
    ast_op_greater,                 // left > right
    ast_op_less_or_equal,           // left <= right
    ast_op_greater_or_equal,        // left >= right
    ast_op_add,                     // left + right
    ast_op_subtract,                // left - right
    ast_op_multiply,                // left * right
    ast_op_divide,                  // left div right
    ast_op_mod,                     // left mod right
    ast_op_negate,                  // - left (unary minus)
    ast_op_union,                   // left | right
    ast_predicate,                  // apply predicate to set; next points to next predicate
    ast_filter,                     // select * from left where right
    ast_string_constant,            // string constant
    ast_number_constant,            // number constant
    ast_variable,                   // variable
    ast_func_last,                  // last()
    ast_func_position,              // position()
    ast_func_count,                 // count(left)
    ast_func_id,                    // id(left)
    ast_func_local_name_0,          // local-name()
    ast_func_local_name_1,          // local-name(left)
    ast_func_namespace_uri_0,       // namespace-uri()
    ast_func_namespace_uri_1,       // namespace-uri(left)
    ast_func_name_0,                // name()
    ast_func_name_1,                // name(left)
    ast_func_string_0,              // string()
    ast_func_string_1,              // string(left)
    ast_func_concat,                // concat(left, right, siblings)
    ast_func_starts_with,           // starts-with(left, right)
    ast_func_contains,              // contains(left, right)
    ast_func_substring_before,      // substring-before(left, right)
    ast_func_substring_after,       // substring-after(left, right)
    ast_func_substring_2,           // substring(left, right)
    ast_func_substring_3,           // substring(left, right, third)
    ast_func_string_length_0,       // string-length()
    ast_func_string_length_1,       // string-length(left)
    ast_func_normalize_space_0,     // normalize-space()
    ast_func_normalize_space_1,     // normalize-space(left)
    ast_func_translate,             // translate(left, right, third)
    ast_func_boolean,               // boolean(left)
    ast_func_not,                   // not(left)
    ast_func_true,                  // true()
    ast_func_false,                 // false()
    ast_func_lang,                  // lang(left)
    ast_func_number_0,              // number()
    ast_func_number_1,              // number(left)
    ast_func_sum,                   // sum(left)
    ast_func_floor,                 // floor(left)
    ast_func_ceiling,               // ceiling(left)
    ast_func_round,                 // round(left)
    ast_step,                       // process set left with step
    ast_step_root,                  // select root node

    ast_opt_translate_table,        // translate(left, right, third) where right/third are constants
    ast_opt_compare_attribute       // @name = 'string'
};
9684 | |
// Axis of a location step. The value is stored (narrowed to char) in
// xpath_ast_node::_axis for ast_step nodes and selects the traversal
// performed by step_fill.
enum axis_t
{
    axis_ancestor = 0,
    axis_ancestor_or_self,
    axis_attribute,
    axis_child,
    axis_descendant,
    axis_descendant_or_self,
    axis_following,
    axis_following_sibling,
    axis_namespace,
    axis_parent,
    axis_preceding,
    axis_preceding_sibling,
    axis_self
};
9701 | |
// Node test of a location step, as interpreted by step_push.
enum nodetest_t
{
    nodetest_none = 0,
    nodetest_name,            // name equals the stored node test string
    nodetest_type_node,       // any node
    nodetest_type_comment,    // comment nodes
    nodetest_type_pi,         // processing instruction nodes
    nodetest_type_text,       // pcdata/cdata nodes
    nodetest_pi,              // processing instruction whose name equals the stored string
    nodetest_all,             // any element (or any attribute on the attribute path)
    nodetest_all_in_namespace // name starts with the stored string
};
9714 | |
// Classification of a predicate/filter expression, stored in
// xpath_ast_node::_test for ast_filter/ast_predicate nodes.
enum predicate_t
{
    predicate_default = 0,
    predicate_posinv,      // NOTE(review): presumably "position invariant"; not referenced in this part of the file
    predicate_constant,    // handled by apply_predicate_number_const
    predicate_constant_one // handled like predicate_constant; additionally lets step_do stop after the first match
};
9722 | |
// How much of a node set the caller of eval_node_set needs; consumed by eval_once.
enum nodeset_eval_t
{
    nodeset_eval_all = 0, // the complete set is required
    nodeset_eval_any,     // used when only emptiness is tested (see eval_boolean)
    nodeset_eval_first    // NOTE(review): presumably only the first node is needed - confirm with callers
};
9729 | |
9730 template <axis_t N> struct axis_to_type | |
9731 { | |
9732 static const axis_t axis; | |
9733 }; | |
9734 | |
9735 template <axis_t N> const axis_t axis_to_type<N>::axis = N; | |
9736 | |
9737 class xpath_ast_node | |
9738 { | |
9739 private: | |
9740 // node type | |
9741 char _type; | |
9742 char _rettype; | |
9743 | |
9744 // for ast_step | |
9745 char _axis; | |
9746 | |
9747 // for ast_step/ast_predicate/ast_filter | |
9748 char _test; | |
9749 | |
9750 // tree node structure | |
9751 xpath_ast_node* _left; | |
9752 xpath_ast_node* _right; | |
9753 xpath_ast_node* _next; | |
9754 | |
9755 union | |
9756 { | |
9757 // value for ast_string_constant | |
9758 const char_t* string; | |
9759 // value for ast_number_constant | |
9760 double number; | |
9761 // variable for ast_variable | |
9762 xpath_variable* variable; | |
9763 // node test for ast_step (node name/namespace/node type/pi target) | |
9764 const char_t* nodetest; | |
9765 // table for ast_opt_translate_table | |
9766 const unsigned char* table; | |
9767 } _data; | |
9768 | |
9769 xpath_ast_node(const xpath_ast_node&); | |
9770 xpath_ast_node& operator=(const xpath_ast_node&); | |
9771 | |
9772 template <class Comp> static bool compare_eq(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) | |
9773 { | |
9774 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); | |
9775 | |
9776 if (lt != xpath_type_node_set && rt != xpath_type_node_set) | |
9777 { | |
9778 if (lt == xpath_type_boolean || rt == xpath_type_boolean) | |
9779 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); | |
9780 else if (lt == xpath_type_number || rt == xpath_type_number) | |
9781 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); | |
9782 else if (lt == xpath_type_string || rt == xpath_type_string) | |
9783 { | |
9784 xpath_allocator_capture cr(stack.result); | |
9785 | |
9786 xpath_string ls = lhs->eval_string(c, stack); | |
9787 xpath_string rs = rhs->eval_string(c, stack); | |
9788 | |
9789 return comp(ls, rs); | |
9790 } | |
9791 } | |
9792 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) | |
9793 { | |
9794 xpath_allocator_capture cr(stack.result); | |
9795 | |
9796 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); | |
9797 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
9798 | |
9799 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) | |
9800 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
9801 { | |
9802 xpath_allocator_capture cri(stack.result); | |
9803 | |
9804 if (comp(string_value(*li, stack.result), string_value(*ri, stack.result))) | |
9805 return true; | |
9806 } | |
9807 | |
9808 return false; | |
9809 } | |
9810 else | |
9811 { | |
9812 if (lt == xpath_type_node_set) | |
9813 { | |
9814 swap(lhs, rhs); | |
9815 swap(lt, rt); | |
9816 } | |
9817 | |
9818 if (lt == xpath_type_boolean) | |
9819 return comp(lhs->eval_boolean(c, stack), rhs->eval_boolean(c, stack)); | |
9820 else if (lt == xpath_type_number) | |
9821 { | |
9822 xpath_allocator_capture cr(stack.result); | |
9823 | |
9824 double l = lhs->eval_number(c, stack); | |
9825 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
9826 | |
9827 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
9828 { | |
9829 xpath_allocator_capture cri(stack.result); | |
9830 | |
9831 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) | |
9832 return true; | |
9833 } | |
9834 | |
9835 return false; | |
9836 } | |
9837 else if (lt == xpath_type_string) | |
9838 { | |
9839 xpath_allocator_capture cr(stack.result); | |
9840 | |
9841 xpath_string l = lhs->eval_string(c, stack); | |
9842 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
9843 | |
9844 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
9845 { | |
9846 xpath_allocator_capture cri(stack.result); | |
9847 | |
9848 if (comp(l, string_value(*ri, stack.result))) | |
9849 return true; | |
9850 } | |
9851 | |
9852 return false; | |
9853 } | |
9854 } | |
9855 | |
9856 assert(false && "Wrong types"); // unreachable | |
9857 return false; | |
9858 } | |
9859 | |
9860 static bool eval_once(xpath_node_set::type_t type, nodeset_eval_t eval) | |
9861 { | |
9862 return type == xpath_node_set::type_sorted ? eval != nodeset_eval_all : eval == nodeset_eval_any; | |
9863 } | |
9864 | |
9865 template <class Comp> static bool compare_rel(xpath_ast_node* lhs, xpath_ast_node* rhs, const xpath_context& c, const xpath_stack& stack, const Comp& comp) | |
9866 { | |
9867 xpath_value_type lt = lhs->rettype(), rt = rhs->rettype(); | |
9868 | |
9869 if (lt != xpath_type_node_set && rt != xpath_type_node_set) | |
9870 return comp(lhs->eval_number(c, stack), rhs->eval_number(c, stack)); | |
9871 else if (lt == xpath_type_node_set && rt == xpath_type_node_set) | |
9872 { | |
9873 xpath_allocator_capture cr(stack.result); | |
9874 | |
9875 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); | |
9876 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
9877 | |
9878 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) | |
9879 { | |
9880 xpath_allocator_capture cri(stack.result); | |
9881 | |
9882 double l = convert_string_to_number(string_value(*li, stack.result).c_str()); | |
9883 | |
9884 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
9885 { | |
9886 xpath_allocator_capture crii(stack.result); | |
9887 | |
9888 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) | |
9889 return true; | |
9890 } | |
9891 } | |
9892 | |
9893 return false; | |
9894 } | |
9895 else if (lt != xpath_type_node_set && rt == xpath_type_node_set) | |
9896 { | |
9897 xpath_allocator_capture cr(stack.result); | |
9898 | |
9899 double l = lhs->eval_number(c, stack); | |
9900 xpath_node_set_raw rs = rhs->eval_node_set(c, stack, nodeset_eval_all); | |
9901 | |
9902 for (const xpath_node* ri = rs.begin(); ri != rs.end(); ++ri) | |
9903 { | |
9904 xpath_allocator_capture cri(stack.result); | |
9905 | |
9906 if (comp(l, convert_string_to_number(string_value(*ri, stack.result).c_str()))) | |
9907 return true; | |
9908 } | |
9909 | |
9910 return false; | |
9911 } | |
9912 else if (lt == xpath_type_node_set && rt != xpath_type_node_set) | |
9913 { | |
9914 xpath_allocator_capture cr(stack.result); | |
9915 | |
9916 xpath_node_set_raw ls = lhs->eval_node_set(c, stack, nodeset_eval_all); | |
9917 double r = rhs->eval_number(c, stack); | |
9918 | |
9919 for (const xpath_node* li = ls.begin(); li != ls.end(); ++li) | |
9920 { | |
9921 xpath_allocator_capture cri(stack.result); | |
9922 | |
9923 if (comp(convert_string_to_number(string_value(*li, stack.result).c_str()), r)) | |
9924 return true; | |
9925 } | |
9926 | |
9927 return false; | |
9928 } | |
9929 else | |
9930 { | |
9931 assert(false && "Wrong types"); // unreachable | |
9932 return false; | |
9933 } | |
9934 } | |
9935 | |
9936 static void apply_predicate_boolean(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) | |
9937 { | |
9938 assert(ns.size() >= first); | |
9939 assert(expr->rettype() != xpath_type_number); | |
9940 | |
9941 size_t i = 1; | |
9942 size_t size = ns.size() - first; | |
9943 | |
9944 xpath_node* last = ns.begin() + first; | |
9945 | |
9946 // remove_if... or well, sort of | |
9947 for (xpath_node* it = last; it != ns.end(); ++it, ++i) | |
9948 { | |
9949 xpath_context c(*it, i, size); | |
9950 | |
9951 if (expr->eval_boolean(c, stack)) | |
9952 { | |
9953 *last++ = *it; | |
9954 | |
9955 if (once) break; | |
9956 } | |
9957 } | |
9958 | |
9959 ns.truncate(last); | |
9960 } | |
9961 | |
9962 static void apply_predicate_number(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack, bool once) | |
9963 { | |
9964 assert(ns.size() >= first); | |
9965 assert(expr->rettype() == xpath_type_number); | |
9966 | |
9967 size_t i = 1; | |
9968 size_t size = ns.size() - first; | |
9969 | |
9970 xpath_node* last = ns.begin() + first; | |
9971 | |
9972 // remove_if... or well, sort of | |
9973 for (xpath_node* it = last; it != ns.end(); ++it, ++i) | |
9974 { | |
9975 xpath_context c(*it, i, size); | |
9976 | |
9977 if (expr->eval_number(c, stack) == static_cast<double>(i)) | |
9978 { | |
9979 *last++ = *it; | |
9980 | |
9981 if (once) break; | |
9982 } | |
9983 } | |
9984 | |
9985 ns.truncate(last); | |
9986 } | |
9987 | |
9988 static void apply_predicate_number_const(xpath_node_set_raw& ns, size_t first, xpath_ast_node* expr, const xpath_stack& stack) | |
9989 { | |
9990 assert(ns.size() >= first); | |
9991 assert(expr->rettype() == xpath_type_number); | |
9992 | |
9993 size_t size = ns.size() - first; | |
9994 | |
9995 xpath_node* last = ns.begin() + first; | |
9996 | |
9997 xpath_node cn; | |
9998 xpath_context c(cn, 1, size); | |
9999 | |
10000 double er = expr->eval_number(c, stack); | |
10001 | |
10002 if (er >= 1.0 && er <= static_cast<double>(size)) | |
10003 { | |
10004 size_t eri = static_cast<size_t>(er); | |
10005 | |
10006 if (er == static_cast<double>(eri)) | |
10007 { | |
10008 xpath_node r = last[eri - 1]; | |
10009 | |
10010 *last++ = r; | |
10011 } | |
10012 } | |
10013 | |
10014 ns.truncate(last); | |
10015 } | |
10016 | |
10017 void apply_predicate(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, bool once) | |
10018 { | |
10019 if (ns.size() == first) return; | |
10020 | |
10021 assert(_type == ast_filter || _type == ast_predicate); | |
10022 | |
10023 if (_test == predicate_constant || _test == predicate_constant_one) | |
10024 apply_predicate_number_const(ns, first, _right, stack); | |
10025 else if (_right->rettype() == xpath_type_number) | |
10026 apply_predicate_number(ns, first, _right, stack, once); | |
10027 else | |
10028 apply_predicate_boolean(ns, first, _right, stack, once); | |
10029 } | |
10030 | |
10031 void apply_predicates(xpath_node_set_raw& ns, size_t first, const xpath_stack& stack, nodeset_eval_t eval) | |
10032 { | |
10033 if (ns.size() == first) return; | |
10034 | |
10035 bool last_once = eval_once(ns.type(), eval); | |
10036 | |
10037 for (xpath_ast_node* pred = _right; pred; pred = pred->_next) | |
10038 pred->apply_predicate(ns, first, stack, !pred->_next && last_once); | |
10039 } | |
10040 | |
10041 bool step_push(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* parent, xpath_allocator* alloc) | |
10042 { | |
10043 assert(a); | |
10044 | |
10045 const char_t* name = a->name ? a->name + 0 : PUGIXML_TEXT(""); | |
10046 | |
10047 switch (_test) | |
10048 { | |
10049 case nodetest_name: | |
10050 if (strequal(name, _data.nodetest) && is_xpath_attribute(name)) | |
10051 { | |
10052 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); | |
10053 return true; | |
10054 } | |
10055 break; | |
10056 | |
10057 case nodetest_type_node: | |
10058 case nodetest_all: | |
10059 if (is_xpath_attribute(name)) | |
10060 { | |
10061 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); | |
10062 return true; | |
10063 } | |
10064 break; | |
10065 | |
10066 case nodetest_all_in_namespace: | |
10067 if (starts_with(name, _data.nodetest) && is_xpath_attribute(name)) | |
10068 { | |
10069 ns.push_back(xpath_node(xml_attribute(a), xml_node(parent)), alloc); | |
10070 return true; | |
10071 } | |
10072 break; | |
10073 | |
10074 default: | |
10075 ; | |
10076 } | |
10077 | |
10078 return false; | |
10079 } | |
10080 | |
10081 bool step_push(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc) | |
10082 { | |
10083 assert(n); | |
10084 | |
10085 xml_node_type type = PUGI_IMPL_NODETYPE(n); | |
10086 | |
10087 switch (_test) | |
10088 { | |
10089 case nodetest_name: | |
10090 if (type == node_element && n->name && strequal(n->name, _data.nodetest)) | |
10091 { | |
10092 ns.push_back(xml_node(n), alloc); | |
10093 return true; | |
10094 } | |
10095 break; | |
10096 | |
10097 case nodetest_type_node: | |
10098 ns.push_back(xml_node(n), alloc); | |
10099 return true; | |
10100 | |
10101 case nodetest_type_comment: | |
10102 if (type == node_comment) | |
10103 { | |
10104 ns.push_back(xml_node(n), alloc); | |
10105 return true; | |
10106 } | |
10107 break; | |
10108 | |
10109 case nodetest_type_text: | |
10110 if (type == node_pcdata || type == node_cdata) | |
10111 { | |
10112 ns.push_back(xml_node(n), alloc); | |
10113 return true; | |
10114 } | |
10115 break; | |
10116 | |
10117 case nodetest_type_pi: | |
10118 if (type == node_pi) | |
10119 { | |
10120 ns.push_back(xml_node(n), alloc); | |
10121 return true; | |
10122 } | |
10123 break; | |
10124 | |
10125 case nodetest_pi: | |
10126 if (type == node_pi && n->name && strequal(n->name, _data.nodetest)) | |
10127 { | |
10128 ns.push_back(xml_node(n), alloc); | |
10129 return true; | |
10130 } | |
10131 break; | |
10132 | |
10133 case nodetest_all: | |
10134 if (type == node_element) | |
10135 { | |
10136 ns.push_back(xml_node(n), alloc); | |
10137 return true; | |
10138 } | |
10139 break; | |
10140 | |
10141 case nodetest_all_in_namespace: | |
10142 if (type == node_element && n->name && starts_with(n->name, _data.nodetest)) | |
10143 { | |
10144 ns.push_back(xml_node(n), alloc); | |
10145 return true; | |
10146 } | |
10147 break; | |
10148 | |
10149 default: | |
10150 assert(false && "Unknown axis"); // unreachable | |
10151 } | |
10152 | |
10153 return false; | |
10154 } | |
10155 | |
10156 template <class T> void step_fill(xpath_node_set_raw& ns, xml_node_struct* n, xpath_allocator* alloc, bool once, T) | |
10157 { | |
10158 const axis_t axis = T::axis; | |
10159 | |
10160 switch (axis) | |
10161 { | |
10162 case axis_attribute: | |
10163 { | |
10164 for (xml_attribute_struct* a = n->first_attribute; a; a = a->next_attribute) | |
10165 if (step_push(ns, a, n, alloc) & once) | |
10166 return; | |
10167 | |
10168 break; | |
10169 } | |
10170 | |
10171 case axis_child: | |
10172 { | |
10173 for (xml_node_struct* c = n->first_child; c; c = c->next_sibling) | |
10174 if (step_push(ns, c, alloc) & once) | |
10175 return; | |
10176 | |
10177 break; | |
10178 } | |
10179 | |
10180 case axis_descendant: | |
10181 case axis_descendant_or_self: | |
10182 { | |
10183 if (axis == axis_descendant_or_self) | |
10184 if (step_push(ns, n, alloc) & once) | |
10185 return; | |
10186 | |
10187 xml_node_struct* cur = n->first_child; | |
10188 | |
10189 while (cur) | |
10190 { | |
10191 if (step_push(ns, cur, alloc) & once) | |
10192 return; | |
10193 | |
10194 if (cur->first_child) | |
10195 cur = cur->first_child; | |
10196 else | |
10197 { | |
10198 while (!cur->next_sibling) | |
10199 { | |
10200 cur = cur->parent; | |
10201 | |
10202 if (cur == n) return; | |
10203 } | |
10204 | |
10205 cur = cur->next_sibling; | |
10206 } | |
10207 } | |
10208 | |
10209 break; | |
10210 } | |
10211 | |
10212 case axis_following_sibling: | |
10213 { | |
10214 for (xml_node_struct* c = n->next_sibling; c; c = c->next_sibling) | |
10215 if (step_push(ns, c, alloc) & once) | |
10216 return; | |
10217 | |
10218 break; | |
10219 } | |
10220 | |
10221 case axis_preceding_sibling: | |
10222 { | |
10223 for (xml_node_struct* c = n->prev_sibling_c; c->next_sibling; c = c->prev_sibling_c) | |
10224 if (step_push(ns, c, alloc) & once) | |
10225 return; | |
10226 | |
10227 break; | |
10228 } | |
10229 | |
10230 case axis_following: | |
10231 { | |
10232 xml_node_struct* cur = n; | |
10233 | |
10234 // exit from this node so that we don't include descendants | |
10235 while (!cur->next_sibling) | |
10236 { | |
10237 cur = cur->parent; | |
10238 | |
10239 if (!cur) return; | |
10240 } | |
10241 | |
10242 cur = cur->next_sibling; | |
10243 | |
10244 while (cur) | |
10245 { | |
10246 if (step_push(ns, cur, alloc) & once) | |
10247 return; | |
10248 | |
10249 if (cur->first_child) | |
10250 cur = cur->first_child; | |
10251 else | |
10252 { | |
10253 while (!cur->next_sibling) | |
10254 { | |
10255 cur = cur->parent; | |
10256 | |
10257 if (!cur) return; | |
10258 } | |
10259 | |
10260 cur = cur->next_sibling; | |
10261 } | |
10262 } | |
10263 | |
10264 break; | |
10265 } | |
10266 | |
10267 case axis_preceding: | |
10268 { | |
10269 xml_node_struct* cur = n; | |
10270 | |
10271 // exit from this node so that we don't include descendants | |
10272 while (!cur->prev_sibling_c->next_sibling) | |
10273 { | |
10274 cur = cur->parent; | |
10275 | |
10276 if (!cur) return; | |
10277 } | |
10278 | |
10279 cur = cur->prev_sibling_c; | |
10280 | |
10281 while (cur) | |
10282 { | |
10283 if (cur->first_child) | |
10284 cur = cur->first_child->prev_sibling_c; | |
10285 else | |
10286 { | |
10287 // leaf node, can't be ancestor | |
10288 if (step_push(ns, cur, alloc) & once) | |
10289 return; | |
10290 | |
10291 while (!cur->prev_sibling_c->next_sibling) | |
10292 { | |
10293 cur = cur->parent; | |
10294 | |
10295 if (!cur) return; | |
10296 | |
10297 if (!node_is_ancestor(cur, n)) | |
10298 if (step_push(ns, cur, alloc) & once) | |
10299 return; | |
10300 } | |
10301 | |
10302 cur = cur->prev_sibling_c; | |
10303 } | |
10304 } | |
10305 | |
10306 break; | |
10307 } | |
10308 | |
10309 case axis_ancestor: | |
10310 case axis_ancestor_or_self: | |
10311 { | |
10312 if (axis == axis_ancestor_or_self) | |
10313 if (step_push(ns, n, alloc) & once) | |
10314 return; | |
10315 | |
10316 xml_node_struct* cur = n->parent; | |
10317 | |
10318 while (cur) | |
10319 { | |
10320 if (step_push(ns, cur, alloc) & once) | |
10321 return; | |
10322 | |
10323 cur = cur->parent; | |
10324 } | |
10325 | |
10326 break; | |
10327 } | |
10328 | |
10329 case axis_self: | |
10330 { | |
10331 step_push(ns, n, alloc); | |
10332 | |
10333 break; | |
10334 } | |
10335 | |
10336 case axis_parent: | |
10337 { | |
10338 if (n->parent) | |
10339 step_push(ns, n->parent, alloc); | |
10340 | |
10341 break; | |
10342 } | |
10343 | |
10344 default: | |
10345 assert(false && "Unimplemented axis"); // unreachable | |
10346 } | |
10347 } | |
10348 | |
10349 template <class T> void step_fill(xpath_node_set_raw& ns, xml_attribute_struct* a, xml_node_struct* p, xpath_allocator* alloc, bool once, T v) | |
10350 { | |
10351 const axis_t axis = T::axis; | |
10352 | |
10353 switch (axis) | |
10354 { | |
10355 case axis_ancestor: | |
10356 case axis_ancestor_or_self: | |
10357 { | |
10358 if (axis == axis_ancestor_or_self && _test == nodetest_type_node) // reject attributes based on principal node type test | |
10359 if (step_push(ns, a, p, alloc) & once) | |
10360 return; | |
10361 | |
10362 xml_node_struct* cur = p; | |
10363 | |
10364 while (cur) | |
10365 { | |
10366 if (step_push(ns, cur, alloc) & once) | |
10367 return; | |
10368 | |
10369 cur = cur->parent; | |
10370 } | |
10371 | |
10372 break; | |
10373 } | |
10374 | |
10375 case axis_descendant_or_self: | |
10376 case axis_self: | |
10377 { | |
10378 if (_test == nodetest_type_node) // reject attributes based on principal node type test | |
10379 step_push(ns, a, p, alloc); | |
10380 | |
10381 break; | |
10382 } | |
10383 | |
10384 case axis_following: | |
10385 { | |
10386 xml_node_struct* cur = p; | |
10387 | |
10388 while (cur) | |
10389 { | |
10390 if (cur->first_child) | |
10391 cur = cur->first_child; | |
10392 else | |
10393 { | |
10394 while (!cur->next_sibling) | |
10395 { | |
10396 cur = cur->parent; | |
10397 | |
10398 if (!cur) return; | |
10399 } | |
10400 | |
10401 cur = cur->next_sibling; | |
10402 } | |
10403 | |
10404 if (step_push(ns, cur, alloc) & once) | |
10405 return; | |
10406 } | |
10407 | |
10408 break; | |
10409 } | |
10410 | |
10411 case axis_parent: | |
10412 { | |
10413 step_push(ns, p, alloc); | |
10414 | |
10415 break; | |
10416 } | |
10417 | |
10418 case axis_preceding: | |
10419 { | |
10420 // preceding:: axis does not include attribute nodes and attribute ancestors (they are the same as parent's ancestors), so we can reuse node preceding | |
10421 step_fill(ns, p, alloc, once, v); | |
10422 break; | |
10423 } | |
10424 | |
10425 default: | |
10426 assert(false && "Unimplemented axis"); // unreachable | |
10427 } | |
10428 } | |
10429 | |
10430 template <class T> void step_fill(xpath_node_set_raw& ns, const xpath_node& xn, xpath_allocator* alloc, bool once, T v) | |
10431 { | |
10432 const axis_t axis = T::axis; | |
10433 const bool axis_has_attributes = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_descendant_or_self || axis == axis_following || axis == axis_parent || axis == axis_preceding || axis == axis_self); | |
10434 | |
10435 if (xn.node()) | |
10436 step_fill(ns, xn.node().internal_object(), alloc, once, v); | |
10437 else if (axis_has_attributes && xn.attribute() && xn.parent()) | |
10438 step_fill(ns, xn.attribute().internal_object(), xn.parent().internal_object(), alloc, once, v); | |
10439 } | |
10440 | |
10441 template <class T> xpath_node_set_raw step_do(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval, T v) | |
10442 { | |
10443 const axis_t axis = T::axis; | |
10444 const bool axis_reverse = (axis == axis_ancestor || axis == axis_ancestor_or_self || axis == axis_preceding || axis == axis_preceding_sibling); | |
10445 const xpath_node_set::type_t axis_type = axis_reverse ? xpath_node_set::type_sorted_reverse : xpath_node_set::type_sorted; | |
10446 | |
10447 bool once = | |
10448 (axis == axis_attribute && _test == nodetest_name) || | |
10449 (!_right && eval_once(axis_type, eval)) || | |
10450 // coverity[mixed_enums] | |
10451 (_right && !_right->_next && _right->_test == predicate_constant_one); | |
10452 | |
10453 xpath_node_set_raw ns; | |
10454 ns.set_type(axis_type); | |
10455 | |
10456 if (_left) | |
10457 { | |
10458 xpath_node_set_raw s = _left->eval_node_set(c, stack, nodeset_eval_all); | |
10459 | |
10460 // self axis preserves the original order | |
10461 if (axis == axis_self) ns.set_type(s.type()); | |
10462 | |
10463 for (const xpath_node* it = s.begin(); it != s.end(); ++it) | |
10464 { | |
10465 size_t size = ns.size(); | |
10466 | |
10467 // in general, all axes generate elements in a particular order, but there is no order guarantee if axis is applied to two nodes | |
10468 if (axis != axis_self && size != 0) ns.set_type(xpath_node_set::type_unsorted); | |
10469 | |
10470 step_fill(ns, *it, stack.result, once, v); | |
10471 if (_right) apply_predicates(ns, size, stack, eval); | |
10472 } | |
10473 } | |
10474 else | |
10475 { | |
10476 step_fill(ns, c.n, stack.result, once, v); | |
10477 if (_right) apply_predicates(ns, 0, stack, eval); | |
10478 } | |
10479 | |
10480 // child, attribute and self axes always generate unique set of nodes | |
10481 // for other axis, if the set stayed sorted, it stayed unique because the traversal algorithms do not visit the same node twice | |
10482 if (axis != axis_child && axis != axis_attribute && axis != axis_self && ns.type() == xpath_node_set::type_unsorted) | |
10483 ns.remove_duplicates(stack.temp); | |
10484 | |
10485 return ns; | |
10486 } | |
10487 | |
10488 public: | |
10489 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, const char_t* value): | |
10490 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) | |
10491 { | |
10492 assert(type == ast_string_constant); | |
10493 _data.string = value; | |
10494 } | |
10495 | |
10496 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, double value): | |
10497 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) | |
10498 { | |
10499 assert(type == ast_number_constant); | |
10500 _data.number = value; | |
10501 } | |
10502 | |
10503 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_variable* value): | |
10504 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(0), _right(0), _next(0) | |
10505 { | |
10506 assert(type == ast_variable); | |
10507 _data.variable = value; | |
10508 } | |
10509 | |
10510 xpath_ast_node(ast_type_t type, xpath_value_type rettype_, xpath_ast_node* left = 0, xpath_ast_node* right = 0): | |
10511 _type(static_cast<char>(type)), _rettype(static_cast<char>(rettype_)), _axis(0), _test(0), _left(left), _right(right), _next(0) | |
10512 { | |
10513 } | |
10514 | |
10515 xpath_ast_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents): | |
10516 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(static_cast<char>(axis)), _test(static_cast<char>(test)), _left(left), _right(0), _next(0) | |
10517 { | |
10518 assert(type == ast_step); | |
10519 _data.nodetest = contents; | |
10520 } | |
10521 | |
10522 xpath_ast_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test): | |
10523 _type(static_cast<char>(type)), _rettype(xpath_type_node_set), _axis(0), _test(static_cast<char>(test)), _left(left), _right(right), _next(0) | |
10524 { | |
10525 assert(type == ast_filter || type == ast_predicate); | |
10526 } | |
10527 | |
10528 void set_next(xpath_ast_node* value) | |
10529 { | |
10530 _next = value; | |
10531 } | |
10532 | |
10533 void set_right(xpath_ast_node* value) | |
10534 { | |
10535 _right = value; | |
10536 } | |
10537 | |
10538 bool eval_boolean(const xpath_context& c, const xpath_stack& stack) | |
10539 { | |
10540 switch (_type) | |
10541 { | |
10542 case ast_op_or: | |
10543 return _left->eval_boolean(c, stack) || _right->eval_boolean(c, stack); | |
10544 | |
10545 case ast_op_and: | |
10546 return _left->eval_boolean(c, stack) && _right->eval_boolean(c, stack); | |
10547 | |
10548 case ast_op_equal: | |
10549 return compare_eq(_left, _right, c, stack, equal_to()); | |
10550 | |
10551 case ast_op_not_equal: | |
10552 return compare_eq(_left, _right, c, stack, not_equal_to()); | |
10553 | |
10554 case ast_op_less: | |
10555 return compare_rel(_left, _right, c, stack, less()); | |
10556 | |
10557 case ast_op_greater: | |
10558 return compare_rel(_right, _left, c, stack, less()); | |
10559 | |
10560 case ast_op_less_or_equal: | |
10561 return compare_rel(_left, _right, c, stack, less_equal()); | |
10562 | |
10563 case ast_op_greater_or_equal: | |
10564 return compare_rel(_right, _left, c, stack, less_equal()); | |
10565 | |
10566 case ast_func_starts_with: | |
10567 { | |
10568 xpath_allocator_capture cr(stack.result); | |
10569 | |
10570 xpath_string lr = _left->eval_string(c, stack); | |
10571 xpath_string rr = _right->eval_string(c, stack); | |
10572 | |
10573 return starts_with(lr.c_str(), rr.c_str()); | |
10574 } | |
10575 | |
10576 case ast_func_contains: | |
10577 { | |
10578 xpath_allocator_capture cr(stack.result); | |
10579 | |
10580 xpath_string lr = _left->eval_string(c, stack); | |
10581 xpath_string rr = _right->eval_string(c, stack); | |
10582 | |
10583 return find_substring(lr.c_str(), rr.c_str()) != 0; | |
10584 } | |
10585 | |
10586 case ast_func_boolean: | |
10587 return _left->eval_boolean(c, stack); | |
10588 | |
10589 case ast_func_not: | |
10590 return !_left->eval_boolean(c, stack); | |
10591 | |
10592 case ast_func_true: | |
10593 return true; | |
10594 | |
10595 case ast_func_false: | |
10596 return false; | |
10597 | |
10598 case ast_func_lang: | |
10599 { | |
10600 if (c.n.attribute()) return false; | |
10601 | |
10602 xpath_allocator_capture cr(stack.result); | |
10603 | |
10604 xpath_string lang = _left->eval_string(c, stack); | |
10605 | |
10606 for (xml_node n = c.n.node(); n; n = n.parent()) | |
10607 { | |
10608 xml_attribute a = n.attribute(PUGIXML_TEXT("xml:lang")); | |
10609 | |
10610 if (a) | |
10611 { | |
10612 const char_t* value = a.value(); | |
10613 | |
10614 // strnicmp / strncasecmp is not portable | |
10615 for (const char_t* lit = lang.c_str(); *lit; ++lit) | |
10616 { | |
10617 if (tolower_ascii(*lit) != tolower_ascii(*value)) return false; | |
10618 ++value; | |
10619 } | |
10620 | |
10621 return *value == 0 || *value == '-'; | |
10622 } | |
10623 } | |
10624 | |
10625 return false; | |
10626 } | |
10627 | |
10628 case ast_opt_compare_attribute: | |
10629 { | |
10630 const char_t* value = (_right->_type == ast_string_constant) ? _right->_data.string : _right->_data.variable->get_string(); | |
10631 | |
10632 xml_attribute attr = c.n.node().attribute(_left->_data.nodetest); | |
10633 | |
10634 return attr && strequal(attr.value(), value) && is_xpath_attribute(attr.name()); | |
10635 } | |
10636 | |
10637 case ast_variable: | |
10638 { | |
10639 assert(_rettype == _data.variable->type()); | |
10640 | |
10641 if (_rettype == xpath_type_boolean) | |
10642 return _data.variable->get_boolean(); | |
10643 | |
10644 // variable needs to be converted to the correct type, this is handled by the fallthrough block below | |
10645 break; | |
10646 } | |
10647 | |
10648 default: | |
10649 ; | |
10650 } | |
10651 | |
10652 // none of the ast types that return the value directly matched, we need to perform type conversion | |
10653 switch (_rettype) | |
10654 { | |
10655 case xpath_type_number: | |
10656 return convert_number_to_boolean(eval_number(c, stack)); | |
10657 | |
10658 case xpath_type_string: | |
10659 { | |
10660 xpath_allocator_capture cr(stack.result); | |
10661 | |
10662 return !eval_string(c, stack).empty(); | |
10663 } | |
10664 | |
10665 case xpath_type_node_set: | |
10666 { | |
10667 xpath_allocator_capture cr(stack.result); | |
10668 | |
10669 return !eval_node_set(c, stack, nodeset_eval_any).empty(); | |
10670 } | |
10671 | |
10672 default: | |
10673 assert(false && "Wrong expression for return type boolean"); // unreachable | |
10674 return false; | |
10675 } | |
10676 } | |
10677 | |
10678 double eval_number(const xpath_context& c, const xpath_stack& stack) | |
10679 { | |
10680 switch (_type) | |
10681 { | |
10682 case ast_op_add: | |
10683 return _left->eval_number(c, stack) + _right->eval_number(c, stack); | |
10684 | |
10685 case ast_op_subtract: | |
10686 return _left->eval_number(c, stack) - _right->eval_number(c, stack); | |
10687 | |
10688 case ast_op_multiply: | |
10689 return _left->eval_number(c, stack) * _right->eval_number(c, stack); | |
10690 | |
10691 case ast_op_divide: | |
10692 return _left->eval_number(c, stack) / _right->eval_number(c, stack); | |
10693 | |
10694 case ast_op_mod: | |
10695 return fmod(_left->eval_number(c, stack), _right->eval_number(c, stack)); | |
10696 | |
10697 case ast_op_negate: | |
10698 return -_left->eval_number(c, stack); | |
10699 | |
10700 case ast_number_constant: | |
10701 return _data.number; | |
10702 | |
10703 case ast_func_last: | |
10704 return static_cast<double>(c.size); | |
10705 | |
10706 case ast_func_position: | |
10707 return static_cast<double>(c.position); | |
10708 | |
10709 case ast_func_count: | |
10710 { | |
10711 xpath_allocator_capture cr(stack.result); | |
10712 | |
10713 return static_cast<double>(_left->eval_node_set(c, stack, nodeset_eval_all).size()); | |
10714 } | |
10715 | |
10716 case ast_func_string_length_0: | |
10717 { | |
10718 xpath_allocator_capture cr(stack.result); | |
10719 | |
10720 return static_cast<double>(string_value(c.n, stack.result).length()); | |
10721 } | |
10722 | |
10723 case ast_func_string_length_1: | |
10724 { | |
10725 xpath_allocator_capture cr(stack.result); | |
10726 | |
10727 return static_cast<double>(_left->eval_string(c, stack).length()); | |
10728 } | |
10729 | |
10730 case ast_func_number_0: | |
10731 { | |
10732 xpath_allocator_capture cr(stack.result); | |
10733 | |
10734 return convert_string_to_number(string_value(c.n, stack.result).c_str()); | |
10735 } | |
10736 | |
10737 case ast_func_number_1: | |
10738 return _left->eval_number(c, stack); | |
10739 | |
10740 case ast_func_sum: | |
10741 { | |
10742 xpath_allocator_capture cr(stack.result); | |
10743 | |
10744 double r = 0; | |
10745 | |
10746 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_all); | |
10747 | |
10748 for (const xpath_node* it = ns.begin(); it != ns.end(); ++it) | |
10749 { | |
10750 xpath_allocator_capture cri(stack.result); | |
10751 | |
10752 r += convert_string_to_number(string_value(*it, stack.result).c_str()); | |
10753 } | |
10754 | |
10755 return r; | |
10756 } | |
10757 | |
10758 case ast_func_floor: | |
10759 { | |
10760 double r = _left->eval_number(c, stack); | |
10761 | |
10762 return r == r ? floor(r) : r; | |
10763 } | |
10764 | |
10765 case ast_func_ceiling: | |
10766 { | |
10767 double r = _left->eval_number(c, stack); | |
10768 | |
10769 return r == r ? ceil(r) : r; | |
10770 } | |
10771 | |
10772 case ast_func_round: | |
10773 return round_nearest_nzero(_left->eval_number(c, stack)); | |
10774 | |
10775 case ast_variable: | |
10776 { | |
10777 assert(_rettype == _data.variable->type()); | |
10778 | |
10779 if (_rettype == xpath_type_number) | |
10780 return _data.variable->get_number(); | |
10781 | |
10782 // variable needs to be converted to the correct type, this is handled by the fallthrough block below | |
10783 break; | |
10784 } | |
10785 | |
10786 default: | |
10787 ; | |
10788 } | |
10789 | |
10790 // none of the ast types that return the value directly matched, we need to perform type conversion | |
10791 switch (_rettype) | |
10792 { | |
10793 case xpath_type_boolean: | |
10794 return eval_boolean(c, stack) ? 1 : 0; | |
10795 | |
10796 case xpath_type_string: | |
10797 { | |
10798 xpath_allocator_capture cr(stack.result); | |
10799 | |
10800 return convert_string_to_number(eval_string(c, stack).c_str()); | |
10801 } | |
10802 | |
10803 case xpath_type_node_set: | |
10804 { | |
10805 xpath_allocator_capture cr(stack.result); | |
10806 | |
10807 return convert_string_to_number(eval_string(c, stack).c_str()); | |
10808 } | |
10809 | |
10810 default: | |
10811 assert(false && "Wrong expression for return type number"); // unreachable | |
10812 return 0; | |
10813 } | |
10814 } | |
10815 | |
10816 xpath_string eval_string_concat(const xpath_context& c, const xpath_stack& stack) | |
10817 { | |
10818 assert(_type == ast_func_concat); | |
10819 | |
10820 xpath_allocator_capture ct(stack.temp); | |
10821 | |
10822 // count the string number | |
10823 size_t count = 1; | |
10824 for (xpath_ast_node* nc = _right; nc; nc = nc->_next) count++; | |
10825 | |
10826 // allocate a buffer for temporary string objects | |
10827 xpath_string* buffer = static_cast<xpath_string*>(stack.temp->allocate(count * sizeof(xpath_string))); | |
10828 if (!buffer) return xpath_string(); | |
10829 | |
10830 // evaluate all strings to temporary stack | |
10831 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
10832 | |
10833 buffer[0] = _left->eval_string(c, swapped_stack); | |
10834 | |
10835 size_t pos = 1; | |
10836 for (xpath_ast_node* n = _right; n; n = n->_next, ++pos) buffer[pos] = n->eval_string(c, swapped_stack); | |
10837 assert(pos == count); | |
10838 | |
10839 // get total length | |
10840 size_t length = 0; | |
10841 for (size_t i = 0; i < count; ++i) length += buffer[i].length(); | |
10842 | |
10843 // create final string | |
10844 char_t* result = static_cast<char_t*>(stack.result->allocate((length + 1) * sizeof(char_t))); | |
10845 if (!result) return xpath_string(); | |
10846 | |
10847 char_t* ri = result; | |
10848 | |
10849 for (size_t j = 0; j < count; ++j) | |
10850 for (const char_t* bi = buffer[j].c_str(); *bi; ++bi) | |
10851 *ri++ = *bi; | |
10852 | |
10853 *ri = 0; | |
10854 | |
10855 return xpath_string::from_heap_preallocated(result, ri); | |
10856 } | |
10857 | |
10858 xpath_string eval_string(const xpath_context& c, const xpath_stack& stack) | |
10859 { | |
10860 switch (_type) | |
10861 { | |
10862 case ast_string_constant: | |
10863 return xpath_string::from_const(_data.string); | |
10864 | |
10865 case ast_func_local_name_0: | |
10866 { | |
10867 xpath_node na = c.n; | |
10868 | |
10869 return xpath_string::from_const(local_name(na)); | |
10870 } | |
10871 | |
10872 case ast_func_local_name_1: | |
10873 { | |
10874 xpath_allocator_capture cr(stack.result); | |
10875 | |
10876 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); | |
10877 xpath_node na = ns.first(); | |
10878 | |
10879 return xpath_string::from_const(local_name(na)); | |
10880 } | |
10881 | |
10882 case ast_func_name_0: | |
10883 { | |
10884 xpath_node na = c.n; | |
10885 | |
10886 return xpath_string::from_const(qualified_name(na)); | |
10887 } | |
10888 | |
10889 case ast_func_name_1: | |
10890 { | |
10891 xpath_allocator_capture cr(stack.result); | |
10892 | |
10893 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); | |
10894 xpath_node na = ns.first(); | |
10895 | |
10896 return xpath_string::from_const(qualified_name(na)); | |
10897 } | |
10898 | |
10899 case ast_func_namespace_uri_0: | |
10900 { | |
10901 xpath_node na = c.n; | |
10902 | |
10903 return xpath_string::from_const(namespace_uri(na)); | |
10904 } | |
10905 | |
10906 case ast_func_namespace_uri_1: | |
10907 { | |
10908 xpath_allocator_capture cr(stack.result); | |
10909 | |
10910 xpath_node_set_raw ns = _left->eval_node_set(c, stack, nodeset_eval_first); | |
10911 xpath_node na = ns.first(); | |
10912 | |
10913 return xpath_string::from_const(namespace_uri(na)); | |
10914 } | |
10915 | |
10916 case ast_func_string_0: | |
10917 return string_value(c.n, stack.result); | |
10918 | |
10919 case ast_func_string_1: | |
10920 return _left->eval_string(c, stack); | |
10921 | |
10922 case ast_func_concat: | |
10923 return eval_string_concat(c, stack); | |
10924 | |
10925 case ast_func_substring_before: | |
10926 { | |
10927 xpath_allocator_capture cr(stack.temp); | |
10928 | |
10929 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
10930 | |
10931 xpath_string s = _left->eval_string(c, swapped_stack); | |
10932 xpath_string p = _right->eval_string(c, swapped_stack); | |
10933 | |
10934 const char_t* pos = find_substring(s.c_str(), p.c_str()); | |
10935 | |
10936 return pos ? xpath_string::from_heap(s.c_str(), pos, stack.result) : xpath_string(); | |
10937 } | |
10938 | |
10939 case ast_func_substring_after: | |
10940 { | |
10941 xpath_allocator_capture cr(stack.temp); | |
10942 | |
10943 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
10944 | |
10945 xpath_string s = _left->eval_string(c, swapped_stack); | |
10946 xpath_string p = _right->eval_string(c, swapped_stack); | |
10947 | |
10948 const char_t* pos = find_substring(s.c_str(), p.c_str()); | |
10949 if (!pos) return xpath_string(); | |
10950 | |
10951 const char_t* rbegin = pos + p.length(); | |
10952 const char_t* rend = s.c_str() + s.length(); | |
10953 | |
10954 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); | |
10955 } | |
10956 | |
10957 case ast_func_substring_2: | |
10958 { | |
10959 xpath_allocator_capture cr(stack.temp); | |
10960 | |
10961 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
10962 | |
10963 xpath_string s = _left->eval_string(c, swapped_stack); | |
10964 size_t s_length = s.length(); | |
10965 | |
10966 double first = round_nearest(_right->eval_number(c, stack)); | |
10967 | |
10968 if (is_nan(first)) return xpath_string(); // NaN | |
10969 else if (first >= static_cast<double>(s_length + 1)) return xpath_string(); | |
10970 | |
10971 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); | |
10972 assert(1 <= pos && pos <= s_length + 1); | |
10973 | |
10974 const char_t* rbegin = s.c_str() + (pos - 1); | |
10975 const char_t* rend = s.c_str() + s.length(); | |
10976 | |
10977 return s.uses_heap() ? xpath_string::from_heap(rbegin, rend, stack.result) : xpath_string::from_const(rbegin); | |
10978 } | |
10979 | |
10980 case ast_func_substring_3: | |
10981 { | |
10982 xpath_allocator_capture cr(stack.temp); | |
10983 | |
10984 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
10985 | |
10986 xpath_string s = _left->eval_string(c, swapped_stack); | |
10987 size_t s_length = s.length(); | |
10988 | |
10989 double first = round_nearest(_right->eval_number(c, stack)); | |
10990 double last = first + round_nearest(_right->_next->eval_number(c, stack)); | |
10991 | |
10992 if (is_nan(first) || is_nan(last)) return xpath_string(); | |
10993 else if (first >= static_cast<double>(s_length + 1)) return xpath_string(); | |
10994 else if (first >= last) return xpath_string(); | |
10995 else if (last < 1) return xpath_string(); | |
10996 | |
10997 size_t pos = first < 1 ? 1 : static_cast<size_t>(first); | |
10998 size_t end = last >= static_cast<double>(s_length + 1) ? s_length + 1 : static_cast<size_t>(last); | |
10999 | |
11000 assert(1 <= pos && pos <= end && end <= s_length + 1); | |
11001 const char_t* rbegin = s.c_str() + (pos - 1); | |
11002 const char_t* rend = s.c_str() + (end - 1); | |
11003 | |
11004 return (end == s_length + 1 && !s.uses_heap()) ? xpath_string::from_const(rbegin) : xpath_string::from_heap(rbegin, rend, stack.result); | |
11005 } | |
11006 | |
11007 case ast_func_normalize_space_0: | |
11008 { | |
11009 xpath_string s = string_value(c.n, stack.result); | |
11010 | |
11011 char_t* begin = s.data(stack.result); | |
11012 if (!begin) return xpath_string(); | |
11013 | |
11014 char_t* end = normalize_space(begin); | |
11015 | |
11016 return xpath_string::from_heap_preallocated(begin, end); | |
11017 } | |
11018 | |
11019 case ast_func_normalize_space_1: | |
11020 { | |
11021 xpath_string s = _left->eval_string(c, stack); | |
11022 | |
11023 char_t* begin = s.data(stack.result); | |
11024 if (!begin) return xpath_string(); | |
11025 | |
11026 char_t* end = normalize_space(begin); | |
11027 | |
11028 return xpath_string::from_heap_preallocated(begin, end); | |
11029 } | |
11030 | |
11031 case ast_func_translate: | |
11032 { | |
11033 xpath_allocator_capture cr(stack.temp); | |
11034 | |
11035 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
11036 | |
11037 xpath_string s = _left->eval_string(c, stack); | |
11038 xpath_string from = _right->eval_string(c, swapped_stack); | |
11039 xpath_string to = _right->_next->eval_string(c, swapped_stack); | |
11040 | |
11041 char_t* begin = s.data(stack.result); | |
11042 if (!begin) return xpath_string(); | |
11043 | |
11044 char_t* end = translate(begin, from.c_str(), to.c_str(), to.length()); | |
11045 | |
11046 return xpath_string::from_heap_preallocated(begin, end); | |
11047 } | |
11048 | |
11049 case ast_opt_translate_table: | |
11050 { | |
11051 xpath_string s = _left->eval_string(c, stack); | |
11052 | |
11053 char_t* begin = s.data(stack.result); | |
11054 if (!begin) return xpath_string(); | |
11055 | |
11056 char_t* end = translate_table(begin, _data.table); | |
11057 | |
11058 return xpath_string::from_heap_preallocated(begin, end); | |
11059 } | |
11060 | |
11061 case ast_variable: | |
11062 { | |
11063 assert(_rettype == _data.variable->type()); | |
11064 | |
11065 if (_rettype == xpath_type_string) | |
11066 return xpath_string::from_const(_data.variable->get_string()); | |
11067 | |
11068 // variable needs to be converted to the correct type, this is handled by the fallthrough block below | |
11069 break; | |
11070 } | |
11071 | |
11072 default: | |
11073 ; | |
11074 } | |
11075 | |
11076 // none of the ast types that return the value directly matched, we need to perform type conversion | |
11077 switch (_rettype) | |
11078 { | |
11079 case xpath_type_boolean: | |
11080 return xpath_string::from_const(eval_boolean(c, stack) ? PUGIXML_TEXT("true") : PUGIXML_TEXT("false")); | |
11081 | |
11082 case xpath_type_number: | |
11083 return convert_number_to_string(eval_number(c, stack), stack.result); | |
11084 | |
11085 case xpath_type_node_set: | |
11086 { | |
11087 xpath_allocator_capture cr(stack.temp); | |
11088 | |
11089 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
11090 | |
11091 xpath_node_set_raw ns = eval_node_set(c, swapped_stack, nodeset_eval_first); | |
11092 return ns.empty() ? xpath_string() : string_value(ns.first(), stack.result); | |
11093 } | |
11094 | |
11095 default: | |
11096 assert(false && "Wrong expression for return type string"); // unreachable | |
11097 return xpath_string(); | |
11098 } | |
11099 } | |
11100 | |
11101 xpath_node_set_raw eval_node_set(const xpath_context& c, const xpath_stack& stack, nodeset_eval_t eval) | |
11102 { | |
11103 switch (_type) | |
11104 { | |
11105 case ast_op_union: | |
11106 { | |
11107 xpath_allocator_capture cr(stack.temp); | |
11108 | |
11109 xpath_stack swapped_stack = {stack.temp, stack.result}; | |
11110 | |
11111 xpath_node_set_raw ls = _left->eval_node_set(c, stack, eval); | |
11112 xpath_node_set_raw rs = _right->eval_node_set(c, swapped_stack, eval); | |
11113 | |
11114 // we can optimize merging two sorted sets, but this is a very rare operation, so don't bother | |
11115 ls.set_type(xpath_node_set::type_unsorted); | |
11116 | |
11117 ls.append(rs.begin(), rs.end(), stack.result); | |
11118 ls.remove_duplicates(stack.temp); | |
11119 | |
11120 return ls; | |
11121 } | |
11122 | |
11123 case ast_filter: | |
11124 { | |
11125 xpath_node_set_raw set = _left->eval_node_set(c, stack, _test == predicate_constant_one ? nodeset_eval_first : nodeset_eval_all); | |
11126 | |
11127 // either expression is a number or it contains position() call; sort by document order | |
11128 if (_test != predicate_posinv) set.sort_do(); | |
11129 | |
11130 bool once = eval_once(set.type(), eval); | |
11131 | |
11132 apply_predicate(set, 0, stack, once); | |
11133 | |
11134 return set; | |
11135 } | |
11136 | |
11137 case ast_func_id: | |
11138 return xpath_node_set_raw(); | |
11139 | |
11140 case ast_step: | |
11141 { | |
11142 switch (_axis) | |
11143 { | |
11144 case axis_ancestor: | |
11145 return step_do(c, stack, eval, axis_to_type<axis_ancestor>()); | |
11146 | |
11147 case axis_ancestor_or_self: | |
11148 return step_do(c, stack, eval, axis_to_type<axis_ancestor_or_self>()); | |
11149 | |
11150 case axis_attribute: | |
11151 return step_do(c, stack, eval, axis_to_type<axis_attribute>()); | |
11152 | |
11153 case axis_child: | |
11154 return step_do(c, stack, eval, axis_to_type<axis_child>()); | |
11155 | |
11156 case axis_descendant: | |
11157 return step_do(c, stack, eval, axis_to_type<axis_descendant>()); | |
11158 | |
11159 case axis_descendant_or_self: | |
11160 return step_do(c, stack, eval, axis_to_type<axis_descendant_or_self>()); | |
11161 | |
11162 case axis_following: | |
11163 return step_do(c, stack, eval, axis_to_type<axis_following>()); | |
11164 | |
11165 case axis_following_sibling: | |
11166 return step_do(c, stack, eval, axis_to_type<axis_following_sibling>()); | |
11167 | |
11168 case axis_namespace: | |
11169 // namespaced axis is not supported | |
11170 return xpath_node_set_raw(); | |
11171 | |
11172 case axis_parent: | |
11173 return step_do(c, stack, eval, axis_to_type<axis_parent>()); | |
11174 | |
11175 case axis_preceding: | |
11176 return step_do(c, stack, eval, axis_to_type<axis_preceding>()); | |
11177 | |
11178 case axis_preceding_sibling: | |
11179 return step_do(c, stack, eval, axis_to_type<axis_preceding_sibling>()); | |
11180 | |
11181 case axis_self: | |
11182 return step_do(c, stack, eval, axis_to_type<axis_self>()); | |
11183 | |
11184 default: | |
11185 assert(false && "Unknown axis"); // unreachable | |
11186 return xpath_node_set_raw(); | |
11187 } | |
11188 } | |
11189 | |
11190 case ast_step_root: | |
11191 { | |
11192 assert(!_right); // root step can't have any predicates | |
11193 | |
11194 xpath_node_set_raw ns; | |
11195 | |
11196 ns.set_type(xpath_node_set::type_sorted); | |
11197 | |
11198 if (c.n.node()) ns.push_back(c.n.node().root(), stack.result); | |
11199 else if (c.n.attribute()) ns.push_back(c.n.parent().root(), stack.result); | |
11200 | |
11201 return ns; | |
11202 } | |
11203 | |
11204 case ast_variable: | |
11205 { | |
11206 assert(_rettype == _data.variable->type()); | |
11207 | |
11208 if (_rettype == xpath_type_node_set) | |
11209 { | |
11210 const xpath_node_set& s = _data.variable->get_node_set(); | |
11211 | |
11212 xpath_node_set_raw ns; | |
11213 | |
11214 ns.set_type(s.type()); | |
11215 ns.append(s.begin(), s.end(), stack.result); | |
11216 | |
11217 return ns; | |
11218 } | |
11219 | |
11220 // variable needs to be converted to the correct type, this is handled by the fallthrough block below | |
11221 break; | |
11222 } | |
11223 | |
11224 default: | |
11225 ; | |
11226 } | |
11227 | |
11228 // none of the ast types that return the value directly matched, but conversions to node set are invalid | |
11229 assert(false && "Wrong expression for return type node set"); // unreachable | |
11230 return xpath_node_set_raw(); | |
11231 } | |
11232 | |
11233 void optimize(xpath_allocator* alloc) | |
11234 { | |
11235 if (_left) | |
11236 _left->optimize(alloc); | |
11237 | |
11238 if (_right) | |
11239 _right->optimize(alloc); | |
11240 | |
11241 if (_next) | |
11242 _next->optimize(alloc); | |
11243 | |
11244 // coverity[var_deref_model] | |
11245 optimize_self(alloc); | |
11246 } | |
11247 | |
11248 void optimize_self(xpath_allocator* alloc) | |
11249 { | |
11250 // Rewrite [position()=expr] with [expr] | |
11251 // Note that this step has to go before classification to recognize [position()=1] | |
11252 if ((_type == ast_filter || _type == ast_predicate) && | |
11253 _right && // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate) | |
11254 _right->_type == ast_op_equal && _right->_left->_type == ast_func_position && _right->_right->_rettype == xpath_type_number) | |
11255 { | |
11256 _right = _right->_right; | |
11257 } | |
11258 | |
11259 // Classify filter/predicate ops to perform various optimizations during evaluation | |
11260 if ((_type == ast_filter || _type == ast_predicate) && _right) // workaround for clang static analyzer (_right is never null for ast_filter/ast_predicate) | |
11261 { | |
11262 assert(_test == predicate_default); | |
11263 | |
11264 if (_right->_type == ast_number_constant && _right->_data.number == 1.0) | |
11265 _test = predicate_constant_one; | |
11266 else if (_right->_rettype == xpath_type_number && (_right->_type == ast_number_constant || _right->_type == ast_variable || _right->_type == ast_func_last)) | |
11267 _test = predicate_constant; | |
11268 else if (_right->_rettype != xpath_type_number && _right->is_posinv_expr()) | |
11269 _test = predicate_posinv; | |
11270 } | |
11271 | |
11272 // Rewrite descendant-or-self::node()/child::foo with descendant::foo | |
11273 // The former is a full form of //foo, the latter is much faster since it executes the node test immediately | |
11274 // Do a similar kind of rewrite for self/descendant/descendant-or-self axes | |
11275 // Note that we only rewrite positionally invariant steps (//foo[1] != /descendant::foo[1]) | |
11276 if (_type == ast_step && (_axis == axis_child || _axis == axis_self || _axis == axis_descendant || _axis == axis_descendant_or_self) && | |
11277 _left && _left->_type == ast_step && _left->_axis == axis_descendant_or_self && _left->_test == nodetest_type_node && !_left->_right && | |
11278 is_posinv_step()) | |
11279 { | |
11280 if (_axis == axis_child || _axis == axis_descendant) | |
11281 _axis = axis_descendant; | |
11282 else | |
11283 _axis = axis_descendant_or_self; | |
11284 | |
11285 _left = _left->_left; | |
11286 } | |
11287 | |
11288 // Use optimized lookup table implementation for translate() with constant arguments | |
11289 if (_type == ast_func_translate && | |
11290 _right && // workaround for clang static analyzer (_right is never null for ast_func_translate) | |
11291 _right->_type == ast_string_constant && _right->_next->_type == ast_string_constant) | |
11292 { | |
11293 unsigned char* table = translate_table_generate(alloc, _right->_data.string, _right->_next->_data.string); | |
11294 | |
11295 if (table) | |
11296 { | |
11297 _type = ast_opt_translate_table; | |
11298 _data.table = table; | |
11299 } | |
11300 } | |
11301 | |
11302 // Use optimized path for @attr = 'value' or @attr = $value | |
11303 if (_type == ast_op_equal && | |
11304 _left && _right && // workaround for clang static analyzer and Coverity (_left and _right are never null for ast_op_equal) | |
11305 // coverity[mixed_enums] | |
11306 _left->_type == ast_step && _left->_axis == axis_attribute && _left->_test == nodetest_name && !_left->_left && !_left->_right && | |
11307 (_right->_type == ast_string_constant || (_right->_type == ast_variable && _right->_rettype == xpath_type_string))) | |
11308 { | |
11309 _type = ast_opt_compare_attribute; | |
11310 } | |
11311 } | |
11312 | |
11313 bool is_posinv_expr() const | |
11314 { | |
11315 switch (_type) | |
11316 { | |
11317 case ast_func_position: | |
11318 case ast_func_last: | |
11319 return false; | |
11320 | |
11321 case ast_string_constant: | |
11322 case ast_number_constant: | |
11323 case ast_variable: | |
11324 return true; | |
11325 | |
11326 case ast_step: | |
11327 case ast_step_root: | |
11328 return true; | |
11329 | |
11330 case ast_predicate: | |
11331 case ast_filter: | |
11332 return true; | |
11333 | |
11334 default: | |
11335 if (_left && !_left->is_posinv_expr()) return false; | |
11336 | |
11337 for (xpath_ast_node* n = _right; n; n = n->_next) | |
11338 if (!n->is_posinv_expr()) return false; | |
11339 | |
11340 return true; | |
11341 } | |
11342 } | |
11343 | |
11344 bool is_posinv_step() const | |
11345 { | |
11346 assert(_type == ast_step); | |
11347 | |
11348 for (xpath_ast_node* n = _right; n; n = n->_next) | |
11349 { | |
11350 assert(n->_type == ast_predicate); | |
11351 | |
11352 if (n->_test != predicate_posinv) | |
11353 return false; | |
11354 } | |
11355 | |
11356 return true; | |
11357 } | |
11358 | |
11359 xpath_value_type rettype() const | |
11360 { | |
11361 return static_cast<xpath_value_type>(_rettype); | |
11362 } | |
11363 }; | |
11364 | |
// Depth limit for XPath queries: PUGIXML_XPATH_DEPTH_LIMIT when the build defines it,
// 1024 otherwise. NOTE(review): presumably checked against xpath_parser::_depth - confirm at the use sites.
#ifndef PUGIXML_XPATH_DEPTH_LIMIT
static const size_t xpath_ast_depth_limit = 1024;
#else
static const size_t xpath_ast_depth_limit = PUGIXML_XPATH_DEPTH_LIMIT;
#endif
11372 | |
11373 struct xpath_parser | |
11374 { | |
11375 xpath_allocator* _alloc; | |
11376 xpath_lexer _lexer; | |
11377 | |
11378 const char_t* _query; | |
11379 xpath_variable_set* _variables; | |
11380 | |
11381 xpath_parse_result* _result; | |
11382 | |
11383 char_t _scratch[32]; | |
11384 | |
11385 size_t _depth; | |
11386 | |
11387 xpath_ast_node* error(const char* message) | |
11388 { | |
11389 _result->error = message; | |
11390 _result->offset = _lexer.current_pos() - _query; | |
11391 | |
11392 return 0; | |
11393 } | |
11394 | |
11395 xpath_ast_node* error_oom() | |
11396 { | |
11397 assert(_alloc->_error); | |
11398 *_alloc->_error = true; | |
11399 | |
11400 return 0; | |
11401 } | |
11402 | |
11403 xpath_ast_node* error_rec() | |
11404 { | |
11405 return error("Exceeded maximum allowed query depth"); | |
11406 } | |
11407 | |
11408 void* alloc_node() | |
11409 { | |
11410 return _alloc->allocate(sizeof(xpath_ast_node)); | |
11411 } | |
11412 | |
11413 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, const char_t* value) | |
11414 { | |
11415 void* memory = alloc_node(); | |
11416 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; | |
11417 } | |
11418 | |
11419 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, double value) | |
11420 { | |
11421 void* memory = alloc_node(); | |
11422 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; | |
11423 } | |
11424 | |
11425 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_variable* value) | |
11426 { | |
11427 void* memory = alloc_node(); | |
11428 return memory ? new (memory) xpath_ast_node(type, rettype, value) : 0; | |
11429 } | |
11430 | |
11431 xpath_ast_node* alloc_node(ast_type_t type, xpath_value_type rettype, xpath_ast_node* left = 0, xpath_ast_node* right = 0) | |
11432 { | |
11433 void* memory = alloc_node(); | |
11434 return memory ? new (memory) xpath_ast_node(type, rettype, left, right) : 0; | |
11435 } | |
11436 | |
11437 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, axis_t axis, nodetest_t test, const char_t* contents) | |
11438 { | |
11439 void* memory = alloc_node(); | |
11440 return memory ? new (memory) xpath_ast_node(type, left, axis, test, contents) : 0; | |
11441 } | |
11442 | |
11443 xpath_ast_node* alloc_node(ast_type_t type, xpath_ast_node* left, xpath_ast_node* right, predicate_t test) | |
11444 { | |
11445 void* memory = alloc_node(); | |
11446 return memory ? new (memory) xpath_ast_node(type, left, right, test) : 0; | |
11447 } | |
11448 | |
11449 const char_t* alloc_string(const xpath_lexer_string& value) | |
11450 { | |
11451 if (!value.begin) | |
11452 return PUGIXML_TEXT(""); | |
11453 | |
11454 size_t length = static_cast<size_t>(value.end - value.begin); | |
11455 | |
11456 char_t* c = static_cast<char_t*>(_alloc->allocate((length + 1) * sizeof(char_t))); | |
11457 if (!c) return 0; | |
11458 | |
11459 memcpy(c, value.begin, length * sizeof(char_t)); | |
11460 c[length] = 0; | |
11461 | |
11462 return c; | |
11463 } | |
11464 | |
11465 xpath_ast_node* parse_function(const xpath_lexer_string& name, size_t argc, xpath_ast_node* args[2]) | |
11466 { | |
11467 switch (name.begin[0]) | |
11468 { | |
11469 case 'b': | |
11470 if (name == PUGIXML_TEXT("boolean") && argc == 1) | |
11471 return alloc_node(ast_func_boolean, xpath_type_boolean, args[0]); | |
11472 | |
11473 break; | |
11474 | |
11475 case 'c': | |
11476 if (name == PUGIXML_TEXT("count") && argc == 1) | |
11477 { | |
11478 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
11479 return alloc_node(ast_func_count, xpath_type_number, args[0]); | |
11480 } | |
11481 else if (name == PUGIXML_TEXT("contains") && argc == 2) | |
11482 return alloc_node(ast_func_contains, xpath_type_boolean, args[0], args[1]); | |
11483 else if (name == PUGIXML_TEXT("concat") && argc >= 2) | |
11484 return alloc_node(ast_func_concat, xpath_type_string, args[0], args[1]); | |
11485 else if (name == PUGIXML_TEXT("ceiling") && argc == 1) | |
11486 return alloc_node(ast_func_ceiling, xpath_type_number, args[0]); | |
11487 | |
11488 break; | |
11489 | |
11490 case 'f': | |
11491 if (name == PUGIXML_TEXT("false") && argc == 0) | |
11492 return alloc_node(ast_func_false, xpath_type_boolean); | |
11493 else if (name == PUGIXML_TEXT("floor") && argc == 1) | |
11494 return alloc_node(ast_func_floor, xpath_type_number, args[0]); | |
11495 | |
11496 break; | |
11497 | |
11498 case 'i': | |
11499 if (name == PUGIXML_TEXT("id") && argc == 1) | |
11500 return alloc_node(ast_func_id, xpath_type_node_set, args[0]); | |
11501 | |
11502 break; | |
11503 | |
11504 case 'l': | |
11505 if (name == PUGIXML_TEXT("last") && argc == 0) | |
11506 return alloc_node(ast_func_last, xpath_type_number); | |
11507 else if (name == PUGIXML_TEXT("lang") && argc == 1) | |
11508 return alloc_node(ast_func_lang, xpath_type_boolean, args[0]); | |
11509 else if (name == PUGIXML_TEXT("local-name") && argc <= 1) | |
11510 { | |
11511 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
11512 return alloc_node(argc == 0 ? ast_func_local_name_0 : ast_func_local_name_1, xpath_type_string, args[0]); | |
11513 } | |
11514 | |
11515 break; | |
11516 | |
11517 case 'n': | |
11518 if (name == PUGIXML_TEXT("name") && argc <= 1) | |
11519 { | |
11520 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
11521 return alloc_node(argc == 0 ? ast_func_name_0 : ast_func_name_1, xpath_type_string, args[0]); | |
11522 } | |
11523 else if (name == PUGIXML_TEXT("namespace-uri") && argc <= 1) | |
11524 { | |
11525 if (argc == 1 && args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
11526 return alloc_node(argc == 0 ? ast_func_namespace_uri_0 : ast_func_namespace_uri_1, xpath_type_string, args[0]); | |
11527 } | |
11528 else if (name == PUGIXML_TEXT("normalize-space") && argc <= 1) | |
11529 return alloc_node(argc == 0 ? ast_func_normalize_space_0 : ast_func_normalize_space_1, xpath_type_string, args[0], args[1]); | |
11530 else if (name == PUGIXML_TEXT("not") && argc == 1) | |
11531 return alloc_node(ast_func_not, xpath_type_boolean, args[0]); | |
11532 else if (name == PUGIXML_TEXT("number") && argc <= 1) | |
11533 return alloc_node(argc == 0 ? ast_func_number_0 : ast_func_number_1, xpath_type_number, args[0]); | |
11534 | |
11535 break; | |
11536 | |
11537 case 'p': | |
11538 if (name == PUGIXML_TEXT("position") && argc == 0) | |
11539 return alloc_node(ast_func_position, xpath_type_number); | |
11540 | |
11541 break; | |
11542 | |
11543 case 'r': | |
11544 if (name == PUGIXML_TEXT("round") && argc == 1) | |
11545 return alloc_node(ast_func_round, xpath_type_number, args[0]); | |
11546 | |
11547 break; | |
11548 | |
11549 case 's': | |
11550 if (name == PUGIXML_TEXT("string") && argc <= 1) | |
11551 return alloc_node(argc == 0 ? ast_func_string_0 : ast_func_string_1, xpath_type_string, args[0]); | |
11552 else if (name == PUGIXML_TEXT("string-length") && argc <= 1) | |
11553 return alloc_node(argc == 0 ? ast_func_string_length_0 : ast_func_string_length_1, xpath_type_number, args[0]); | |
11554 else if (name == PUGIXML_TEXT("starts-with") && argc == 2) | |
11555 return alloc_node(ast_func_starts_with, xpath_type_boolean, args[0], args[1]); | |
11556 else if (name == PUGIXML_TEXT("substring-before") && argc == 2) | |
11557 return alloc_node(ast_func_substring_before, xpath_type_string, args[0], args[1]); | |
11558 else if (name == PUGIXML_TEXT("substring-after") && argc == 2) | |
11559 return alloc_node(ast_func_substring_after, xpath_type_string, args[0], args[1]); | |
11560 else if (name == PUGIXML_TEXT("substring") && (argc == 2 || argc == 3)) | |
11561 return alloc_node(argc == 2 ? ast_func_substring_2 : ast_func_substring_3, xpath_type_string, args[0], args[1]); | |
11562 else if (name == PUGIXML_TEXT("sum") && argc == 1) | |
11563 { | |
11564 if (args[0]->rettype() != xpath_type_node_set) return error("Function has to be applied to node set"); | |
11565 return alloc_node(ast_func_sum, xpath_type_number, args[0]); | |
11566 } | |
11567 | |
11568 break; | |
11569 | |
11570 case 't': | |
11571 if (name == PUGIXML_TEXT("translate") && argc == 3) | |
11572 return alloc_node(ast_func_translate, xpath_type_string, args[0], args[1]); | |
11573 else if (name == PUGIXML_TEXT("true") && argc == 0) | |
11574 return alloc_node(ast_func_true, xpath_type_boolean); | |
11575 | |
11576 break; | |
11577 | |
11578 default: | |
11579 break; | |
11580 } | |
11581 | |
11582 return error("Unrecognized function or wrong parameter count"); | |
11583 } | |
11584 | |
11585 axis_t parse_axis_name(const xpath_lexer_string& name, bool& specified) | |
11586 { | |
11587 specified = true; | |
11588 | |
11589 switch (name.begin[0]) | |
11590 { | |
11591 case 'a': | |
11592 if (name == PUGIXML_TEXT("ancestor")) | |
11593 return axis_ancestor; | |
11594 else if (name == PUGIXML_TEXT("ancestor-or-self")) | |
11595 return axis_ancestor_or_self; | |
11596 else if (name == PUGIXML_TEXT("attribute")) | |
11597 return axis_attribute; | |
11598 | |
11599 break; | |
11600 | |
11601 case 'c': | |
11602 if (name == PUGIXML_TEXT("child")) | |
11603 return axis_child; | |
11604 | |
11605 break; | |
11606 | |
11607 case 'd': | |
11608 if (name == PUGIXML_TEXT("descendant")) | |
11609 return axis_descendant; | |
11610 else if (name == PUGIXML_TEXT("descendant-or-self")) | |
11611 return axis_descendant_or_self; | |
11612 | |
11613 break; | |
11614 | |
11615 case 'f': | |
11616 if (name == PUGIXML_TEXT("following")) | |
11617 return axis_following; | |
11618 else if (name == PUGIXML_TEXT("following-sibling")) | |
11619 return axis_following_sibling; | |
11620 | |
11621 break; | |
11622 | |
11623 case 'n': | |
11624 if (name == PUGIXML_TEXT("namespace")) | |
11625 return axis_namespace; | |
11626 | |
11627 break; | |
11628 | |
11629 case 'p': | |
11630 if (name == PUGIXML_TEXT("parent")) | |
11631 return axis_parent; | |
11632 else if (name == PUGIXML_TEXT("preceding")) | |
11633 return axis_preceding; | |
11634 else if (name == PUGIXML_TEXT("preceding-sibling")) | |
11635 return axis_preceding_sibling; | |
11636 | |
11637 break; | |
11638 | |
11639 case 's': | |
11640 if (name == PUGIXML_TEXT("self")) | |
11641 return axis_self; | |
11642 | |
11643 break; | |
11644 | |
11645 default: | |
11646 break; | |
11647 } | |
11648 | |
11649 specified = false; | |
11650 return axis_child; | |
11651 } | |
11652 | |
11653 nodetest_t parse_node_test_type(const xpath_lexer_string& name) | |
11654 { | |
11655 switch (name.begin[0]) | |
11656 { | |
11657 case 'c': | |
11658 if (name == PUGIXML_TEXT("comment")) | |
11659 return nodetest_type_comment; | |
11660 | |
11661 break; | |
11662 | |
11663 case 'n': | |
11664 if (name == PUGIXML_TEXT("node")) | |
11665 return nodetest_type_node; | |
11666 | |
11667 break; | |
11668 | |
11669 case 'p': | |
11670 if (name == PUGIXML_TEXT("processing-instruction")) | |
11671 return nodetest_type_pi; | |
11672 | |
11673 break; | |
11674 | |
11675 case 't': | |
11676 if (name == PUGIXML_TEXT("text")) | |
11677 return nodetest_type_text; | |
11678 | |
11679 break; | |
11680 | |
11681 default: | |
11682 break; | |
11683 } | |
11684 | |
11685 return nodetest_none; | |
11686 } | |
11687 | |
11688 // PrimaryExpr ::= VariableReference | '(' Expr ')' | Literal | Number | FunctionCall | |
11689 xpath_ast_node* parse_primary_expression() | |
11690 { | |
11691 switch (_lexer.current()) | |
11692 { | |
11693 case lex_var_ref: | |
11694 { | |
11695 xpath_lexer_string name = _lexer.contents(); | |
11696 | |
11697 if (!_variables) | |
11698 return error("Unknown variable: variable set is not provided"); | |
11699 | |
11700 xpath_variable* var = 0; | |
11701 if (!get_variable_scratch(_scratch, _variables, name.begin, name.end, &var)) | |
11702 return error_oom(); | |
11703 | |
11704 if (!var) | |
11705 return error("Unknown variable: variable set does not contain the given name"); | |
11706 | |
11707 _lexer.next(); | |
11708 | |
11709 return alloc_node(ast_variable, var->type(), var); | |
11710 } | |
11711 | |
11712 case lex_open_brace: | |
11713 { | |
11714 _lexer.next(); | |
11715 | |
11716 xpath_ast_node* n = parse_expression(); | |
11717 if (!n) return 0; | |
11718 | |
11719 if (_lexer.current() != lex_close_brace) | |
11720 return error("Expected ')' to match an opening '('"); | |
11721 | |
11722 _lexer.next(); | |
11723 | |
11724 return n; | |
11725 } | |
11726 | |
11727 case lex_quoted_string: | |
11728 { | |
11729 const char_t* value = alloc_string(_lexer.contents()); | |
11730 if (!value) return 0; | |
11731 | |
11732 _lexer.next(); | |
11733 | |
11734 return alloc_node(ast_string_constant, xpath_type_string, value); | |
11735 } | |
11736 | |
11737 case lex_number: | |
11738 { | |
11739 double value = 0; | |
11740 | |
11741 if (!convert_string_to_number_scratch(_scratch, _lexer.contents().begin, _lexer.contents().end, &value)) | |
11742 return error_oom(); | |
11743 | |
11744 _lexer.next(); | |
11745 | |
11746 return alloc_node(ast_number_constant, xpath_type_number, value); | |
11747 } | |
11748 | |
11749 case lex_string: | |
11750 { | |
11751 xpath_ast_node* args[2] = {0}; | |
11752 size_t argc = 0; | |
11753 | |
11754 xpath_lexer_string function = _lexer.contents(); | |
11755 _lexer.next(); | |
11756 | |
11757 xpath_ast_node* last_arg = 0; | |
11758 | |
11759 if (_lexer.current() != lex_open_brace) | |
11760 return error("Unrecognized function call"); | |
11761 _lexer.next(); | |
11762 | |
11763 size_t old_depth = _depth; | |
11764 | |
11765 while (_lexer.current() != lex_close_brace) | |
11766 { | |
11767 if (argc > 0) | |
11768 { | |
11769 if (_lexer.current() != lex_comma) | |
11770 return error("No comma between function arguments"); | |
11771 _lexer.next(); | |
11772 } | |
11773 | |
11774 if (++_depth > xpath_ast_depth_limit) | |
11775 return error_rec(); | |
11776 | |
11777 xpath_ast_node* n = parse_expression(); | |
11778 if (!n) return 0; | |
11779 | |
11780 if (argc < 2) args[argc] = n; | |
11781 else last_arg->set_next(n); | |
11782 | |
11783 argc++; | |
11784 last_arg = n; | |
11785 } | |
11786 | |
11787 _lexer.next(); | |
11788 | |
11789 _depth = old_depth; | |
11790 | |
11791 return parse_function(function, argc, args); | |
11792 } | |
11793 | |
11794 default: | |
11795 return error("Unrecognizable primary expression"); | |
11796 } | |
11797 } | |
11798 | |
11799 // FilterExpr ::= PrimaryExpr | FilterExpr Predicate | |
11800 // Predicate ::= '[' PredicateExpr ']' | |
11801 // PredicateExpr ::= Expr | |
11802 xpath_ast_node* parse_filter_expression() | |
11803 { | |
11804 xpath_ast_node* n = parse_primary_expression(); | |
11805 if (!n) return 0; | |
11806 | |
11807 size_t old_depth = _depth; | |
11808 | |
11809 while (_lexer.current() == lex_open_square_brace) | |
11810 { | |
11811 _lexer.next(); | |
11812 | |
11813 if (++_depth > xpath_ast_depth_limit) | |
11814 return error_rec(); | |
11815 | |
11816 if (n->rettype() != xpath_type_node_set) | |
11817 return error("Predicate has to be applied to node set"); | |
11818 | |
11819 xpath_ast_node* expr = parse_expression(); | |
11820 if (!expr) return 0; | |
11821 | |
11822 n = alloc_node(ast_filter, n, expr, predicate_default); | |
11823 if (!n) return 0; | |
11824 | |
11825 if (_lexer.current() != lex_close_square_brace) | |
11826 return error("Expected ']' to match an opening '['"); | |
11827 | |
11828 _lexer.next(); | |
11829 } | |
11830 | |
11831 _depth = old_depth; | |
11832 | |
11833 return n; | |
11834 } | |
11835 | |
11836 // Step ::= AxisSpecifier NodeTest Predicate* | AbbreviatedStep | |
11837 // AxisSpecifier ::= AxisName '::' | '@'? | |
11838 // NodeTest ::= NameTest | NodeType '(' ')' | 'processing-instruction' '(' Literal ')' | |
11839 // NameTest ::= '*' | NCName ':' '*' | QName | |
11840 // AbbreviatedStep ::= '.' | '..' | |
11841 xpath_ast_node* parse_step(xpath_ast_node* set) | |
11842 { | |
11843 if (set && set->rettype() != xpath_type_node_set) | |
11844 return error("Step has to be applied to node set"); | |
11845 | |
11846 bool axis_specified = false; | |
11847 axis_t axis = axis_child; // implied child axis | |
11848 | |
11849 if (_lexer.current() == lex_axis_attribute) | |
11850 { | |
11851 axis = axis_attribute; | |
11852 axis_specified = true; | |
11853 | |
11854 _lexer.next(); | |
11855 } | |
11856 else if (_lexer.current() == lex_dot) | |
11857 { | |
11858 _lexer.next(); | |
11859 | |
11860 if (_lexer.current() == lex_open_square_brace) | |
11861 return error("Predicates are not allowed after an abbreviated step"); | |
11862 | |
11863 return alloc_node(ast_step, set, axis_self, nodetest_type_node, 0); | |
11864 } | |
11865 else if (_lexer.current() == lex_double_dot) | |
11866 { | |
11867 _lexer.next(); | |
11868 | |
11869 if (_lexer.current() == lex_open_square_brace) | |
11870 return error("Predicates are not allowed after an abbreviated step"); | |
11871 | |
11872 return alloc_node(ast_step, set, axis_parent, nodetest_type_node, 0); | |
11873 } | |
11874 | |
11875 nodetest_t nt_type = nodetest_none; | |
11876 xpath_lexer_string nt_name; | |
11877 | |
11878 if (_lexer.current() == lex_string) | |
11879 { | |
11880 // node name test | |
11881 nt_name = _lexer.contents(); | |
11882 _lexer.next(); | |
11883 | |
11884 // was it an axis name? | |
11885 if (_lexer.current() == lex_double_colon) | |
11886 { | |
11887 // parse axis name | |
11888 if (axis_specified) | |
11889 return error("Two axis specifiers in one step"); | |
11890 | |
11891 axis = parse_axis_name(nt_name, axis_specified); | |
11892 | |
11893 if (!axis_specified) | |
11894 return error("Unknown axis"); | |
11895 | |
11896 // read actual node test | |
11897 _lexer.next(); | |
11898 | |
11899 if (_lexer.current() == lex_multiply) | |
11900 { | |
11901 nt_type = nodetest_all; | |
11902 nt_name = xpath_lexer_string(); | |
11903 _lexer.next(); | |
11904 } | |
11905 else if (_lexer.current() == lex_string) | |
11906 { | |
11907 nt_name = _lexer.contents(); | |
11908 _lexer.next(); | |
11909 } | |
11910 else | |
11911 { | |
11912 return error("Unrecognized node test"); | |
11913 } | |
11914 } | |
11915 | |
11916 if (nt_type == nodetest_none) | |
11917 { | |
11918 // node type test or processing-instruction | |
11919 if (_lexer.current() == lex_open_brace) | |
11920 { | |
11921 _lexer.next(); | |
11922 | |
11923 if (_lexer.current() == lex_close_brace) | |
11924 { | |
11925 _lexer.next(); | |
11926 | |
11927 nt_type = parse_node_test_type(nt_name); | |
11928 | |
11929 if (nt_type == nodetest_none) | |
11930 return error("Unrecognized node type"); | |
11931 | |
11932 nt_name = xpath_lexer_string(); | |
11933 } | |
11934 else if (nt_name == PUGIXML_TEXT("processing-instruction")) | |
11935 { | |
11936 if (_lexer.current() != lex_quoted_string) | |
11937 return error("Only literals are allowed as arguments to processing-instruction()"); | |
11938 | |
11939 nt_type = nodetest_pi; | |
11940 nt_name = _lexer.contents(); | |
11941 _lexer.next(); | |
11942 | |
11943 if (_lexer.current() != lex_close_brace) | |
11944 return error("Unmatched brace near processing-instruction()"); | |
11945 _lexer.next(); | |
11946 } | |
11947 else | |
11948 { | |
11949 return error("Unmatched brace near node type test"); | |
11950 } | |
11951 } | |
11952 // QName or NCName:* | |
11953 else | |
11954 { | |
11955 if (nt_name.end - nt_name.begin > 2 && nt_name.end[-2] == ':' && nt_name.end[-1] == '*') // NCName:* | |
11956 { | |
11957 nt_name.end--; // erase * | |
11958 | |
11959 nt_type = nodetest_all_in_namespace; | |
11960 } | |
11961 else | |
11962 { | |
11963 nt_type = nodetest_name; | |
11964 } | |
11965 } | |
11966 } | |
11967 } | |
11968 else if (_lexer.current() == lex_multiply) | |
11969 { | |
11970 nt_type = nodetest_all; | |
11971 _lexer.next(); | |
11972 } | |
11973 else | |
11974 { | |
11975 return error("Unrecognized node test"); | |
11976 } | |
11977 | |
11978 const char_t* nt_name_copy = alloc_string(nt_name); | |
11979 if (!nt_name_copy) return 0; | |
11980 | |
11981 xpath_ast_node* n = alloc_node(ast_step, set, axis, nt_type, nt_name_copy); | |
11982 if (!n) return 0; | |
11983 | |
11984 size_t old_depth = _depth; | |
11985 | |
11986 xpath_ast_node* last = 0; | |
11987 | |
11988 while (_lexer.current() == lex_open_square_brace) | |
11989 { | |
11990 _lexer.next(); | |
11991 | |
11992 if (++_depth > xpath_ast_depth_limit) | |
11993 return error_rec(); | |
11994 | |
11995 xpath_ast_node* expr = parse_expression(); | |
11996 if (!expr) return 0; | |
11997 | |
11998 xpath_ast_node* pred = alloc_node(ast_predicate, 0, expr, predicate_default); | |
11999 if (!pred) return 0; | |
12000 | |
12001 if (_lexer.current() != lex_close_square_brace) | |
12002 return error("Expected ']' to match an opening '['"); | |
12003 _lexer.next(); | |
12004 | |
12005 if (last) last->set_next(pred); | |
12006 else n->set_right(pred); | |
12007 | |
12008 last = pred; | |
12009 } | |
12010 | |
12011 _depth = old_depth; | |
12012 | |
12013 return n; | |
12014 } | |
12015 | |
12016 // RelativeLocationPath ::= Step | RelativeLocationPath '/' Step | RelativeLocationPath '//' Step | |
12017 xpath_ast_node* parse_relative_location_path(xpath_ast_node* set) | |
12018 { | |
12019 xpath_ast_node* n = parse_step(set); | |
12020 if (!n) return 0; | |
12021 | |
12022 size_t old_depth = _depth; | |
12023 | |
12024 while (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) | |
12025 { | |
12026 lexeme_t l = _lexer.current(); | |
12027 _lexer.next(); | |
12028 | |
12029 if (l == lex_double_slash) | |
12030 { | |
12031 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); | |
12032 if (!n) return 0; | |
12033 | |
12034 ++_depth; | |
12035 } | |
12036 | |
12037 if (++_depth > xpath_ast_depth_limit) | |
12038 return error_rec(); | |
12039 | |
12040 n = parse_step(n); | |
12041 if (!n) return 0; | |
12042 } | |
12043 | |
12044 _depth = old_depth; | |
12045 | |
12046 return n; | |
12047 } | |
12048 | |
12049 // LocationPath ::= RelativeLocationPath | AbsoluteLocationPath | |
12050 // AbsoluteLocationPath ::= '/' RelativeLocationPath? | '//' RelativeLocationPath | |
12051 xpath_ast_node* parse_location_path() | |
12052 { | |
12053 if (_lexer.current() == lex_slash) | |
12054 { | |
12055 _lexer.next(); | |
12056 | |
12057 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); | |
12058 if (!n) return 0; | |
12059 | |
12060 // relative location path can start from axis_attribute, dot, double_dot, multiply and string lexemes; any other lexeme means standalone root path | |
12061 lexeme_t l = _lexer.current(); | |
12062 | |
12063 if (l == lex_string || l == lex_axis_attribute || l == lex_dot || l == lex_double_dot || l == lex_multiply) | |
12064 return parse_relative_location_path(n); | |
12065 else | |
12066 return n; | |
12067 } | |
12068 else if (_lexer.current() == lex_double_slash) | |
12069 { | |
12070 _lexer.next(); | |
12071 | |
12072 xpath_ast_node* n = alloc_node(ast_step_root, xpath_type_node_set); | |
12073 if (!n) return 0; | |
12074 | |
12075 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); | |
12076 if (!n) return 0; | |
12077 | |
12078 return parse_relative_location_path(n); | |
12079 } | |
12080 | |
12081 // else clause moved outside of if because of bogus warning 'control may reach end of non-void function being inlined' in gcc 4.0.1 | |
12082 return parse_relative_location_path(0); | |
12083 } | |
12084 | |
12085 // PathExpr ::= LocationPath | |
12086 // | FilterExpr | |
12087 // | FilterExpr '/' RelativeLocationPath | |
12088 // | FilterExpr '//' RelativeLocationPath | |
12089 // UnionExpr ::= PathExpr | UnionExpr '|' PathExpr | |
12090 // UnaryExpr ::= UnionExpr | '-' UnaryExpr | |
12091 xpath_ast_node* parse_path_or_unary_expression() | |
12092 { | |
12093 // Clarification. | |
12094 // PathExpr begins with either LocationPath or FilterExpr. | |
12095 // FilterExpr begins with PrimaryExpr | |
12096 // PrimaryExpr begins with '$' in case of it being a variable reference, | |
12097 // '(' in case of it being an expression, string literal, number constant or | |
12098 // function call. | |
12099 if (_lexer.current() == lex_var_ref || _lexer.current() == lex_open_brace || | |
12100 _lexer.current() == lex_quoted_string || _lexer.current() == lex_number || | |
12101 _lexer.current() == lex_string) | |
12102 { | |
12103 if (_lexer.current() == lex_string) | |
12104 { | |
12105 // This is either a function call, or not - if not, we shall proceed with location path | |
12106 const char_t* state = _lexer.state(); | |
12107 | |
12108 while (PUGI_IMPL_IS_CHARTYPE(*state, ct_space)) ++state; | |
12109 | |
12110 if (*state != '(') | |
12111 return parse_location_path(); | |
12112 | |
12113 // This looks like a function call; however this still can be a node-test. Check it. | |
12114 if (parse_node_test_type(_lexer.contents()) != nodetest_none) | |
12115 return parse_location_path(); | |
12116 } | |
12117 | |
12118 xpath_ast_node* n = parse_filter_expression(); | |
12119 if (!n) return 0; | |
12120 | |
12121 if (_lexer.current() == lex_slash || _lexer.current() == lex_double_slash) | |
12122 { | |
12123 lexeme_t l = _lexer.current(); | |
12124 _lexer.next(); | |
12125 | |
12126 if (l == lex_double_slash) | |
12127 { | |
12128 if (n->rettype() != xpath_type_node_set) | |
12129 return error("Step has to be applied to node set"); | |
12130 | |
12131 n = alloc_node(ast_step, n, axis_descendant_or_self, nodetest_type_node, 0); | |
12132 if (!n) return 0; | |
12133 } | |
12134 | |
12135 // select from location path | |
12136 return parse_relative_location_path(n); | |
12137 } | |
12138 | |
12139 return n; | |
12140 } | |
12141 else if (_lexer.current() == lex_minus) | |
12142 { | |
12143 _lexer.next(); | |
12144 | |
12145 // precedence 7+ - only parses union expressions | |
12146 xpath_ast_node* n = parse_expression(7); | |
12147 if (!n) return 0; | |
12148 | |
12149 return alloc_node(ast_op_negate, xpath_type_number, n); | |
12150 } | |
12151 else | |
12152 { | |
12153 return parse_location_path(); | |
12154 } | |
12155 } | |
12156 | |
12157 struct binary_op_t | |
12158 { | |
12159 ast_type_t asttype; | |
12160 xpath_value_type rettype; | |
12161 int precedence; | |
12162 | |
12163 binary_op_t(): asttype(ast_unknown), rettype(xpath_type_none), precedence(0) | |
12164 { | |
12165 } | |
12166 | |
12167 binary_op_t(ast_type_t asttype_, xpath_value_type rettype_, int precedence_): asttype(asttype_), rettype(rettype_), precedence(precedence_) | |
12168 { | |
12169 } | |
12170 | |
12171 static binary_op_t parse(xpath_lexer& lexer) | |
12172 { | |
12173 switch (lexer.current()) | |
12174 { | |
12175 case lex_string: | |
12176 if (lexer.contents() == PUGIXML_TEXT("or")) | |
12177 return binary_op_t(ast_op_or, xpath_type_boolean, 1); | |
12178 else if (lexer.contents() == PUGIXML_TEXT("and")) | |
12179 return binary_op_t(ast_op_and, xpath_type_boolean, 2); | |
12180 else if (lexer.contents() == PUGIXML_TEXT("div")) | |
12181 return binary_op_t(ast_op_divide, xpath_type_number, 6); | |
12182 else if (lexer.contents() == PUGIXML_TEXT("mod")) | |
12183 return binary_op_t(ast_op_mod, xpath_type_number, 6); | |
12184 else | |
12185 return binary_op_t(); | |
12186 | |
12187 case lex_equal: | |
12188 return binary_op_t(ast_op_equal, xpath_type_boolean, 3); | |
12189 | |
12190 case lex_not_equal: | |
12191 return binary_op_t(ast_op_not_equal, xpath_type_boolean, 3); | |
12192 | |
12193 case lex_less: | |
12194 return binary_op_t(ast_op_less, xpath_type_boolean, 4); | |
12195 | |
12196 case lex_greater: | |
12197 return binary_op_t(ast_op_greater, xpath_type_boolean, 4); | |
12198 | |
12199 case lex_less_or_equal: | |
12200 return binary_op_t(ast_op_less_or_equal, xpath_type_boolean, 4); | |
12201 | |
12202 case lex_greater_or_equal: | |
12203 return binary_op_t(ast_op_greater_or_equal, xpath_type_boolean, 4); | |
12204 | |
12205 case lex_plus: | |
12206 return binary_op_t(ast_op_add, xpath_type_number, 5); | |
12207 | |
12208 case lex_minus: | |
12209 return binary_op_t(ast_op_subtract, xpath_type_number, 5); | |
12210 | |
12211 case lex_multiply: | |
12212 return binary_op_t(ast_op_multiply, xpath_type_number, 6); | |
12213 | |
12214 case lex_union: | |
12215 return binary_op_t(ast_op_union, xpath_type_node_set, 7); | |
12216 | |
12217 default: | |
12218 return binary_op_t(); | |
12219 } | |
12220 } | |
12221 }; | |
12222 | |
12223 xpath_ast_node* parse_expression_rec(xpath_ast_node* lhs, int limit) | |
12224 { | |
12225 binary_op_t op = binary_op_t::parse(_lexer); | |
12226 | |
12227 while (op.asttype != ast_unknown && op.precedence >= limit) | |
12228 { | |
12229 _lexer.next(); | |
12230 | |
12231 if (++_depth > xpath_ast_depth_limit) | |
12232 return error_rec(); | |
12233 | |
12234 xpath_ast_node* rhs = parse_path_or_unary_expression(); | |
12235 if (!rhs) return 0; | |
12236 | |
12237 binary_op_t nextop = binary_op_t::parse(_lexer); | |
12238 | |
12239 while (nextop.asttype != ast_unknown && nextop.precedence > op.precedence) | |
12240 { | |
12241 rhs = parse_expression_rec(rhs, nextop.precedence); | |
12242 if (!rhs) return 0; | |
12243 | |
12244 nextop = binary_op_t::parse(_lexer); | |
12245 } | |
12246 | |
12247 if (op.asttype == ast_op_union && (lhs->rettype() != xpath_type_node_set || rhs->rettype() != xpath_type_node_set)) | |
12248 return error("Union operator has to be applied to node sets"); | |
12249 | |
12250 lhs = alloc_node(op.asttype, op.rettype, lhs, rhs); | |
12251 if (!lhs) return 0; | |
12252 | |
12253 op = binary_op_t::parse(_lexer); | |
12254 } | |
12255 | |
12256 return lhs; | |
12257 } | |
12258 | |
12259 // Expr ::= OrExpr | |
12260 // OrExpr ::= AndExpr | OrExpr 'or' AndExpr | |
12261 // AndExpr ::= EqualityExpr | AndExpr 'and' EqualityExpr | |
12262 // EqualityExpr ::= RelationalExpr | |
12263 // | EqualityExpr '=' RelationalExpr | |
12264 // | EqualityExpr '!=' RelationalExpr | |
12265 // RelationalExpr ::= AdditiveExpr | |
12266 // | RelationalExpr '<' AdditiveExpr | |
12267 // | RelationalExpr '>' AdditiveExpr | |
12268 // | RelationalExpr '<=' AdditiveExpr | |
12269 // | RelationalExpr '>=' AdditiveExpr | |
12270 // AdditiveExpr ::= MultiplicativeExpr | |
12271 // | AdditiveExpr '+' MultiplicativeExpr | |
12272 // | AdditiveExpr '-' MultiplicativeExpr | |
12273 // MultiplicativeExpr ::= UnaryExpr | |
12274 // | MultiplicativeExpr '*' UnaryExpr | |
12275 // | MultiplicativeExpr 'div' UnaryExpr | |
12276 // | MultiplicativeExpr 'mod' UnaryExpr | |
12277 xpath_ast_node* parse_expression(int limit = 0) | |
12278 { | |
12279 size_t old_depth = _depth; | |
12280 | |
12281 if (++_depth > xpath_ast_depth_limit) | |
12282 return error_rec(); | |
12283 | |
12284 xpath_ast_node* n = parse_path_or_unary_expression(); | |
12285 if (!n) return 0; | |
12286 | |
12287 n = parse_expression_rec(n, limit); | |
12288 | |
12289 _depth = old_depth; | |
12290 | |
12291 return n; | |
12292 } | |
12293 | |
12294 xpath_parser(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result): _alloc(alloc), _lexer(query), _query(query), _variables(variables), _result(result), _depth(0) | |
12295 { | |
12296 } | |
12297 | |
12298 xpath_ast_node* parse() | |
12299 { | |
12300 xpath_ast_node* n = parse_expression(); | |
12301 if (!n) return 0; | |
12302 | |
12303 assert(_depth == 0); | |
12304 | |
12305 // check if there are unparsed tokens left | |
12306 if (_lexer.current() != lex_eof) | |
12307 return error("Incorrect query"); | |
12308 | |
12309 return n; | |
12310 } | |
12311 | |
12312 static xpath_ast_node* parse(const char_t* query, xpath_variable_set* variables, xpath_allocator* alloc, xpath_parse_result* result) | |
12313 { | |
12314 xpath_parser parser(query, variables, alloc, result); | |
12315 | |
12316 return parser.parse(); | |
12317 } | |
12318 }; | |
12319 | |
12320 struct xpath_query_impl | |
12321 { | |
12322 static xpath_query_impl* create() | |
12323 { | |
12324 void* memory = xml_memory::allocate(sizeof(xpath_query_impl)); | |
12325 if (!memory) return 0; | |
12326 | |
12327 return new (memory) xpath_query_impl(); | |
12328 } | |
12329 | |
12330 static void destroy(xpath_query_impl* impl) | |
12331 { | |
12332 // free all allocated pages | |
12333 impl->alloc.release(); | |
12334 | |
12335 // free allocator memory (with the first page) | |
12336 xml_memory::deallocate(impl); | |
12337 } | |
12338 | |
12339 xpath_query_impl(): root(0), alloc(&block, &oom), oom(false) | |
12340 { | |
12341 block.next = 0; | |
12342 block.capacity = sizeof(block.data); | |
12343 } | |
12344 | |
12345 xpath_ast_node* root; | |
12346 xpath_allocator alloc; | |
12347 xpath_memory_block block; | |
12348 bool oom; | |
12349 }; | |
12350 | |
12351 PUGI_IMPL_FN impl::xpath_ast_node* evaluate_node_set_prepare(xpath_query_impl* impl) | |
12352 { | |
12353 if (!impl) return 0; | |
12354 | |
12355 if (impl->root->rettype() != xpath_type_node_set) | |
12356 { | |
12357 #ifdef PUGIXML_NO_EXCEPTIONS | |
12358 return 0; | |
12359 #else | |
12360 xpath_parse_result res; | |
12361 res.error = "Expression does not evaluate to node set"; | |
12362 | |
12363 throw xpath_exception(res); | |
12364 #endif | |
12365 } | |
12366 | |
12367 return impl->root; | |
12368 } | |
12369 PUGI_IMPL_NS_END | |
12370 | |
12371 namespace pugi | |
12372 { | |
12373 #ifndef PUGIXML_NO_EXCEPTIONS | |
12374 PUGI_IMPL_FN xpath_exception::xpath_exception(const xpath_parse_result& result_): _result(result_) | |
12375 { | |
12376 assert(_result.error); | |
12377 } | |
12378 | |
12379 PUGI_IMPL_FN const char* xpath_exception::what() const throw() | |
12380 { | |
12381 return _result.error; | |
12382 } | |
12383 | |
12384 PUGI_IMPL_FN const xpath_parse_result& xpath_exception::result() const | |
12385 { | |
12386 return _result; | |
12387 } | |
12388 #endif | |
12389 | |
12390 PUGI_IMPL_FN xpath_node::xpath_node() | |
12391 { | |
12392 } | |
12393 | |
12394 PUGI_IMPL_FN xpath_node::xpath_node(const xml_node& node_): _node(node_) | |
12395 { | |
12396 } | |
12397 | |
12398 PUGI_IMPL_FN xpath_node::xpath_node(const xml_attribute& attribute_, const xml_node& parent_): _node(attribute_ ? parent_ : xml_node()), _attribute(attribute_) | |
12399 { | |
12400 } | |
12401 | |
12402 PUGI_IMPL_FN xml_node xpath_node::node() const | |
12403 { | |
12404 return _attribute ? xml_node() : _node; | |
12405 } | |
12406 | |
12407 PUGI_IMPL_FN xml_attribute xpath_node::attribute() const | |
12408 { | |
12409 return _attribute; | |
12410 } | |
12411 | |
12412 PUGI_IMPL_FN xml_node xpath_node::parent() const | |
12413 { | |
12414 return _attribute ? _node : _node.parent(); | |
12415 } | |
12416 | |
12417 PUGI_IMPL_FN static void unspecified_bool_xpath_node(xpath_node***) | |
12418 { | |
12419 } | |
12420 | |
12421 PUGI_IMPL_FN xpath_node::operator xpath_node::unspecified_bool_type() const | |
12422 { | |
12423 return (_node || _attribute) ? unspecified_bool_xpath_node : 0; | |
12424 } | |
12425 | |
12426 PUGI_IMPL_FN bool xpath_node::operator!() const | |
12427 { | |
12428 return !(_node || _attribute); | |
12429 } | |
12430 | |
12431 PUGI_IMPL_FN bool xpath_node::operator==(const xpath_node& n) const | |
12432 { | |
12433 return _node == n._node && _attribute == n._attribute; | |
12434 } | |
12435 | |
12436 PUGI_IMPL_FN bool xpath_node::operator!=(const xpath_node& n) const | |
12437 { | |
12438 return _node != n._node || _attribute != n._attribute; | |
12439 } | |
12440 | |
12441 #ifdef __BORLANDC__ | |
12442 PUGI_IMPL_FN bool operator&&(const xpath_node& lhs, bool rhs) | |
12443 { | |
12444 return (bool)lhs && rhs; | |
12445 } | |
12446 | |
12447 PUGI_IMPL_FN bool operator||(const xpath_node& lhs, bool rhs) | |
12448 { | |
12449 return (bool)lhs || rhs; | |
12450 } | |
12451 #endif | |
12452 | |
12453 PUGI_IMPL_FN void xpath_node_set::_assign(const_iterator begin_, const_iterator end_, type_t type_) | |
12454 { | |
12455 assert(begin_ <= end_); | |
12456 | |
12457 size_t size_ = static_cast<size_t>(end_ - begin_); | |
12458 | |
12459 // use internal buffer for 0 or 1 elements, heap buffer otherwise | |
12460 xpath_node* storage = (size_ <= 1) ? _storage : static_cast<xpath_node*>(impl::xml_memory::allocate(size_ * sizeof(xpath_node))); | |
12461 | |
12462 if (!storage) | |
12463 { | |
12464 #ifdef PUGIXML_NO_EXCEPTIONS | |
12465 return; | |
12466 #else | |
12467 throw std::bad_alloc(); | |
12468 #endif | |
12469 } | |
12470 | |
12471 // deallocate old buffer | |
12472 if (_begin != _storage) | |
12473 impl::xml_memory::deallocate(_begin); | |
12474 | |
12475 // size check is necessary because for begin_ = end_ = nullptr, memcpy is UB | |
12476 if (size_) | |
12477 memcpy(storage, begin_, size_ * sizeof(xpath_node)); | |
12478 | |
12479 _begin = storage; | |
12480 _end = storage + size_; | |
12481 _type = type_; | |
12482 } | |
12483 | |
12484 #ifdef PUGIXML_HAS_MOVE | |
12485 PUGI_IMPL_FN void xpath_node_set::_move(xpath_node_set& rhs) PUGIXML_NOEXCEPT | |
12486 { | |
12487 _type = rhs._type; | |
12488 _storage[0] = rhs._storage[0]; | |
12489 _begin = (rhs._begin == rhs._storage) ? _storage : rhs._begin; | |
12490 _end = _begin + (rhs._end - rhs._begin); | |
12491 | |
12492 rhs._type = type_unsorted; | |
12493 rhs._begin = rhs._storage; | |
12494 rhs._end = rhs._storage; | |
12495 } | |
12496 #endif | |
12497 | |
12498 PUGI_IMPL_FN xpath_node_set::xpath_node_set(): _type(type_unsorted), _begin(_storage), _end(_storage) | |
12499 { | |
12500 } | |
12501 | |
12502 PUGI_IMPL_FN xpath_node_set::xpath_node_set(const_iterator begin_, const_iterator end_, type_t type_): _type(type_unsorted), _begin(_storage), _end(_storage) | |
12503 { | |
12504 _assign(begin_, end_, type_); | |
12505 } | |
12506 | |
12507 PUGI_IMPL_FN xpath_node_set::~xpath_node_set() | |
12508 { | |
12509 if (_begin != _storage) | |
12510 impl::xml_memory::deallocate(_begin); | |
12511 } | |
12512 | |
12513 PUGI_IMPL_FN xpath_node_set::xpath_node_set(const xpath_node_set& ns): _type(type_unsorted), _begin(_storage), _end(_storage) | |
12514 { | |
12515 _assign(ns._begin, ns._end, ns._type); | |
12516 } | |
12517 | |
12518 PUGI_IMPL_FN xpath_node_set& xpath_node_set::operator=(const xpath_node_set& ns) | |
12519 { | |
12520 if (this == &ns) return *this; | |
12521 | |
12522 _assign(ns._begin, ns._end, ns._type); | |
12523 | |
12524 return *this; | |
12525 } | |
12526 | |
12527 #ifdef PUGIXML_HAS_MOVE | |
12528 PUGI_IMPL_FN xpath_node_set::xpath_node_set(xpath_node_set&& rhs) PUGIXML_NOEXCEPT: _type(type_unsorted), _begin(_storage), _end(_storage) | |
12529 { | |
12530 _move(rhs); | |
12531 } | |
12532 | |
12533 PUGI_IMPL_FN xpath_node_set& xpath_node_set::operator=(xpath_node_set&& rhs) PUGIXML_NOEXCEPT | |
12534 { | |
12535 if (this == &rhs) return *this; | |
12536 | |
12537 if (_begin != _storage) | |
12538 impl::xml_memory::deallocate(_begin); | |
12539 | |
12540 _move(rhs); | |
12541 | |
12542 return *this; | |
12543 } | |
12544 #endif | |
12545 | |
12546 PUGI_IMPL_FN xpath_node_set::type_t xpath_node_set::type() const | |
12547 { | |
12548 return _type; | |
12549 } | |
12550 | |
12551 PUGI_IMPL_FN size_t xpath_node_set::size() const | |
12552 { | |
12553 return _end - _begin; | |
12554 } | |
12555 | |
12556 PUGI_IMPL_FN bool xpath_node_set::empty() const | |
12557 { | |
12558 return _begin == _end; | |
12559 } | |
12560 | |
12561 PUGI_IMPL_FN const xpath_node& xpath_node_set::operator[](size_t index) const | |
12562 { | |
12563 assert(index < size()); | |
12564 return _begin[index]; | |
12565 } | |
12566 | |
12567 PUGI_IMPL_FN xpath_node_set::const_iterator xpath_node_set::begin() const | |
12568 { | |
12569 return _begin; | |
12570 } | |
12571 | |
12572 PUGI_IMPL_FN xpath_node_set::const_iterator xpath_node_set::end() const | |
12573 { | |
12574 return _end; | |
12575 } | |
12576 | |
12577 PUGI_IMPL_FN void xpath_node_set::sort(bool reverse) | |
12578 { | |
12579 _type = impl::xpath_sort(_begin, _end, _type, reverse); | |
12580 } | |
12581 | |
12582 PUGI_IMPL_FN xpath_node xpath_node_set::first() const | |
12583 { | |
12584 return impl::xpath_first(_begin, _end, _type); | |
12585 } | |
12586 | |
12587 PUGI_IMPL_FN xpath_parse_result::xpath_parse_result(): error("Internal error"), offset(0) | |
12588 { | |
12589 } | |
12590 | |
12591 PUGI_IMPL_FN xpath_parse_result::operator bool() const | |
12592 { | |
12593 return error == 0; | |
12594 } | |
12595 | |
12596 PUGI_IMPL_FN const char* xpath_parse_result::description() const | |
12597 { | |
12598 return error ? error : "No error"; | |
12599 } | |
12600 | |
12601 PUGI_IMPL_FN xpath_variable::xpath_variable(xpath_value_type type_): _type(type_), _next(0) | |
12602 { | |
12603 } | |
12604 | |
12605 PUGI_IMPL_FN const char_t* xpath_variable::name() const | |
12606 { | |
12607 switch (_type) | |
12608 { | |
12609 case xpath_type_node_set: | |
12610 return static_cast<const impl::xpath_variable_node_set*>(this)->name; | |
12611 | |
12612 case xpath_type_number: | |
12613 return static_cast<const impl::xpath_variable_number*>(this)->name; | |
12614 | |
12615 case xpath_type_string: | |
12616 return static_cast<const impl::xpath_variable_string*>(this)->name; | |
12617 | |
12618 case xpath_type_boolean: | |
12619 return static_cast<const impl::xpath_variable_boolean*>(this)->name; | |
12620 | |
12621 default: | |
12622 assert(false && "Invalid variable type"); // unreachable | |
12623 return 0; | |
12624 } | |
12625 } | |
12626 | |
12627 PUGI_IMPL_FN xpath_value_type xpath_variable::type() const | |
12628 { | |
12629 return _type; | |
12630 } | |
12631 | |
12632 PUGI_IMPL_FN bool xpath_variable::get_boolean() const | |
12633 { | |
12634 return (_type == xpath_type_boolean) ? static_cast<const impl::xpath_variable_boolean*>(this)->value : false; | |
12635 } | |
12636 | |
12637 PUGI_IMPL_FN double xpath_variable::get_number() const | |
12638 { | |
12639 return (_type == xpath_type_number) ? static_cast<const impl::xpath_variable_number*>(this)->value : impl::gen_nan(); | |
12640 } | |
12641 | |
12642 PUGI_IMPL_FN const char_t* xpath_variable::get_string() const | |
12643 { | |
12644 const char_t* value = (_type == xpath_type_string) ? static_cast<const impl::xpath_variable_string*>(this)->value : 0; | |
12645 return value ? value : PUGIXML_TEXT(""); | |
12646 } | |
12647 | |
12648 PUGI_IMPL_FN const xpath_node_set& xpath_variable::get_node_set() const | |
12649 { | |
12650 return (_type == xpath_type_node_set) ? static_cast<const impl::xpath_variable_node_set*>(this)->value : impl::dummy_node_set; | |
12651 } | |
12652 | |
12653 PUGI_IMPL_FN bool xpath_variable::set(bool value) | |
12654 { | |
12655 if (_type != xpath_type_boolean) return false; | |
12656 | |
12657 static_cast<impl::xpath_variable_boolean*>(this)->value = value; | |
12658 return true; | |
12659 } | |
12660 | |
12661 PUGI_IMPL_FN bool xpath_variable::set(double value) | |
12662 { | |
12663 if (_type != xpath_type_number) return false; | |
12664 | |
12665 static_cast<impl::xpath_variable_number*>(this)->value = value; | |
12666 return true; | |
12667 } | |
12668 | |
12669 PUGI_IMPL_FN bool xpath_variable::set(const char_t* value) | |
12670 { | |
12671 if (_type != xpath_type_string) return false; | |
12672 | |
12673 impl::xpath_variable_string* var = static_cast<impl::xpath_variable_string*>(this); | |
12674 | |
12675 // duplicate string | |
12676 size_t size = (impl::strlength(value) + 1) * sizeof(char_t); | |
12677 | |
12678 char_t* copy = static_cast<char_t*>(impl::xml_memory::allocate(size)); | |
12679 if (!copy) return false; | |
12680 | |
12681 memcpy(copy, value, size); | |
12682 | |
12683 // replace old string | |
12684 if (var->value) impl::xml_memory::deallocate(var->value); | |
12685 var->value = copy; | |
12686 | |
12687 return true; | |
12688 } | |
12689 | |
12690 PUGI_IMPL_FN bool xpath_variable::set(const xpath_node_set& value) | |
12691 { | |
12692 if (_type != xpath_type_node_set) return false; | |
12693 | |
12694 static_cast<impl::xpath_variable_node_set*>(this)->value = value; | |
12695 return true; | |
12696 } | |
12697 | |
12698 PUGI_IMPL_FN xpath_variable_set::xpath_variable_set() | |
12699 { | |
12700 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12701 _data[i] = 0; | |
12702 } | |
12703 | |
12704 PUGI_IMPL_FN xpath_variable_set::~xpath_variable_set() | |
12705 { | |
12706 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12707 _destroy(_data[i]); | |
12708 } | |
12709 | |
12710 PUGI_IMPL_FN xpath_variable_set::xpath_variable_set(const xpath_variable_set& rhs) | |
12711 { | |
12712 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12713 _data[i] = 0; | |
12714 | |
12715 _assign(rhs); | |
12716 } | |
12717 | |
12718 PUGI_IMPL_FN xpath_variable_set& xpath_variable_set::operator=(const xpath_variable_set& rhs) | |
12719 { | |
12720 if (this == &rhs) return *this; | |
12721 | |
12722 _assign(rhs); | |
12723 | |
12724 return *this; | |
12725 } | |
12726 | |
12727 #ifdef PUGIXML_HAS_MOVE | |
12728 PUGI_IMPL_FN xpath_variable_set::xpath_variable_set(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT | |
12729 { | |
12730 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12731 { | |
12732 _data[i] = rhs._data[i]; | |
12733 rhs._data[i] = 0; | |
12734 } | |
12735 } | |
12736 | |
12737 PUGI_IMPL_FN xpath_variable_set& xpath_variable_set::operator=(xpath_variable_set&& rhs) PUGIXML_NOEXCEPT | |
12738 { | |
12739 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12740 { | |
12741 _destroy(_data[i]); | |
12742 | |
12743 _data[i] = rhs._data[i]; | |
12744 rhs._data[i] = 0; | |
12745 } | |
12746 | |
12747 return *this; | |
12748 } | |
12749 #endif | |
12750 | |
12751 PUGI_IMPL_FN void xpath_variable_set::_assign(const xpath_variable_set& rhs) | |
12752 { | |
12753 xpath_variable_set temp; | |
12754 | |
12755 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12756 if (rhs._data[i] && !_clone(rhs._data[i], &temp._data[i])) | |
12757 return; | |
12758 | |
12759 _swap(temp); | |
12760 } | |
12761 | |
12762 PUGI_IMPL_FN void xpath_variable_set::_swap(xpath_variable_set& rhs) | |
12763 { | |
12764 for (size_t i = 0; i < sizeof(_data) / sizeof(_data[0]); ++i) | |
12765 { | |
12766 xpath_variable* chain = _data[i]; | |
12767 | |
12768 _data[i] = rhs._data[i]; | |
12769 rhs._data[i] = chain; | |
12770 } | |
12771 } | |
12772 | |
12773 PUGI_IMPL_FN xpath_variable* xpath_variable_set::_find(const char_t* name) const | |
12774 { | |
12775 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); | |
12776 size_t hash = impl::hash_string(name) % hash_size; | |
12777 | |
12778 // look for existing variable | |
12779 for (xpath_variable* var = _data[hash]; var; var = var->_next) | |
12780 if (impl::strequal(var->name(), name)) | |
12781 return var; | |
12782 | |
12783 return 0; | |
12784 } | |
12785 | |
12786 PUGI_IMPL_FN bool xpath_variable_set::_clone(xpath_variable* var, xpath_variable** out_result) | |
12787 { | |
12788 xpath_variable* last = 0; | |
12789 | |
12790 while (var) | |
12791 { | |
12792 // allocate storage for new variable | |
12793 xpath_variable* nvar = impl::new_xpath_variable(var->_type, var->name()); | |
12794 if (!nvar) return false; | |
12795 | |
12796 // link the variable to the result immediately to handle failures gracefully | |
12797 if (last) | |
12798 last->_next = nvar; | |
12799 else | |
12800 *out_result = nvar; | |
12801 | |
12802 last = nvar; | |
12803 | |
12804 // copy the value; this can fail due to out-of-memory conditions | |
12805 if (!impl::copy_xpath_variable(nvar, var)) return false; | |
12806 | |
12807 var = var->_next; | |
12808 } | |
12809 | |
12810 return true; | |
12811 } | |
12812 | |
12813 PUGI_IMPL_FN void xpath_variable_set::_destroy(xpath_variable* var) | |
12814 { | |
12815 while (var) | |
12816 { | |
12817 xpath_variable* next = var->_next; | |
12818 | |
12819 impl::delete_xpath_variable(var->_type, var); | |
12820 | |
12821 var = next; | |
12822 } | |
12823 } | |
12824 | |
12825 PUGI_IMPL_FN xpath_variable* xpath_variable_set::add(const char_t* name, xpath_value_type type) | |
12826 { | |
12827 const size_t hash_size = sizeof(_data) / sizeof(_data[0]); | |
12828 size_t hash = impl::hash_string(name) % hash_size; | |
12829 | |
12830 // look for existing variable | |
12831 for (xpath_variable* var = _data[hash]; var; var = var->_next) | |
12832 if (impl::strequal(var->name(), name)) | |
12833 return var->type() == type ? var : 0; | |
12834 | |
12835 // add new variable | |
12836 xpath_variable* result = impl::new_xpath_variable(type, name); | |
12837 | |
12838 if (result) | |
12839 { | |
12840 result->_next = _data[hash]; | |
12841 | |
12842 _data[hash] = result; | |
12843 } | |
12844 | |
12845 return result; | |
12846 } | |
12847 | |
12848 PUGI_IMPL_FN bool xpath_variable_set::set(const char_t* name, bool value) | |
12849 { | |
12850 xpath_variable* var = add(name, xpath_type_boolean); | |
12851 return var ? var->set(value) : false; | |
12852 } | |
12853 | |
12854 PUGI_IMPL_FN bool xpath_variable_set::set(const char_t* name, double value) | |
12855 { | |
12856 xpath_variable* var = add(name, xpath_type_number); | |
12857 return var ? var->set(value) : false; | |
12858 } | |
12859 | |
12860 PUGI_IMPL_FN bool xpath_variable_set::set(const char_t* name, const char_t* value) | |
12861 { | |
12862 xpath_variable* var = add(name, xpath_type_string); | |
12863 return var ? var->set(value) : false; | |
12864 } | |
12865 | |
12866 PUGI_IMPL_FN bool xpath_variable_set::set(const char_t* name, const xpath_node_set& value) | |
12867 { | |
12868 xpath_variable* var = add(name, xpath_type_node_set); | |
12869 return var ? var->set(value) : false; | |
12870 } | |
12871 | |
12872 PUGI_IMPL_FN xpath_variable* xpath_variable_set::get(const char_t* name) | |
12873 { | |
12874 return _find(name); | |
12875 } | |
12876 | |
12877 PUGI_IMPL_FN const xpath_variable* xpath_variable_set::get(const char_t* name) const | |
12878 { | |
12879 return _find(name); | |
12880 } | |
12881 | |
12882 PUGI_IMPL_FN xpath_query::xpath_query(const char_t* query, xpath_variable_set* variables): _impl(0) | |
12883 { | |
12884 impl::xpath_query_impl* qimpl = impl::xpath_query_impl::create(); | |
12885 | |
12886 if (!qimpl) | |
12887 { | |
12888 #ifdef PUGIXML_NO_EXCEPTIONS | |
12889 _result.error = "Out of memory"; | |
12890 #else | |
12891 throw std::bad_alloc(); | |
12892 #endif | |
12893 } | |
12894 else | |
12895 { | |
12896 using impl::auto_deleter; // MSVC7 workaround | |
12897 auto_deleter<impl::xpath_query_impl> impl(qimpl, impl::xpath_query_impl::destroy); | |
12898 | |
12899 qimpl->root = impl::xpath_parser::parse(query, variables, &qimpl->alloc, &_result); | |
12900 | |
12901 if (qimpl->root) | |
12902 { | |
12903 qimpl->root->optimize(&qimpl->alloc); | |
12904 | |
12905 _impl = impl.release(); | |
12906 _result.error = 0; | |
12907 } | |
12908 else | |
12909 { | |
12910 #ifdef PUGIXML_NO_EXCEPTIONS | |
12911 if (qimpl->oom) _result.error = "Out of memory"; | |
12912 #else | |
12913 if (qimpl->oom) throw std::bad_alloc(); | |
12914 throw xpath_exception(_result); | |
12915 #endif | |
12916 } | |
12917 } | |
12918 } | |
12919 | |
12920 PUGI_IMPL_FN xpath_query::xpath_query(): _impl(0) | |
12921 { | |
12922 } | |
12923 | |
12924 PUGI_IMPL_FN xpath_query::~xpath_query() | |
12925 { | |
12926 if (_impl) | |
12927 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); | |
12928 } | |
12929 | |
12930 #ifdef PUGIXML_HAS_MOVE | |
12931 PUGI_IMPL_FN xpath_query::xpath_query(xpath_query&& rhs) PUGIXML_NOEXCEPT | |
12932 { | |
12933 _impl = rhs._impl; | |
12934 _result = rhs._result; | |
12935 rhs._impl = 0; | |
12936 rhs._result = xpath_parse_result(); | |
12937 } | |
12938 | |
12939 PUGI_IMPL_FN xpath_query& xpath_query::operator=(xpath_query&& rhs) PUGIXML_NOEXCEPT | |
12940 { | |
12941 if (this == &rhs) return *this; | |
12942 | |
12943 if (_impl) | |
12944 impl::xpath_query_impl::destroy(static_cast<impl::xpath_query_impl*>(_impl)); | |
12945 | |
12946 _impl = rhs._impl; | |
12947 _result = rhs._result; | |
12948 rhs._impl = 0; | |
12949 rhs._result = xpath_parse_result(); | |
12950 | |
12951 return *this; | |
12952 } | |
12953 #endif | |
12954 | |
12955 PUGI_IMPL_FN xpath_value_type xpath_query::return_type() const | |
12956 { | |
12957 if (!_impl) return xpath_type_none; | |
12958 | |
12959 return static_cast<impl::xpath_query_impl*>(_impl)->root->rettype(); | |
12960 } | |
12961 | |
12962 PUGI_IMPL_FN bool xpath_query::evaluate_boolean(const xpath_node& n) const | |
12963 { | |
12964 if (!_impl) return false; | |
12965 | |
12966 impl::xpath_context c(n, 1, 1); | |
12967 impl::xpath_stack_data sd; | |
12968 | |
12969 bool r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_boolean(c, sd.stack); | |
12970 | |
12971 if (sd.oom) | |
12972 { | |
12973 #ifdef PUGIXML_NO_EXCEPTIONS | |
12974 return false; | |
12975 #else | |
12976 throw std::bad_alloc(); | |
12977 #endif | |
12978 } | |
12979 | |
12980 return r; | |
12981 } | |
12982 | |
12983 PUGI_IMPL_FN double xpath_query::evaluate_number(const xpath_node& n) const | |
12984 { | |
12985 if (!_impl) return impl::gen_nan(); | |
12986 | |
12987 impl::xpath_context c(n, 1, 1); | |
12988 impl::xpath_stack_data sd; | |
12989 | |
12990 double r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_number(c, sd.stack); | |
12991 | |
12992 if (sd.oom) | |
12993 { | |
12994 #ifdef PUGIXML_NO_EXCEPTIONS | |
12995 return impl::gen_nan(); | |
12996 #else | |
12997 throw std::bad_alloc(); | |
12998 #endif | |
12999 } | |
13000 | |
13001 return r; | |
13002 } | |
13003 | |
13004 #ifndef PUGIXML_NO_STL | |
13005 PUGI_IMPL_FN string_t xpath_query::evaluate_string(const xpath_node& n) const | |
13006 { | |
13007 if (!_impl) return string_t(); | |
13008 | |
13009 impl::xpath_context c(n, 1, 1); | |
13010 impl::xpath_stack_data sd; | |
13011 | |
13012 impl::xpath_string r = static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack); | |
13013 | |
13014 if (sd.oom) | |
13015 { | |
13016 #ifdef PUGIXML_NO_EXCEPTIONS | |
13017 return string_t(); | |
13018 #else | |
13019 throw std::bad_alloc(); | |
13020 #endif | |
13021 } | |
13022 | |
13023 return string_t(r.c_str(), r.length()); | |
13024 } | |
13025 #endif | |
13026 | |
13027 PUGI_IMPL_FN size_t xpath_query::evaluate_string(char_t* buffer, size_t capacity, const xpath_node& n) const | |
13028 { | |
13029 impl::xpath_context c(n, 1, 1); | |
13030 impl::xpath_stack_data sd; | |
13031 | |
13032 impl::xpath_string r = _impl ? static_cast<impl::xpath_query_impl*>(_impl)->root->eval_string(c, sd.stack) : impl::xpath_string(); | |
13033 | |
13034 if (sd.oom) | |
13035 { | |
13036 #ifdef PUGIXML_NO_EXCEPTIONS | |
13037 r = impl::xpath_string(); | |
13038 #else | |
13039 throw std::bad_alloc(); | |
13040 #endif | |
13041 } | |
13042 | |
13043 size_t full_size = r.length() + 1; | |
13044 | |
13045 if (capacity > 0) | |
13046 { | |
13047 size_t size = (full_size < capacity) ? full_size : capacity; | |
13048 assert(size > 0); | |
13049 | |
13050 memcpy(buffer, r.c_str(), (size - 1) * sizeof(char_t)); | |
13051 buffer[size - 1] = 0; | |
13052 } | |
13053 | |
13054 return full_size; | |
13055 } | |
13056 | |
13057 PUGI_IMPL_FN xpath_node_set xpath_query::evaluate_node_set(const xpath_node& n) const | |
13058 { | |
13059 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); | |
13060 if (!root) return xpath_node_set(); | |
13061 | |
13062 impl::xpath_context c(n, 1, 1); | |
13063 impl::xpath_stack_data sd; | |
13064 | |
13065 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_all); | |
13066 | |
13067 if (sd.oom) | |
13068 { | |
13069 #ifdef PUGIXML_NO_EXCEPTIONS | |
13070 return xpath_node_set(); | |
13071 #else | |
13072 throw std::bad_alloc(); | |
13073 #endif | |
13074 } | |
13075 | |
13076 return xpath_node_set(r.begin(), r.end(), r.type()); | |
13077 } | |
13078 | |
13079 PUGI_IMPL_FN xpath_node xpath_query::evaluate_node(const xpath_node& n) const | |
13080 { | |
13081 impl::xpath_ast_node* root = impl::evaluate_node_set_prepare(static_cast<impl::xpath_query_impl*>(_impl)); | |
13082 if (!root) return xpath_node(); | |
13083 | |
13084 impl::xpath_context c(n, 1, 1); | |
13085 impl::xpath_stack_data sd; | |
13086 | |
13087 impl::xpath_node_set_raw r = root->eval_node_set(c, sd.stack, impl::nodeset_eval_first); | |
13088 | |
13089 if (sd.oom) | |
13090 { | |
13091 #ifdef PUGIXML_NO_EXCEPTIONS | |
13092 return xpath_node(); | |
13093 #else | |
13094 throw std::bad_alloc(); | |
13095 #endif | |
13096 } | |
13097 | |
13098 return r.first(); | |
13099 } | |
13100 | |
13101 PUGI_IMPL_FN const xpath_parse_result& xpath_query::result() const | |
13102 { | |
13103 return _result; | |
13104 } | |
13105 | |
13106 PUGI_IMPL_FN static void unspecified_bool_xpath_query(xpath_query***) | |
13107 { | |
13108 } | |
13109 | |
13110 PUGI_IMPL_FN xpath_query::operator xpath_query::unspecified_bool_type() const | |
13111 { | |
13112 return _impl ? unspecified_bool_xpath_query : 0; | |
13113 } | |
13114 | |
13115 PUGI_IMPL_FN bool xpath_query::operator!() const | |
13116 { | |
13117 return !_impl; | |
13118 } | |
13119 | |
13120 PUGI_IMPL_FN xpath_node xml_node::select_node(const char_t* query, xpath_variable_set* variables) const | |
13121 { | |
13122 xpath_query q(query, variables); | |
13123 return q.evaluate_node(*this); | |
13124 } | |
13125 | |
13126 PUGI_IMPL_FN xpath_node xml_node::select_node(const xpath_query& query) const | |
13127 { | |
13128 return query.evaluate_node(*this); | |
13129 } | |
13130 | |
13131 PUGI_IMPL_FN xpath_node_set xml_node::select_nodes(const char_t* query, xpath_variable_set* variables) const | |
13132 { | |
13133 xpath_query q(query, variables); | |
13134 return q.evaluate_node_set(*this); | |
13135 } | |
13136 | |
13137 PUGI_IMPL_FN xpath_node_set xml_node::select_nodes(const xpath_query& query) const | |
13138 { | |
13139 return query.evaluate_node_set(*this); | |
13140 } | |
13141 | |
13142 PUGI_IMPL_FN xpath_node xml_node::select_single_node(const char_t* query, xpath_variable_set* variables) const | |
13143 { | |
13144 xpath_query q(query, variables); | |
13145 return q.evaluate_node(*this); | |
13146 } | |
13147 | |
13148 PUGI_IMPL_FN xpath_node xml_node::select_single_node(const xpath_query& query) const | |
13149 { | |
13150 return query.evaluate_node(*this); | |
13151 } | |
13152 } | |
13153 | |
13154 #endif | |
13155 | |
13156 #ifdef __BORLANDC__ | |
13157 # pragma option pop | |
13158 #endif | |
13159 | |
13160 // Intel C++ does not properly keep warning state for function templates, | |
13161 // so popping warning state at the end of translation unit leads to warnings in the middle. | |
13162 #if defined(_MSC_VER) && !defined(__INTEL_COMPILER) | |
13163 # pragma warning(pop) | |
13164 #endif | |
13165 | |
13166 #if defined(_MSC_VER) && defined(__c2__) | |
13167 # pragma clang diagnostic pop | |
13168 #endif | |
13169 | |
13170 // Undefine all local macros (makes sure we're not leaking macros in header-only mode) | |
13171 #undef PUGI_IMPL_NO_INLINE | |
13172 #undef PUGI_IMPL_UNLIKELY | |
13173 #undef PUGI_IMPL_STATIC_ASSERT | |
13174 #undef PUGI_IMPL_DMC_VOLATILE | |
13175 #undef PUGI_IMPL_UNSIGNED_OVERFLOW | |
13176 #undef PUGI_IMPL_MSVC_CRT_VERSION | |
13177 #undef PUGI_IMPL_SNPRINTF | |
13178 #undef PUGI_IMPL_NS_BEGIN | |
13179 #undef PUGI_IMPL_NS_END | |
13180 #undef PUGI_IMPL_FN | |
13181 #undef PUGI_IMPL_FN_NO_INLINE | |
13182 #undef PUGI_IMPL_GETHEADER_IMPL | |
13183 #undef PUGI_IMPL_GETPAGE_IMPL | |
13184 #undef PUGI_IMPL_GETPAGE | |
13185 #undef PUGI_IMPL_NODETYPE | |
13186 #undef PUGI_IMPL_IS_CHARTYPE_IMPL | |
13187 #undef PUGI_IMPL_IS_CHARTYPE | |
13188 #undef PUGI_IMPL_IS_CHARTYPEX | |
13189 #undef PUGI_IMPL_ENDSWITH | |
13190 #undef PUGI_IMPL_SKIPWS | |
13191 #undef PUGI_IMPL_OPTSET | |
13192 #undef PUGI_IMPL_PUSHNODE | |
13193 #undef PUGI_IMPL_POPNODE | |
13194 #undef PUGI_IMPL_SCANFOR | |
13195 #undef PUGI_IMPL_SCANWHILE | |
13196 #undef PUGI_IMPL_SCANWHILE_UNROLL | |
13197 #undef PUGI_IMPL_ENDSEG | |
13198 #undef PUGI_IMPL_THROW_ERROR | |
13199 #undef PUGI_IMPL_CHECK_ERROR | |
13200 | |
13201 #endif | |
13202 | |
13203 /** | |
13204 * Copyright (c) 2006-2023 Arseny Kapoulkine | |
13205 * | |
13206 * Permission is hereby granted, free of charge, to any person | |
13207 * obtaining a copy of this software and associated documentation | |
13208 * files (the "Software"), to deal in the Software without | |
13209 * restriction, including without limitation the rights to use, | |
13210 * copy, modify, merge, publish, distribute, sublicense, and/or sell | |
13211 * copies of the Software, and to permit persons to whom the | |
13212 * Software is furnished to do so, subject to the following | |
13213 * conditions: | |
13214 * | |
13215 * The above copyright notice and this permission notice shall be | |
13216 * included in all copies or substantial portions of the Software. | |
13217 * | |
13218 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |
13219 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES | |
13220 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |
13221 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT | |
13222 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
13223 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
13224 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR | |
13225 * OTHER DEALINGS IN THE SOFTWARE. | |
13226 */ |